1 files changed, 0 insertions, 1669 deletions
diff --git a/recipes/mythtv/mythtv-0.21/ffmpeg-arm-update.diff b/recipes/mythtv/mythtv-0.21/ffmpeg-arm-update.diff
deleted file mode 100644
index d0e32eb512..0000000000
--- a/recipes/mythtv/mythtv-0.21/ffmpeg-arm-update.diff
+++ /dev/null
@@ -1,1669 +0,0 @@
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_arm.c mythtv/libs/libavcodec/armv4l/dsputil_arm.c
---- mythtv.orig/libs/libavcodec/armv4l/dsputil_arm.c	2008-07-23 12:19:05.000000000 +0200
-+++ mythtv/libs/libavcodec/armv4l/dsputil_arm.c	2008-07-24 19:54:00.753198000 +0200
-@@ -19,12 +19,14 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
--#include "dsputil.h"
-+#include "libavcodec/dsputil.h"
- #ifdef HAVE_IPP
--#include "ipp.h"
-+#include <ipp.h>
- #endif
- 
- extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
-+extern void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
-+extern void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
- 
- extern void j_rev_dct_ARM(DCTELEM *data);
- extern void simple_idct_ARM(DCTELEM *data);
-@@ -41,6 +43,12 @@
- extern void ff_simple_idct_add_armv6(uint8_t *dest, int line_size,
-                                      DCTELEM *data);
- 
-+extern void ff_simple_idct_neon(DCTELEM *data);
-+extern void ff_simple_idct_put_neon(uint8_t *dest, int line_size,
-+                                    DCTELEM *data);
-+extern void ff_simple_idct_add_neon(uint8_t *dest, int line_size,
-+                                    DCTELEM *data);
-+
- /* XXX: local hack */
- static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
- static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
-@@ -202,6 +210,24 @@
- }
- #endif
- 
-+#ifdef HAVE_ARMV5TE
-+static void prefetch_arm(void *mem, int stride, int h)
-+{
-+    asm volatile(
-+        "1:              \n\t"
-+        "subs %0, %0, #1 \n\t"
-+        "pld  [%1]       \n\t"
-+        "add  %1, %1, %2 \n\t"
-+        "bgt  1b         \n\t"
-+        : "+r"(h), "+r"(mem) : "r"(stride));
-+}
-+#endif
-+
-+int mm_support(void)
-+{
-+    return ENABLE_IWMMXT * MM_IWMMXT;
-+}
-+
- void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
- {
-     int idct_algo= avctx->idct_algo;
-@@ -209,49 +235,60 @@
-     ff_put_pixels_clamped = c->put_pixels_clamped;
-     ff_add_pixels_clamped = c->add_pixels_clamped;
- 
--    if(idct_algo == FF_IDCT_AUTO){
-+    if (avctx->lowres == 0) {
-+        if(idct_algo == FF_IDCT_AUTO){
- #if defined(HAVE_IPP)
--        idct_algo = FF_IDCT_IPP;
-+            idct_algo = FF_IDCT_IPP;
-+#elif defined(HAVE_NEON)
-+            idct_algo = FF_IDCT_SIMPLENEON;
- #elif defined(HAVE_ARMV6)
--        idct_algo = FF_IDCT_SIMPLEARMV6;
-+            idct_algo = FF_IDCT_SIMPLEARMV6;
- #elif defined(HAVE_ARMV5TE)
--        idct_algo = FF_IDCT_SIMPLEARMV5TE;
-+            idct_algo = FF_IDCT_SIMPLEARMV5TE;
- #else
--        idct_algo = FF_IDCT_ARM;
-+            idct_algo = FF_IDCT_ARM;
- #endif
--    }
-+        }
- 
--    if(idct_algo==FF_IDCT_ARM){
--        c->idct_put= j_rev_dct_ARM_put;
--        c->idct_add= j_rev_dct_ARM_add;
--        c->idct    = j_rev_dct_ARM;
--        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
--    } else if (idct_algo==FF_IDCT_SIMPLEARM){
--        c->idct_put= simple_idct_ARM_put;
--        c->idct_add= simple_idct_ARM_add;
--        c->idct    = simple_idct_ARM;
--        c->idct_permutation_type= FF_NO_IDCT_PERM;
-+        if(idct_algo==FF_IDCT_ARM){
-+            c->idct_put= j_rev_dct_ARM_put;
-+            c->idct_add= j_rev_dct_ARM_add;
-+            c->idct    = j_rev_dct_ARM;
-+            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
-+        } else if (idct_algo==FF_IDCT_SIMPLEARM){
-+            c->idct_put= simple_idct_ARM_put;
-+            c->idct_add= simple_idct_ARM_add;
-+            c->idct    = simple_idct_ARM;
-+            c->idct_permutation_type= FF_NO_IDCT_PERM;
- #ifdef HAVE_ARMV6
--    } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
--        c->idct_put= ff_simple_idct_put_armv6;
--        c->idct_add= ff_simple_idct_add_armv6;
--        c->idct    = ff_simple_idct_armv6;
--        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
-+        } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
-+            c->idct_put= ff_simple_idct_put_armv6;
-+            c->idct_add= ff_simple_idct_add_armv6;
-+            c->idct    = ff_simple_idct_armv6;
-+            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
- #endif
- #ifdef HAVE_ARMV5TE
--    } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
--        c->idct_put= simple_idct_put_armv5te;
--        c->idct_add= simple_idct_add_armv5te;
--        c->idct    = simple_idct_armv5te;
--        c->idct_permutation_type = FF_NO_IDCT_PERM;
-+        } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
-+            c->idct_put= simple_idct_put_armv5te;
-+            c->idct_add= simple_idct_add_armv5te;
-+            c->idct    = simple_idct_armv5te;
-+            c->idct_permutation_type = FF_NO_IDCT_PERM;
- #endif
- #ifdef HAVE_IPP
--    } else if (idct_algo==FF_IDCT_IPP){
--        c->idct_put= simple_idct_ipp_put;
--        c->idct_add= simple_idct_ipp_add;
--        c->idct    = simple_idct_ipp;
--        c->idct_permutation_type= FF_NO_IDCT_PERM;
-+        } else if (idct_algo==FF_IDCT_IPP){
-+            c->idct_put= simple_idct_ipp_put;
-+            c->idct_add= simple_idct_ipp_add;
-+            c->idct    = simple_idct_ipp;
-+            c->idct_permutation_type= FF_NO_IDCT_PERM;
-+#endif
-+#ifdef HAVE_NEON
-+        } else if (idct_algo==FF_IDCT_SIMPLENEON){
-+            c->idct_put= ff_simple_idct_put_neon;
-+            c->idct_add= ff_simple_idct_add_neon;
-+            c->idct    = ff_simple_idct_neon;
-+            c->idct_permutation_type = FF_NO_IDCT_PERM;
- #endif
-+        }
-     }
- 
-     c->put_pixels_tab[0][0] = put_pixels16_arm;
-@@ -271,7 +308,17 @@
-     c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; //OK
-     c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;
- 
-+#ifdef HAVE_ARMV5TE
-+    c->prefetch = prefetch_arm;
-+#endif
-+
- #ifdef HAVE_IWMMXT
-     dsputil_init_iwmmxt(c, avctx);
- #endif
-+#ifdef HAVE_ARMVFP
-+    ff_float_init_arm_vfp(c, avctx);
-+#endif
-+#ifdef HAVE_NEON
-+    ff_dsputil_init_neon(c, avctx);
-+#endif
- }
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_arm_s.S mythtv/libs/libavcodec/armv4l/dsputil_arm_s.S
---- mythtv.orig/libs/libavcodec/armv4l/dsputil_arm_s.S	2008-07-23 12:19:05.000000000 +0200
-+++ mythtv/libs/libavcodec/armv4l/dsputil_arm_s.S	2008-07-24 19:54:00.753198000 +0200
-@@ -19,6 +19,13 @@
- @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- @
- 
-+#include "config.h"
-+
-+#ifndef HAVE_PLD
-+.macro pld reg
-+.endm
-+#endif
-+
- .macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
-         mov \Rd0, \Rn0, lsr #(\shift * 8)
-         mov \Rd1, \Rn1, lsr #(\shift * 8)
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt.c mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt.c
---- mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt.c	2008-07-23 12:19:05.000000000 +0200
-+++ mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt.c	2008-07-24 19:54:00.753198000 +0200
-@@ -19,10 +19,10 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
--#include "dsputil.h"
-+#include "libavcodec/dsputil.h"
- 
- #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
--#define SET_RND(regd)  __asm__ __volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
-+#define SET_RND(regd)  asm volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
- #define WAVG2B "wavg2b"
- #include "dsputil_iwmmxt_rnd.h"
- #undef DEF
-@@ -30,7 +30,7 @@
- #undef WAVG2B
- 
- #define DEF(x, y) x ## _ ## y ##_iwmmxt
--#define SET_RND(regd)  __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
-+#define SET_RND(regd)  asm volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
- #define WAVG2B "wavg2br"
- #include "dsputil_iwmmxt_rnd.h"
- #undef DEF
-@@ -89,7 +89,7 @@
- {
-     uint8_t *pixels2 = pixels + line_size;
- 
--    __asm__ __volatile__ (
-+    asm volatile (
-         "mov            r12, #4                 \n\t"
-         "1:                                     \n\t"
-         "pld            [%[pixels], %[line_size2]]              \n\t"
-@@ -125,7 +125,7 @@
- 
- static void clear_blocks_iwmmxt(DCTELEM *blocks)
- {
--    __asm __volatile(
-+    asm volatile(
-                 "wzero wr0                      \n\t"
-                 "mov r1, #(128 * 6 / 32)        \n\t"
-                 "1:                             \n\t"
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h
---- mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h	2008-07-23 12:19:05.000000000 +0200
-+++ mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h	2008-07-24 19:54:01.023198000 +0200
-@@ -19,13 +19,14 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
--#ifndef FFMPEG_DSPUTIL_IWMMXT_RND_H
--#define FFMPEG_DSPUTIL_IWMMXT_RND_H
-+/* This header intentionally has no multiple inclusion guards. It is meant to
-+ * be included multiple times and generates different code depending on the
-+ * value of certain #defines. */
- 
- void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
- {
-     int stride = line_size;
--    __asm__ __volatile__ (
-+    asm volatile (
-         "and r12, %[pixels], #7 \n\t"
-         "bic %[pixels], %[pixels], #7 \n\t"
-         "tmcr wcgr1, r12 \n\t"
-@@ -59,7 +60,7 @@
- void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
- {
-     int stride = line_size;
--    __asm__ __volatile__ (
-+    asm volatile (
-         "and r12, %[pixels], #7 \n\t"
-         "bic %[pixels], %[pixels], #7 \n\t"
-         "tmcr wcgr1, r12 \n\t"
-@@ -101,7 +102,7 @@
- void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
- {
-     int stride = line_size;
--    __asm__ __volatile__ (
-+    asm volatile (
-         "and r12, %[pixels], #7 \n\t"
-         "bic %[pixels], %[pixels], #7 \n\t"
-         "tmcr wcgr1, r12 \n\t"
-@@ -141,7 +142,7 @@
- void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
- {
-     int stride = line_size;
--    __asm__ __volatile__ (
-+    asm volatile (
-         "pld [%[pixels]]                \n\t"
-         "pld [%[pixels], #32]           \n\t"
-         "pld [%[block]]                 \n\t"
-@@ -200,7 +201,7 @@
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
-     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[pixels]]                \n\t"
-         "pld [%[pixels], #32]           \n\t"
-         "and r12, %[pixels], #7         \n\t"
-@@ -249,7 +250,7 @@
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
-     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[pixels]]                \n\t"
-         "pld [%[pixels], #32]           \n\t"
-         "and r12, %[pixels], #7         \n\t"
-@@ -310,7 +311,7 @@
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
-     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[pixels]]                \n\t"
-         "pld [%[pixels], #32]           \n\t"
-         "pld [%[block]]                 \n\t"
-@@ -371,7 +372,7 @@
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
-     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[pixels]]                \n\t"
-         "pld [%[pixels], #32]           \n\t"
-         "pld [%[block]]                 \n\t"
-@@ -447,7 +448,7 @@
-     int stride = line_size;
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld            [%[pixels]]                             \n\t"
-         "pld            [%[pixels], #32]                        \n\t"
-         "and            r12, %[pixels], #7                      \n\t"
-@@ -501,7 +502,7 @@
-     int stride = line_size;
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[pixels]]                \n\t"
-         "pld [%[pixels], #32]           \n\t"
-         "and r12, %[pixels], #7         \n\t"
-@@ -558,7 +559,7 @@
-     int stride = line_size;
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[pixels]]                \n\t"
-         "pld [%[pixels], #32]           \n\t"
-         "and r12, %[pixels], #7         \n\t"
-@@ -626,7 +627,7 @@
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
-     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[pixels]]                \n\t"
-         "mov r12, #2                    \n\t"
-         "pld [%[pixels], #32]           \n\t"
-@@ -720,7 +721,7 @@
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
-     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[pixels]]                \n\t"
-         "mov r12, #2                    \n\t"
-         "pld [%[pixels], #32]           \n\t"
-@@ -862,7 +863,7 @@
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
-     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[block]]                 \n\t"
-         "pld [%[block], #32]            \n\t"
-         "pld [%[pixels]]                \n\t"
-@@ -966,7 +967,7 @@
-     // [wr0 wr1 wr2 wr3] for previous line
-     // [wr4 wr5 wr6 wr7] for current line
-     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
--    __asm__ __volatile__(
-+    asm volatile(
-         "pld [%[block]]                 \n\t"
-         "pld [%[block], #32]            \n\t"
-         "pld [%[pixels]]                \n\t"
-@@ -1115,5 +1116,3 @@
-         : [line_size]"r"(line_size)
-         : "r12", "memory");
- }
--
--#endif /* FFMPEG_DSPUTIL_IWMMXT_RND_H */
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_neon.c mythtv/libs/libavcodec/armv4l/dsputil_neon.c
---- mythtv.orig/libs/libavcodec/armv4l/dsputil_neon.c	1970-01-01 01:00:00.000000000 +0100
-+++ mythtv/libs/libavcodec/armv4l/dsputil_neon.c	2008-07-24 19:54:01.023198000 +0200
-@@ -0,0 +1,397 @@
-+/*
-+ * ARM NEON optimised DSP functions
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+#include <stdint.h>
-+
-+#include "libavcodec/avcodec.h"
-+#include "libavcodec/dsputil.h"
-+
-+extern void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
-+                                        int h, int x, int y);
-+
-+#define PUT_PIXELS_16_X2(vhadd)                                 \
-+        "1:                                          \n\t"      \
-+        "vld1.64   {d0,d1,d2}, [%[p]], %[line_size]  \n\t"      \
-+        "vld1.64   {d4,d5,d6}, [%[p]], %[line_size]  \n\t"      \
-+        "pld       [%[p]]                            \n\t"      \
-+        "subs      %[h], %[h], #2                    \n\t"      \
-+        "vext.8    q1, q0, q1, #1                    \n\t"      \
-+        "vext.8    q3, q2, q3, #1                    \n\t"      \
-+         vhadd".u8 q0, q0, q1                        \n\t"      \
-+         vhadd".u8 q2, q2, q3                        \n\t"      \
-+        "vst1.64   {d0,d1}, [%[b],:64], %[line_size] \n\t"      \
-+        "vst1.64   {d4,d5}, [%[b],:64], %[line_size] \n\t"      \
-+        "bne       1b                                \n\t"
-+
-+#define PUT_PIXELS_16_Y2(vhadd)                                 \
-+        "add       %[p1], %[p0], %[line_size]         \n\t"     \
-+        "lsl       %[l2], %[line_size], #1            \n\t"     \
-+        "vld1.64   {d0,d1}, [%[p0]], %[l2]            \n\t"     \
-+        "vld1.64   {d2,d3}, [%[p1]], %[l2]            \n\t"     \
-+        "1:                                           \n\t"     \
-+        "subs      %[h], %[h], #2                     \n\t"     \
-+         vhadd".u8 q2, q0, q1                         \n\t"     \
-+        "vst1.64   {d4,d5}, [%[b],:128], %[line_size] \n\t"     \
-+        "vld1.64   {d0,d1}, [%[p0]],     %[l2]        \n\t"     \
-+         vhadd".u8 q2, q0, q1                         \n\t"     \
-+        "vst1.64   {d4,d5}, [%[b],:128], %[line_size] \n\t"     \
-+        "vld1.64   {d2,d3}, [%[p1]],     %[l2]        \n\t"     \
-+        "bne 1b                                       \n\t"
-+
-+#define PUT_PIXELS_16_XY2(vshrn, no_rnd)                        \
-+        "lsl        %[l2], %[line_size], #1              \n\t"  \
-+        "add        %[p1], %[p0], %[line_size]           \n\t"  \
-+        "vld1.64    {d0,d1,d2}, [%[p0]], %[l2]           \n\t"  \
-+        "vld1.64    {d4,d5,d6}, [%[p1]], %[l2]           \n\t"  \
-+        "pld        [%[p0]]                              \n\t"  \
-+        "pld        [%[p1]]                              \n\t"  \
-+        "vext.8     q1,  q0, q1, #1                      \n\t"  \
-+        "vext.8     q3,  q2, q3, #1                      \n\t"  \
-+        "vaddl.u8   q8,  d0, d2                          \n\t"  \
-+        "vaddl.u8   q10, d1, d3                          \n\t"  \
-+        "vaddl.u8   q9,  d4, d6                          \n\t"  \
-+        "vaddl.u8   q11, d5, d7                          \n\t"  \
-+        "1:                                              \n\t"  \
-+        "subs       %[h], %[h], #2                       \n\t"  \
-+        "vld1.64    {d0,d1,d2}, [%[p0]], %[l2]           \n\t"  \
-+        "vadd.u16   q12, q8, q9                          \n\t"  \
-+        "pld        [%[p0]]                              \n\t"  \
-+ no_rnd "vadd.u16   q12, q12, q13                        \n\t"  \
-+        "vext.8     q15, q0, q1, #1                      \n\t"  \
-+        "vadd.u16   q1, q10, q11                         \n\t"  \
-+         vshrn".u16 d28, q12, #2                         \n\t"  \
-+ no_rnd "vadd.u16   q1, q1, q13                          \n\t"  \
-+         vshrn".u16 d29, q1, #2                          \n\t"  \
-+        "vaddl.u8   q8, d0, d30                          \n\t"  \
-+        "vld1.64    {d2,d3,d4}, [%[p1]], %[l2]           \n\t"  \
-+        "vaddl.u8   q10, d1, d31                         \n\t"  \
-+        "vst1.64    {d28,d29}, [%[b],:128], %[line_size] \n\t"  \
-+        "vadd.u16   q12, q8, q9                          \n\t"  \
-+        "pld        [%[p1]]                              \n\t"  \
-+ no_rnd "vadd.u16   q12, q12, q13                        \n\t"  \
-+        "vext.8     q2, q1, q2, #1                       \n\t"  \
-+        "vadd.u16   q0, q10, q11                         \n\t"  \
-+         vshrn".u16 d30, q12, #2                         \n\t"  \
-+ no_rnd "vadd.u16   q0, q0, q13                          \n\t"  \
-+         vshrn".u16 d31, q0, #2                          \n\t"  \
-+        "vaddl.u8   q9, d2, d4                           \n\t"  \
-+        "vst1.64    {d30,d31}, [%[b],:128], %[line_size] \n\t"  \
-+        "vaddl.u8   q11, d3, d5                          \n\t"  \
-+        "bgt     1b                                      \n\t"
-+
-+#define PUT_PIXELS_8_X2(vhadd)                          \
-+        "1:                                       \n\t" \
-+        "vld1.64   {d0,d1}, [%[p]], %[line_size]  \n\t" \
-+        "vld1.64   {d2,d3}, [%[p]], %[line_size]  \n\t" \
-+        "pld       [%[p]]                         \n\t" \
-+        "subs      %[h], %[h], #2                 \n\t" \
-+        "vext.8    d1, d0, d1, #1                 \n\t" \
-+        "vext.8    d3, d2, d3, #1                 \n\t" \
-+        "vswp      d1, d2                         \n\t" \
-+         vhadd".u8 q0, q0, q1                     \n\t" \
-+        "vst1.64   {d0}, [%[b],:64], %[line_size] \n\t" \
-+        "vst1.64   {d1}, [%[b],:64], %[line_size] \n\t" \
-+        "bne       1b                             \n\t"
-+
-+#define PUT_PIXELS_8_Y2(vhadd)                          \
-+        "add       %[p1], %[p0], %[line_size]     \n\t" \
-+        "lsl       %[l2], %[line_size], #1        \n\t" \
-+        "vld1.64   {d0}, [%[p0]], %[l2]           \n\t" \
-+        "vld1.64   {d1}, [%[p1]], %[l2]           \n\t" \
-+        "1:                                       \n\t" \
-+        "subs      %[h], %[h], #2                 \n\t" \
-+         vhadd".u8 d4, d0, d1                     \n\t" \
-+        "vst1.64   {d4}, [%[b],:64], %[line_size] \n\t" \
-+        "vld1.64   {d0}, [%[p0]],    %[l2]        \n\t" \
-+         vhadd".u8 d4, d0, d1                     \n\t" \
-+        "vst1.64   {d4}, [%[b],:64], %[line_size] \n\t" \
-+        "vld1.64   {d1}, [%[p1]],     %[l2]       \n\t" \
-+        "bne 1b                                   \n\t"
-+
-+#define PUT_PIXELS8_XY2(vshrn, no_rnd)                          \
-+        "lsl        %[l2],   %[line_size], #1       \n\t"       \
-+        "add        %[p1],   %[p0], %[line_size]    \n\t"       \
-+        "vld1.64    {d0,d1}, [%[p0]], %[l2]         \n\t"       \
-+        "vld1.64    {d2,d3}, [%[p1]], %[l2]         \n\t"       \
-+        "pld        [%[p0]]                         \n\t"       \
-+        "pld        [%[p1]]                         \n\t"       \
-+        "vext.8     d4, d0, d1, #1                  \n\t"       \
-+        "vext.8     d6, d2, d3, #1                  \n\t"       \
-+        "vaddl.u8   q8, d0, d4                      \n\t"       \
-+        "vaddl.u8   q9, d2, d6                      \n\t"       \
-+        "1:                                         \n\t"       \
-+        "subs       %[h], %[h], #2                  \n\t"       \
-+        "vld1.64    {d0,d1}, [%[p0]], %[l2]         \n\t"       \
-+        "pld        [%[p0]]                         \n\t"       \
-+        "vadd.u16   q10, q8, q9                     \n\t"       \
-+        "vext.8     d4, d0, d1, #1                  \n\t"       \
-+ no_rnd "vadd.u16   q10, q10, q11                   \n\t"       \
-+        "vaddl.u8   q8, d0, d4                      \n\t"       \
-+         vshrn".u16 d5, q10, #2                     \n\t"       \
-+        "vld1.64    {d2,d3}, [%[p1]], %[l2]         \n\t"       \
-+        "vadd.u16   q10, q8, q9                     \n\t"       \
-+        "pld        [%[p1]]                         \n\t"       \
-+ no_rnd "vadd.u16   q10, q10, q11                   \n\t"       \
-+        "vst1.64    {d5}, [%[b],:64], %[line_size]  \n\t"       \
-+         vshrn".u16 d7, q10, #2                     \n\t"       \
-+        "vext.8     d6, d2, d3, #1                  \n\t"       \
-+        "vaddl.u8   q9, d2, d6                      \n\t"       \
-+        "vst1.64    {d7}, [%[b],:64], %[line_size]  \n\t"       \
-+        "bgt     1b                                 \n\t"
-+
-+static void put_pixels16_neon(uint8_t *block, const uint8_t *pixels,
-+                              int line_size, int h)
-+{
-+    asm volatile(
-+        "1:                                         \n\t"
-+        "vld1.64 {d0,d1}, [%[pixels]], %[line_size] \n\t"
-+        "vld1.64 {d2,d3}, [%[pixels]], %[line_size] \n\t"
-+        "vld1.64 {d4,d5}, [%[pixels]], %[line_size] \n\t"
-+        "vld1.64 {d6,d7}, [%[pixels]], %[line_size] \n\t"
-+        "pld     [%[pixels]]                        \n\t"
-+        "subs    %[h], %[h], #4                     \n\t"
-+        "vst1.64 {d0,d1}, [%[block],:128], %[line_size]  \n\t"
-+        "vst1.64 {d2,d3}, [%[block],:128], %[line_size]  \n\t"
-+        "vst1.64 {d4,d5}, [%[block],:128], %[line_size]  \n\t"
-+        "vst1.64 {d6,d7}, [%[block],:128], %[line_size]  \n\t"
-+        "bne     1b                                 \n\t"
-+        : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "memory");
-+}
-+
-+static void put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
-+                                 int line_size, int h)
-+{
-+    asm volatile(
-+        PUT_PIXELS_16_X2("vrhadd")
-+        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "memory");
-+}
-+
-+static void put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
-+                                 int line_size, int h)
-+{
-+    const uint8_t *p1;
-+    int l2;
-+
-+    asm volatile(
-+        PUT_PIXELS_16_Y2("vrhadd")
-+        : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h),
-+          [l2]"=&r"(l2)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "d4", "d5", "memory");
-+}
-+
-+static void put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
-+                                  int line_size, int h)
-+{
-+    const uint8_t *p1;
-+    int l2;
-+
-+    asm volatile(
-+        PUT_PIXELS_16_XY2("vrshrn", "@")
-+        : [b]"+r"(block),
-+          [p0]"+r"(pixels),
-+          [p1]"=&r"(p1), [h]"+r"(h),
-+          [l2]"=&r"(l2)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
-+          "d28", "d29", "d30", "d31",
-+          "q8", "q9", "q10", "q11", "q12", "memory");
-+}
-+
-+static void put_pixels8_neon(uint8_t *block, const uint8_t *pixels,
-+                             int line_size, int h)
-+{
-+    asm volatile(
-+        "1:                                 \n\t"
-+        "vld1.64 {d0}, [%[p]], %[line_size] \n\t"
-+        "vld1.64 {d1}, [%[p]], %[line_size] \n\t"
-+        "vld1.64 {d2}, [%[p]], %[line_size] \n\t"
-+        "vld1.64 {d3}, [%[p]], %[line_size] \n\t"
-+        "subs    %[h], %[h], #4             \n\t"
-+        "vst1.64 {d0}, [%[b],:64], %[line_size] \n\t"
-+        "vst1.64 {d1}, [%[b],:64], %[line_size] \n\t"
-+        "vst1.64 {d2}, [%[b],:64], %[line_size] \n\t"
-+        "vst1.64 {d3}, [%[b],:64], %[line_size] \n\t"
-+        "bne     1b                         \n\t"
-+        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "memory");
-+}
-+
-+static void put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
-+                                int line_size, int h)
-+{
-+    asm volatile(
-+        PUT_PIXELS_8_X2("vrhadd")
-+        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "memory");
-+}
-+
-+static void put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
-+                                int line_size, int h)
-+{
-+    const uint8_t *p1;
-+    int l2;
-+
-+    asm volatile(
-+        PUT_PIXELS_8_Y2("vrhadd")
-+        : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h),
-+          [l2]"=&r"(l2)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d4", "memory");
-+}
-+
-+static void put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
-+                                 int line_size, int h)
-+{
-+    const uint8_t *p1;
-+    int l2;
-+
-+    asm volatile(
-+        PUT_PIXELS8_XY2("vrshrn", "@")
-+        : [b]"+r"(block),
-+          [p0]"+r"(pixels),
-+          [p1]"=&r"(p1), [h]"+r"(h),
-+          [l2]"=&r"(l2)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "d4", "d6", "d7",
-+          "q8", "q9", "q10", "memory");
-+}
-+
-+static void put_no_rnd_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
-+                                        int line_size, int h)
-+{
-+    asm volatile(
-+        PUT_PIXELS_16_X2("vhadd")
-+        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "memory");
-+}
-+
-+static void put_no_rnd_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
-+                                        int line_size, int h)
-+{
-+    const uint8_t *p1;
-+    int l2;
-+
-+    asm volatile(
-+        PUT_PIXELS_16_Y2("vhadd")
-+        : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h),
-+          [l2]"=&r"(l2)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "d4", "d5", "memory");
-+}
-+
-+static void put_no_rnd_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
-+                                         int line_size, int h)
-+{
-+    const uint8_t *p1;
-+    int l2;
-+
-+    asm volatile(
-+        "vmov.i16   q13, #1                         \n\t"
-+        PUT_PIXELS_16_XY2("vshrn", "")
-+        : [b]"+r"(block),
-+          [p0]"+r"(pixels),
-+          [p1]"=&r"(p1), [h]"+r"(h),
-+          [l2]"=&r"(l2)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
-+          "d28", "d29", "d30", "d31",
-+          "q8", "q9", "q10", "q11", "q12", "q13", "memory");
-+}
-+
-+static void put_no_rnd_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
-+                                       int line_size, int h)
-+{
-+    asm volatile(
-+        PUT_PIXELS_8_X2("vhadd")
-+        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "memory");
-+}
-+
-+static void put_no_rnd_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
-+                                       int line_size, int h)
-+{
-+    const uint8_t *p1;
-+    int l2;
-+
-+    asm volatile(
-+        PUT_PIXELS_8_Y2("vhadd")
-+        : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h),
-+          [l2]"=&r"(l2)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d4", "memory");
-+}
-+
-+static void put_no_rnd_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
-+                                        int line_size, int h)
-+{
-+    const uint8_t *p1;
-+    int l2;
-+
-+    asm volatile(
-+        "vmov.i16   q11, #1                         \n\t"
-+        PUT_PIXELS8_XY2("vshrn", "")
-+        : [b]"+r"(block),
-+          [p0]"+r"(pixels),
-+          [p1]"=&r"(p1), [h]"+r"(h),
-+          [l2]"=&r"(l2)
-+        : [line_size]"r"(line_size)
-+        : "d0", "d1", "d2", "d3", "d4", "d6", "d7",
-+          "q8", "q9", "q10", "q11", "memory");
-+}
-+
-+static void put_h264_qpel16_mc00_neon(uint8_t *dst, uint8_t *src, int stride)
-+{   /* mc00 entry: forwards to put_pixels16_neon with the 4th argument fixed at 16 (the height, going by the sibling signatures) */
-+    put_pixels16_neon(dst, src, stride, 16);
-+}
-+
-+void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
-+{   /* installs the NEON routines into c's function tables; avctx is not used here */
-+    c->put_pixels_tab[0][0] = put_pixels16_neon;       /* row [0]: the 16-wide routines */
-+    c->put_pixels_tab[0][1] = put_pixels16_x2_neon;
-+    c->put_pixels_tab[0][2] = put_pixels16_y2_neon;
-+    c->put_pixels_tab[0][3] = put_pixels16_xy2_neon;
-+    c->put_pixels_tab[1][0] = put_pixels8_neon;        /* row [1]: the 8-wide routines */
-+    c->put_pixels_tab[1][1] = put_pixels8_x2_neon;
-+    c->put_pixels_tab[1][2] = put_pixels8_y2_neon;
-+    c->put_pixels_tab[1][3] = put_pixels8_xy2_neon;
-+
-+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_neon;   /* same routine as put_pixels_tab[0][0]; presumably a plain copy, where rounding is moot */
-+    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_neon;
-+    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_neon;
-+    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_neon;
-+    c->put_no_rnd_pixels_tab[1][0] = put_pixels8_neon;    /* same routine as put_pixels_tab[1][0] */
-+    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_neon;
-+    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_neon;
-+    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_neon;
-+
-+    c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;   /* only slot 0 is set here */
-+
-+    c->put_h264_qpel_pixels_tab[0][0] = put_h264_qpel16_mc00_neon;    /* only slot [0][0] is set here */
-+}
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/float_arm_vfp.c mythtv/libs/libavcodec/armv4l/float_arm_vfp.c
---- mythtv.orig/libs/libavcodec/armv4l/float_arm_vfp.c	1970-01-01 01:00:00.000000000 +0100
-+++ mythtv/libs/libavcodec/armv4l/float_arm_vfp.c	2008-07-24 19:54:01.023198000 +0200
-@@ -0,0 +1,208 @@
-+/*
-+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+#include "libavcodec/dsputil.h"
-+
-+/*
-+ * VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle
-+ * throughput for almost all the instructions (except for double precision
-+ * arithmetic), but rather high latency. Latency is 4 cycles for loads and 8 cycles
-+ * for arithmetic operations. Scheduling code to avoid pipeline stalls is very
-+ * important for performance. One more interesting feature is that VFP has
-+ * independent load/store and arithmetic pipelines, so it is possible to make
-+ * them work simultaneously and get more than 1 operation per cycle. The load/store
-+ * pipeline can process 2 single precision floating point values per cycle and
-+ * supports bulk loads and stores for large sets of registers. Arithmetic operations
-+ * can be done on vectors, which makes it possible to keep the arithmetic pipeline
-+ * busy while the processor issues and executes other instructions. Detailed
-+ * optimization manuals can be found at http://www.arm.com
-+ */
-+
-+/**
-+ * ARM VFP optimized implementation of 'vector_fmul_c': dst[i] *= src[i] for i < len.
-+ * Assumes that len is a positive number and a multiple of 8.
-+ */
-+static void vector_fmul_vfp(float *dst, const float *src, int len)
-+{
-+    int tmp;    /* holds the FPSCR value while short-vector mode is switched on */
-+    asm volatile(
-+        "fmrx       %[tmp], fpscr\n\t"
-+        "orr        %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
-+        "fmxr       fpscr, %[tmp]\n\t"
-+
-+        "fldmias    %[dst_r]!, {s0-s3}\n\t"          /* preload the first 8 floats of dst and of src */
-+        "fldmias    %[src]!, {s8-s11}\n\t"
-+        "fldmias    %[dst_r]!, {s4-s7}\n\t"
-+        "fldmias    %[src]!, {s12-s15}\n\t"
-+        "fmuls      s8, s0, s8\n\t"                  /* one instruction multiplies 4 lanes: s8-s11 = s0-s3 * s8-s11 */
-+    "1:\n\t"                                         /* per pass: 8 products always; 8 more only while len - 16 >= 0 */
-+        "subs       %[len], %[len], #16\n\t"
-+        "fmuls      s12, s4, s12\n\t"
-+        "fldmiasge  %[dst_r]!, {s16-s19}\n\t"
-+        "fldmiasge  %[src]!, {s24-s27}\n\t"
-+        "fldmiasge  %[dst_r]!, {s20-s23}\n\t"
-+        "fldmiasge  %[src]!, {s28-s31}\n\t"
-+        "fmulsge    s24, s16, s24\n\t"
-+        "fstmias    %[dst_w]!, {s8-s11}\n\t"         /* unconditional store of the first 8 products */
-+        "fstmias    %[dst_w]!, {s12-s15}\n\t"
-+        "fmulsge    s28, s20, s28\n\t"
-+        "fldmiasgt  %[dst_r]!, {s0-s3}\n\t"          /* gt: prefetch the next pass only if data remains */
-+        "fldmiasgt  %[src]!, {s8-s11}\n\t"
-+        "fldmiasgt  %[dst_r]!, {s4-s7}\n\t"
-+        "fldmiasgt  %[src]!, {s12-s15}\n\t"
-+        "fmulsge    s8, s0, s8\n\t"
-+        "fstmiasge  %[dst_w]!, {s24-s27}\n\t"
-+        "fstmiasge  %[dst_w]!, {s28-s31}\n\t"
-+        "bgt        1b\n\t"
-+
-+        "bic        %[tmp], %[tmp], #(7 << 16)\n\t" /* set vector size back to 1 */
-+        "fmxr       fpscr, %[tmp]\n\t"
-+        : [dst_w] "+&r" (dst), [dst_r] "+&r" (dst), [src] "+&r" (src), [len] "+&r" (len), [tmp] "=&r" (tmp)
-+        : /* dst is bound twice above: dst_r is the read cursor, dst_w the write cursor */
-+        : "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
-+          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
-+          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
-+          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
-+          "cc", "memory");
-+}
-+
-+/**
-+ * ARM VFP optimized implementation of 'vector_fmul_reverse_c': dst[i] = src0[i] * src1[len-1-i].
-+ * Assumes that len is a positive number and a multiple of 8.
-+ */
-+static void vector_fmul_reverse_vfp(float *dst, const float *src0, const float *src1, int len)
-+{
-+    src1 += len;    /* one past the end: src1 is consumed backwards with decrement-before loads */
-+    asm volatile(
-+        "fldmdbs    %[src1]!, {s0-s3}\n\t"          /* s0-s3 = last four floats of src1, so s3 is src1[len-1] */
-+        "fldmias    %[src0]!, {s8-s11}\n\t"
-+        "fldmdbs    %[src1]!, {s4-s7}\n\t"
-+        "fldmias    %[src0]!, {s12-s15}\n\t"
-+        "fmuls      s8, s3, s8\n\t"                 /* scalar multiplies, pairing registers in reverse order */
-+        "fmuls      s9, s2, s9\n\t"
-+        "fmuls      s10, s1, s10\n\t"
-+        "fmuls      s11, s0, s11\n\t"
-+    "1:\n\t"                                        /* per pass: 8 products always; 8 more only while len - 16 >= 0 */
-+        "subs       %[len], %[len], #16\n\t"
-+        "fldmdbsge  %[src1]!, {s16-s19}\n\t"
-+        "fmuls      s12, s7, s12\n\t"
-+        "fldmiasge  %[src0]!, {s24-s27}\n\t"
-+        "fmuls      s13, s6, s13\n\t"
-+        "fldmdbsge  %[src1]!, {s20-s23}\n\t"
-+        "fmuls      s14, s5, s14\n\t"
-+        "fldmiasge  %[src0]!, {s28-s31}\n\t"
-+        "fmuls      s15, s4, s15\n\t"
-+        "fmulsge    s24, s19, s24\n\t"
-+        "fldmdbsgt  %[src1]!, {s0-s3}\n\t"
-+        "fmulsge    s25, s18, s25\n\t"
-+        "fstmias    %[dst]!, {s8-s13}\n\t"          /* first 6 of the 8 products; s14-s15 follow below */
-+        "fmulsge    s26, s17, s26\n\t"
-+        "fldmiasgt  %[src0]!, {s8-s11}\n\t"
-+        "fmulsge    s27, s16, s27\n\t"
-+        "fmulsge    s28, s23, s28\n\t"
-+        "fldmdbsgt  %[src1]!, {s4-s7}\n\t"
-+        "fmulsge    s29, s22, s29\n\t"
-+        "fstmias    %[dst]!, {s14-s15}\n\t"
-+        "fmulsge    s30, s21, s30\n\t"
-+        "fmulsge    s31, s20, s31\n\t"
-+        "fmulsge    s8, s3, s8\n\t"
-+        "fldmiasgt  %[src0]!, {s12-s15}\n\t"
-+        "fmulsge    s9, s2, s9\n\t"
-+        "fmulsge    s10, s1, s10\n\t"
-+        "fstmiasge  %[dst]!, {s24-s27}\n\t"
-+        "fmulsge    s11, s0, s11\n\t"
-+        "fstmiasge  %[dst]!, {s28-s31}\n\t"
-+        "bgt        1b\n\t"
-+
-+        : [dst] "+&r" (dst), [src0] "+&r" (src0), [src1] "+&r" (src1), [len] "+&r" (len)
-+        : /* no input-only operands: every operand is advanced or counted down by the asm */
-+        : "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
-+          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
-+          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
-+          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
-+          "cc", "memory");
-+}
-+
-+#ifdef HAVE_ARMV6
-+/**
-+ * ARM VFP optimized float to int16 conversion (8 samples per loop pass).
-+ * Assumes that len is a positive number and a multiple of 8, that the
-+ * destination buffer is at least 4 bytes aligned (8 bytes alignment is
-+ * better for performance), and little endian byte order.
-+ */
-+void float_to_int16_vfp(int16_t *dst, const float *src, int len)
-+{
-+    asm volatile(
-+        "fldmias    %[src]!, {s16-s23}\n\t"     /* preload and convert the first 8 floats */
-+        "ftosis     s0, s16\n\t"
-+        "ftosis     s1, s17\n\t"
-+        "ftosis     s2, s18\n\t"
-+        "ftosis     s3, s19\n\t"
-+        "ftosis     s4, s20\n\t"
-+        "ftosis     s5, s21\n\t"
-+        "ftosis     s6, s22\n\t"
-+        "ftosis     s7, s23\n\t"
-+    "1:\n\t"
-+        "subs       %[len], %[len], #8\n\t"
-+        "fmrrs      r3, r4, {s0, s1}\n\t"       /* move the 8 converted 32-bit ints into core registers */
-+        "fmrrs      r5, r6, {s2, s3}\n\t"
-+        "fmrrs      r7, r8, {s4, s5}\n\t"
-+        "fmrrs      ip, lr, {s6, s7}\n\t"
-+        "fldmiasgt  %[src]!, {s16-s23}\n\t"     /* gt: fetch the next 8 floats only if any remain */
-+        "ssat       r4, #16, r4\n\t"            /* saturate each value to the signed 16-bit range */
-+        "ssat       r3, #16, r3\n\t"
-+        "ssat       r6, #16, r6\n\t"
-+        "ssat       r5, #16, r5\n\t"
-+        "pkhbt      r3, r3, r4, lsl #16\n\t"    /* pack two samples per word, earlier sample in the low halfword */
-+        "pkhbt      r4, r5, r6, lsl #16\n\t"
-+        "ftosisgt   s0, s16\n\t"
-+        "ftosisgt   s1, s17\n\t"
-+        "ftosisgt   s2, s18\n\t"
-+        "ftosisgt   s3, s19\n\t"
-+        "ftosisgt   s4, s20\n\t"
-+        "ftosisgt   s5, s21\n\t"
-+        "ftosisgt   s6, s22\n\t"
-+        "ftosisgt   s7, s23\n\t"
-+        "ssat       r8, #16, r8\n\t"
-+        "ssat       r7, #16, r7\n\t"
-+        "ssat       lr, #16, lr\n\t"
-+        "ssat       ip, #16, ip\n\t"
-+        "pkhbt      r5, r7, r8, lsl #16\n\t"
-+        "pkhbt      r6, ip, lr, lsl #16\n\t"
-+        "stmia      %[dst]!, {r3-r6}\n\t"       /* one store of 4 words = 8 int16 samples */
-+        "bgt        1b\n\t"
-+
-+        : [dst] "+&r" (dst), [src] "+&r" (src), [len] "+&r" (len)
-+        : /* no input-only operands; the core registers used by name are listed as clobbers below */
-+        : "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
-+          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
-+          "r3", "r4", "r5", "r6", "r7", "r8", "ip", "lr",
-+          "cc", "memory");
-+}
-+#endif
-+
-+void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx)
-+{   /* installs the VFP float routines into c; avctx is not used here */
-+    c->vector_fmul = vector_fmul_vfp;
-+    c->vector_fmul_reverse = vector_fmul_reverse_vfp;
-+#ifdef HAVE_ARMV6
-+    c->float_to_int16 = float_to_int16_vfp;   /* defined only under HAVE_ARMV6 (it uses ssat/pkhbt) */
-+#endif
-+}
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/h264dsp_neon.S mythtv/libs/libavcodec/armv4l/h264dsp_neon.S
---- mythtv.orig/libs/libavcodec/armv4l/h264dsp_neon.S	1970-01-01 01:00:00.000000000 +0100
-+++ mythtv/libs/libavcodec/armv4l/h264dsp_neon.S	2008-07-24 19:54:01.033198000 +0200
-@@ -0,0 +1,148 @@
-+/*
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+        .fpu neon
-+
-+        .text
-+        .align
-+        .global ff_put_h264_chroma_mc8_neon
-+        .func   ff_put_h264_chroma_mc8_neon
-+/* void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
-+                                    int h, int x, int y) */
-+ff_put_h264_chroma_mc8_neon:
-+        push      {r4-r7}
-+        ldrd      r4, [sp, #16]         /* r4 = x, r5 = y: the two stack arguments, past the 16 bytes just pushed */
-+
-+        pld       [r1]
-+        pld       [r1, r2]
-+
-+        muls      r7, r4, r5            /* r7 = x*y; its Z flag is what the beq below tests */
-+        rsb       r6, r7, r5, lsl #3    /* r6 = 8*y - x*y = (8-x)*y */
-+        rsb       ip, r7, r4, lsl #3    /* ip = 8*x - x*y = x*(8-y) */
-+        sub       r4, r7, r4, lsl #3
-+        sub       r4, r4, r5, lsl #3
-+        add       r4, r4, #64           /* r4 = 64 - 8*x - 8*y + x*y = (8-x)*(8-y) */
-+
-+        beq       2f                    /* x*y == 0: take the one-dimensional paths */
-+
-+        add       r5, r1, r2            /* r5 = second source row; r1/r5 then step two rows at a time */
-+
-+        vdup.8    d0, r4                /* d0..d3 = the four weights: r4, ip, r6, r7 */
-+        lsl       r4, r2, #1
-+        vdup.8    d1, ip
-+        vld1.64   {d4,d5}, [r1], r4
-+        vdup.8    d2, r6
-+        vld1.64   {d6,d7}, [r5], r4
-+        vdup.8    d3, r7
-+
-+        mov       r6, #32
-+        vext.8    d5, d4, d5, #1        /* d5 = the same row shifted left by one pixel */
-+        vdup.16   q12, r6               /* q12 = 32 in every lane: rounding bias for the >> 6 */
-+        vext.8    d7, d6, d7, #1
-+1:                                      /* two-dimensional filter, two output rows per pass */
-+        pld       [r5]
-+        vmull.u8  q8, d4, d0
-+        vmlal.u8  q8, d5, d1
-+        vld1.64   {d4,d5}, [r1], r4
-+        vmlal.u8  q8, d6, d2
-+        vext.8    d5, d4, d5, #1
-+        vmlal.u8  q8, d7, d3
-+        vmull.u8  q9, d6, d0
-+        vadd.i16  q8, q8, q12
-+        subs      r3, r3, #2
-+        vmlal.u8  q9, d7, d1
-+        vshrn.u16 d16, q8, #6
-+        vld1.64   {d6,d7}, [r5], r4
-+        vmlal.u8  q9, d4, d2
-+        vmlal.u8  q9, d5, d3
-+        pld       [r1]
-+        vadd.i16  q9, q9, q12
-+        vst1.64   {d16}, [r0,:64], r2
-+        vshrn.u16 d17, q9, #6
-+        vext.8    d7, d6, d7, #1
-+        vst1.64   {d17}, [r0,:64], r2
-+        bgt       1b
-+
-+        pop       {r4-r7}
-+        bx        lr
-+
-+2:                                      /* reached when x*y == 0, where r6 = 8*y and ip = 8*x */
-+        tst       r6, r6                /* Z set when y == 0; consumed by the beq 4f below */
-+        add       ip, ip, r6            /* ip = 8*x + 8*y: the single remaining second-tap weight */
-+        vdup.8    d0, r4
-+        vdup.8    d1, ip
-+        mov       r6, #32
-+        vdup.16   q12, r6               /* q12 = 32 in every lane: rounding bias for the >> 6 */
-+
-+        beq       4f                    /* y == 0: horizontal-only path */
-+
-+        add       r5, r1, r2
-+        lsl       r4, r2, #1
-+        vld1.64   {d4}, [r1], r4
-+        vld1.64   {d6}, [r5], r4
-+3:                                      /* vertical-only filter (y != 0, hence x == 0), two rows per pass */
-+        pld       [r5]
-+        vmull.u8  q8, d4, d0
-+        vmlal.u8  q8, d6, d1
-+        vld1.64   {d4}, [r1], r4
-+        vmull.u8  q9, d6, d0
-+        vadd.i16  q8, q8, q12
-+        vmlal.u8  q9, d4, d1
-+        vshrn.u16 d16, q8, #6
-+        vadd.i16  q9, q9, q12
-+        vst1.64   {d16}, [r0,:64], r2
-+        vshrn.u16 d17, q9, #6
-+        subs      r3, r3, #2
-+        vld1.64   {d6}, [r5], r4
-+        pld       [r1]
-+        vst1.64   {d17}, [r0,:64], r2
-+        bgt       3b
-+
-+        pop       {r4-r7}
-+        bx        lr
-+
-+4:                                      /* y == 0: each output row depends on one source row only */
-+        vld1.64   {d4,d5}, [r1], r2
-+        vld1.64   {d6,d7}, [r1], r2
-+        vext.8    d5, d4, d5, #1
-+        vext.8    d7, d6, d7, #1
-+5:                                      /* horizontal-only filter, two rows per pass */
-+        pld       [r1]
-+        subs      r3, r3, #2
-+        vmull.u8  q8, d4, d0
-+        vmlal.u8  q8, d5, d1
-+        vld1.64   {d4,d5}, [r1], r2
-+        vmull.u8  q9, d6, d0
-+        vmlal.u8  q9, d7, d1
-+        pld       [r1]
-+        vadd.i16  q8, q8, q12
-+        vadd.i16  q9, q9, q12
-+        vext.8    d5, d4, d5, #1
-+        vshrn.u16 d16, q8, #6
-+        vld1.64   {d6,d7}, [r1], r2
-+        vshrn.u16 d17, q9, #6
-+        vst1.64   {d16}, [r0,:64], r2
-+        vext.8    d7, d6, d7, #1
-+        vst1.64   {d17}, [r0,:64], r2
-+        bgt       5b
-+
-+        pop       {r4-r7}
-+        bx        lr
-+        .endfunc
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/mpegvideo_arm.c mythtv/libs/libavcodec/armv4l/mpegvideo_arm.c
---- mythtv.orig/libs/libavcodec/armv4l/mpegvideo_arm.c	2008-07-23 12:19:05.000000000 +0200
-+++ mythtv/libs/libavcodec/armv4l/mpegvideo_arm.c	2008-07-24 19:54:01.263198000 +0200
-@@ -18,9 +18,9 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
--#include "dsputil.h"
--#include "mpegvideo.h"
--#include "avcodec.h"
-+#include "libavcodec/avcodec.h"
-+#include "libavcodec/dsputil.h"
-+#include "libavcodec/mpegvideo.h"
- 
- extern void MPV_common_init_iwmmxt(MpegEncContext *s);
- extern void MPV_common_init_armv5te(MpegEncContext *s);
-@@ -28,7 +28,7 @@
- void MPV_common_init_armv4l(MpegEncContext *s)
- {
-     /* IWMMXT support is a superset of armv5te, so
--     * allow optimised functions for armv5te unless
-+     * allow optimized functions for armv5te unless
-      * a better iwmmxt function exists
-      */
- #ifdef HAVE_ARMV5TE
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/mpegvideo_armv5te.c mythtv/libs/libavcodec/armv4l/mpegvideo_armv5te.c
---- mythtv.orig/libs/libavcodec/armv4l/mpegvideo_armv5te.c	2008-07-23 12:19:05.000000000 +0200
-+++ mythtv/libs/libavcodec/armv4l/mpegvideo_armv5te.c	2008-07-24 19:54:01.263198000 +0200
-@@ -19,9 +19,9 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
--#include "dsputil.h"
--#include "mpegvideo.h"
--#include "avcodec.h"
-+#include "libavcodec/avcodec.h"
-+#include "libavcodec/dsputil.h"
-+#include "libavcodec/mpegvideo.h"
- 
- 
- #ifdef ENABLE_ARM_TESTS
-@@ -65,7 +65,7 @@
- ({ DCTELEM *xblock = xxblock; \
-    int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \
-    int xdata1, xdata2; \
--__asm__ __volatile__( \
-+asm volatile( \
-         "subs %[count], %[count], #2       \n\t" \
-         "ble 2f                            \n\t" \
-         "ldrd r4, [%[block], #0]           \n\t" \
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/mpegvideo_iwmmxt.c mythtv/libs/libavcodec/armv4l/mpegvideo_iwmmxt.c
---- mythtv.orig/libs/libavcodec/armv4l/mpegvideo_iwmmxt.c	2008-07-23 12:19:05.000000000 +0200
-+++ mythtv/libs/libavcodec/armv4l/mpegvideo_iwmmxt.c	2008-07-24 19:54:01.273198000 +0200
-@@ -18,9 +18,9 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
--#include "dsputil.h"
--#include "mpegvideo.h"
--#include "avcodec.h"
-+#include "libavcodec/avcodec.h"
-+#include "libavcodec/dsputil.h"
-+#include "libavcodec/mpegvideo.h"
- 
- static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
-                                              DCTELEM *block, int n, int qscale)
-@@ -48,7 +48,7 @@
-     else
-         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
- 
--    __asm__ __volatile__ (
-+    asm volatile (
- /*      "movd %1, %%mm6                 \n\t" //qmul */
- /*      "packssdw %%mm6, %%mm6          \n\t" */
- /*      "packssdw %%mm6, %%mm6          \n\t" */
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/simple_idct_arm.S mythtv/libs/libavcodec/armv4l/simple_idct_arm.S
---- mythtv.orig/libs/libavcodec/armv4l/simple_idct_arm.S	2008-07-23 12:19:05.000000000 +0200
-+++ mythtv/libs/libavcodec/armv4l/simple_idct_arm.S	2008-07-24 19:54:01.503198000 +0200
-@@ -79,7 +79,7 @@
- 
- 
- __row_loop:
--        @@ read the row and check if it is null, almost null, or not, according to strongarm specs, it is not necessary to optimise ldr accesses (i.e. split 32bits in 2 16bits words), at least it gives more usable registers :)
-+        @@ read the row and check if it is null, almost null, or not, according to strongarm specs, it is not necessary to optimize ldr accesses (i.e. split 32bits in 2 16bits words), at least it gives more usable registers :)
-         ldr r1, [r14, #0]        @ R1=(int32)(R12)[0]=ROWr32[0] (relative row cast to a 32b pointer)
-         ldr r2, [r14, #4]        @ R2=(int32)(R12)[1]=ROWr32[1]
-         ldr r3, [r14, #8]        @ R3=ROWr32[2]
-@@ -421,7 +421,7 @@
-         @@ col[40] = ((a2 - b2) >> COL_SHIFT);
-         @@ col[48] = ((a1 - b1) >> COL_SHIFT);
-         @@ col[56] = ((a0 - b0) >> COL_SHIFT);
--        @@@@@ no optimisation here @@@@@
-+        @@@@@ no optimization here @@@@@
-         add r8, r6, r0           @ R8=a0+b0
-         add r9, r2, r1           @ R9=a1+b1
-         mov r8, r8, asr #COL_SHIFT
-diff -Nurd mythtv.orig/libs/libavcodec/armv4l/simple_idct_neon.S mythtv/libs/libavcodec/armv4l/simple_idct_neon.S
---- mythtv.orig/libs/libavcodec/armv4l/simple_idct_neon.S	1970-01-01 01:00:00.000000000 +0100
-+++ mythtv/libs/libavcodec/armv4l/simple_idct_neon.S	2008-07-24 19:54:01.503198000 +0200
-@@ -0,0 +1,388 @@
-+/*
-+ * ARM NEON IDCT
-+ *
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * Based on Simple IDCT
-+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+#define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W4c ((1<<(COL_SHIFT-1))/W4)
-+#define ROW_SHIFT 11
-+#define COL_SHIFT 20
-+
-+#define w1 d0[0]
-+#define w2 d0[1]
-+#define w3 d0[2]
-+#define w4 d0[3]
-+#define w5 d1[0]
-+#define w6 d1[1]
-+#define w7 d1[2]
-+#define w4c d1[3]
-+
-+        .fpu neon
-+
-+        .macro idct_col4_top
-+        vmull.s16 q7,  d6,  w2    /* q7  = W2 * col[2] */
-+        vmull.s16 q8,  d6,  w6    /* q8  = W6 * col[2] */
-+        vmull.s16 q9,  d4,  w1    /* q9  = W1 * col[1] */
-+        vadd.i32  q11, q15, q7    /* q11 = q15 + W2 * col[2] */
-+        vmull.s16 q10, d4,  w3    /* q10 = W3 * col[1] */
-+        vadd.i32  q12, q15, q8    /* q12 = q15 + W6 * col[2] */
-+        vmull.s16 q5,  d4,  w5    /* q5  = W5 * col[1] */
-+        vsub.i32  q13, q15, q8    /* q13 = q15 - W6 * col[2] */
-+        vmull.s16 q6,  d4,  w7    /* q6  = W7 * col[1] */
-+        vsub.i32  q14, q15, q7    /* q14 = q15 - W2 * col[2] */
-+
-+        vmlal.s16 q9,  d8, w3     /* q9  += W3 * col[3] */
-+        vmlsl.s16 q10, d8, w7     /* q10 -= W7 * col[3] */
-+        vmlsl.s16 q5,  d8, w1     /* q5  -= W1 * col[3] */
-+        vmlsl.s16 q6,  d8, w5     /* q6  -= W5 * col[3] */
-+        .endm
-+
-+        .macro idct_col4_mid1
-+        vmull.s16 q7,  d3,  w4    /* q7 = W4 * col[4] */
-+        vadd.i32  q11, q11, q7    /* fold the col[4] term into q11..q14 with signs +, -, -, + */
-+        vsub.i32  q12, q12, q7
-+        vsub.i32  q13, q13, q7
-+        vadd.i32  q14, q14, q7
-+        .endm
-+
-+        .macro idct_col4_mid2     /* folds the col[5] (d5) terms into q9, q10, q5, q6 */
-+        vmlal.s16 q9,  d5, w5     /* q9  += W5 * col[5] */
-+        vmlsl.s16 q10, d5, w1     /* q10 -= W1 * col[5] */
-+        vmlal.s16 q5,  d5, w7     /* q5  += W7 * col[5] */
-+        vmlal.s16 q6,  d5, w3     /* q6  += W3 * col[5] */
-+        .endm
-+
-+        .macro idct_col4_mid3
-+        vmull.s16 q7,  d7, w6     /* q7 = W6 * col[6] */
-+        vmull.s16 q8,  d7, w2     /* q8 = W2 * col[6] */
-+        vadd.i32  q11, q11, q7    /* q11 += W6 * col[6] */
-+        vsub.i32  q12, q12, q8    /* q12 -= W2 * col[6] */
-+        vadd.i32  q13, q13, q8    /* q13 += W2 * col[6] */
-+        vsub.i32  q14, q14, q7    /* q14 -= W6 * col[6] */
-+        .endm
-+
-+        .macro idct_col4_mid4
-+        vmlal.s16 q9,  d9, w7     /* q9  += W7 * col[7] */
-+        vmlsl.s16 q10, d9, w5     /* q10 -= W5 * col[7] */
-+        vmlal.s16 q5,  d9, w3     /* q5  += W3 * col[7] */
-+        vmlsl.s16 q6,  d9, w1     /* q6  -= W1 * col[7] */
-+        .endm
-+
-+        .macro idct_col4_mid      /* same operations as mid1..mid4, interleaved into one sequence */
-+        vmull.s16 q7,  d3,  w4    /* q7   = W4 * col[4] */
-+        vmlal.s16 q9,  d5,  w5    /* q9  += W5 * col[5] */
-+        vmlsl.s16 q10, d5,  w1    /* q10 -= W1 * col[5] */
-+        vadd.i32  q11, q11, q7
-+        vmull.s16 q8,  d7,  w2    /* q8   = W2 * col[6] */
-+        vsub.i32  q12, q12, q7
-+        vmlal.s16 q5,  d5,  w7    /* q5  += W7 * col[5] */
-+        vsub.i32  q13, q13, q7
-+        vmlal.s16 q6,  d5,  w3    /* q6  += W3 * col[5] */
-+        vadd.i32  q14, q14, q7
-+        vmull.s16 q7,  d7,  w6    /* q7   = W6 * col[6] */
-+        vadd.i32  q11, q11, q7
-+        vmlal.s16 q9,  d9,  w7    /* q9  += W7 * col[7] */
-+        vsub.i32  q12, q12, q8
-+        vmlsl.s16 q10, d9,  w5    /* q10 -= W5 * col[7] */
-+        vadd.i32  q13, q13, q8
-+        vmlal.s16 q5,  d9,  w3    /* q5  += W3 * col[7] */
-+        vsub.i32  q14, q14, q7
-+        vmlsl.s16 q6,  d9,  w1    /* q6  -= W1 * col[7] */
-+        .endm
-+
-+        .macro idct_col4_end      /* final butterfly over the pairs (q11,q9) (q12,q10) (q13,q5) (q14,q6) */
-+        vadd.i32 q3,  q11, q9     /* sums land in q3, q4, q7, q8 */
-+        vadd.i32 q4,  q12, q10
-+        vadd.i32 q7,  q13, q5
-+        vadd.i32 q8,  q14, q6
-+        vsub.i32 q11, q11, q9     /* differences overwrite q11..q14 */
-+        vsub.i32 q12, q12, q10
-+        vsub.i32 q13, q13, q5
-+        vsub.i32 q14, q14, q6
-+        .endm
-+
-+	.text
-+        .align
-+        .type idct_row4_neon, %function
-+        .func idct_row4_neon
-+idct_row4_neon:
-+        vld1.64 {d2,d3}, [a3,:128]!     /* load four rows of eight 16-bit coefficients (64 bytes) */
-+        vld1.64 {d4,d5}, [a3,:128]!
-+        vld1.64 {d6,d7}, [a3,:128]!
-+        vld1.64 {d8,d9}, [a3,:128]!
-+        add a3, a3, #-64                /* rewind a3 to the first of those rows */
-+
-+        vmov.i32  q15, #(1<<(ROW_SHIFT-1))  /* rounding bias for the final >> ROW_SHIFT */
-+        vorr      d10, d3,  d5          /* d10/d11 OR together coefficients 4-7 of the rows, read before the transposes */
-+        vtrn.16   q1,  q2
-+        vorr      d11, d7,  d9
-+        vtrn.16   q3,  q4
-+        vorr      d10, d10, d11
-+        vtrn.32   q1,  q3
-+        vmlal.s16 q15, d2,  w4    /* q15 += W4 * col[0] */
-+        vtrn.32   q2,  q4
-+        vmov      a4,  v1,  d10
-+
-+        idct_col4_top
-+
-+        orrs a4, a4, v1                 /* zero only if coefficients 4-7 of all four rows are zero */
-+        beq 1f                          /* ...in which case the idct_col4_mid terms are skipped */
-+        idct_col4_mid
-+1:
-+        vadd.i32 q3,  q11, q9           /* same butterfly as idct_col4_end, interleaved with the narrowing shifts */
-+        vadd.i32 q4,  q12, q10
-+        vshrn.i32 d2, q3,  #ROW_SHIFT
-+        vadd.i32 q7,  q13, q5
-+        vshrn.i32 d4, q4,  #ROW_SHIFT
-+        vadd.i32 q8,  q14, q6
-+        vshrn.i32 d6, q7,  #ROW_SHIFT
-+        vsub.i32 q11, q11, q9
-+        vshrn.i32 d8, q8,  #ROW_SHIFT
-+        vsub.i32 q12, q12, q10
-+        vshrn.i32 d9, q11, #ROW_SHIFT
-+        vsub.i32 q13, q13, q5
-+        vshrn.i32 d7, q12, #ROW_SHIFT
-+        vsub.i32 q14, q14, q6
-+        vshrn.i32 d5, q13, #ROW_SHIFT
-+        vshrn.i32 d3, q14, #ROW_SHIFT
-+
-+        vtrn.16   q1, q2                /* transpose back to row order before storing */
-+        vtrn.16   q3, q4
-+        vtrn.32   q1, q3
-+        vtrn.32   q2, q4
-+
-+        vst1.64 {d2,d3}, [a3,:128]!     /* write the four rows back in place; a3 ends 64 bytes past its entry value */
-+        vst1.64 {d4,d5}, [a3,:128]!
-+        vst1.64 {d6,d7}, [a3,:128]!
-+        vst1.64 {d8,d9}, [a3,:128]!
-+
-+        mov pc, lr                      /* return */
-+        .endfunc
-+
-+        .align
-+        .type idct_col4_neon, %function
-+        .func idct_col4_neon
-+idct_col4_neon:
-+        mov ip, #16                /* row stride in bytes: 8 coefficients of 16 bits */
-+        vld1.64 {d2}, [a3,:64], ip /* d2 = col[0] */
-+        vld1.64 {d4}, [a3,:64], ip /* d4 = col[1] */
-+        vld1.64 {d6}, [a3,:64], ip /* d6 = col[2] */
-+        vld1.64 {d8}, [a3,:64], ip /* d8 = col[3] */
-+        vld1.64 {d3}, [a3,:64], ip /* d3 = col[4] */
-+        vld1.64 {d5}, [a3,:64], ip /* d5 = col[5] */
-+        vld1.64 {d7}, [a3,:64], ip /* d7 = col[6] */
-+        vld1.64 {d9}, [a3,:64], ip /* d9 = col[7] */
-+
-+        vrev64.32 d11, d3          /* reg | word-swapped reg: lane 0 is nonzero iff any of the 4 values is */
-+        vrev64.32 d13, d5
-+        vorr      d11, d3, d11
-+        vrev64.32 d15, d7
-+        vorr      d13, d5, d13
-+        vrev64.32 d17, d9
-+        vorr      d15, d7, d15
-+        vmov.32   v1,  d11[0]      /* v1, v2, v3, ip = nonzero flags for col[4], col[5], col[6], col[7] */
-+        vmov.32   v2,  d13[0]
-+        vorr      d17, d9, d17
-+        vmov.32   v3,  d15[0]
-+        vmov.32   ip,  d17[0]
-+        vdup.16   d30, w4c
-+        vadd.i16  d30, d30, d2
-+        vmull.s16 q15, d30, w4 /* q15 = W4 * (col[0]+(1<<(COL_SHIFT-1))/W4) */
-+
-+        idct_col4_top
-+        tst v1, v1                 /* each midN stage is skipped when its input row is all zero */
-+        beq 1f
-+        idct_col4_mid1
-+1:      tst v2, v2
-+        beq 2f
-+        idct_col4_mid2
-+2:      tst v3, v3
-+        beq 3f
-+        idct_col4_mid3
-+3:      tst ip, ip
-+        beq 4f
-+        idct_col4_mid4
-+4:
-+        idct_col4_end
-+
-+        vshr.s32  q2, q3,  #COL_SHIFT   /* shift and narrow; results end up in d2..d9, four 16-bit values each */
-+        vshr.s32  q3, q4,  #COL_SHIFT
-+        vmovn.i32 d2, q2
-+        vshr.s32  q4, q7,  #COL_SHIFT
-+        vmovn.i32 d3, q3
-+        vshr.s32  q5, q8,  #COL_SHIFT
-+        vmovn.i32 d4, q4
-+        vshr.s32  q6, q14, #COL_SHIFT
-+        vmovn.i32 d5, q5
-+        vshr.s32  q7, q13, #COL_SHIFT
-+        vmovn.i32 d6, q6
-+        vshr.s32  q8, q12, #COL_SHIFT
-+        vmovn.i32 d7, q7
-+        vshr.s32  q9, q11, #COL_SHIFT
-+        vmovn.i32 d8, q8
-+        vmovn.i32 d9, q9
-+
-+        mov pc, lr                 /* return; nothing is stored here, callers write d2..d9 out */
-+        .endfunc
-+
-+        .macro idct_col4_st16      /* stores d2..d9 as eight rows of four 16-bit values */
-+        mov ip, #16                /* row stride in bytes */
-+        vst1.64 {d2}, [a3,:64], ip
-+        vst1.64 {d3}, [a3,:64], ip
-+        vst1.64 {d4}, [a3,:64], ip
-+        vst1.64 {d5}, [a3,:64], ip
-+        vst1.64 {d6}, [a3,:64], ip
-+        vst1.64 {d7}, [a3,:64], ip
-+        vst1.64 {d8}, [a3,:64], ip
-+        vst1.64 {d9}, [a3,:64], ip /* a3 ends 128 bytes past its starting value */
-+        .endm
-+
-+        .align
-+        .type idct_col4_add8, %function
-+        .func idct_col4_add8
-+idct_col4_add8:
-+        vld1.32 {d10[0]}, [a1,:32], a2  /* read 8 rows of 4 bytes from a1, stepping by a2 per row */
-+        vld1.32 {d10[1]}, [a1,:32], a2
-+        vld1.32 {d11[0]}, [a1,:32], a2
-+        vld1.32 {d11[1]}, [a1,:32], a2
-+        vld1.32 {d12[0]}, [a1,:32], a2
-+        vld1.32 {d12[1]}, [a1,:32], a2
-+        vld1.32 {d13[0]}, [a1,:32], a2
-+        vld1.32 {d13[1]}, [a1,:32], a2
-+
-+        vaddw.u8 q1, q1, d10            /* widen the bytes to 16 bits and add them to the values in q1..q4 */
-+        vaddw.u8 q2, q2, d11
-+        vaddw.u8 q3, q3, d12
-+        vaddw.u8 q4, q4, d13
-+
-+        sub a1, a1, a2, lsl #3          /* rewind a1 by 8 rows; no return here, execution continues into idct_col4_st8 */
-+        .endfunc
-+
-+        .type idct_col4_st8, %function
-+        .func idct_col4_st8
-+idct_col4_st8:
-+        vqmovun.s16 d2, q1              /* narrow signed 16-bit to unsigned 8-bit with saturation */
-+        vqmovun.s16 d3, q2
-+        vqmovun.s16 d4, q3
-+        vqmovun.s16 d5, q4
-+
-+        vst1.32 {d2[0]}, [a1,:32], a2   /* write 8 rows of 4 bytes to a1, stepping by a2 per row */
-+        vst1.32 {d2[1]}, [a1,:32], a2
-+        vst1.32 {d3[0]}, [a1,:32], a2
-+        vst1.32 {d3[1]}, [a1,:32], a2
-+        vst1.32 {d4[0]}, [a1,:32], a2
-+        vst1.32 {d4[1]}, [a1,:32], a2
-+        vst1.32 {d5[0]}, [a1,:32], a2
-+        vst1.32 {d5[1]}, [a1,:32], a2
-+        mov pc, lr                      /* return; a1 is left 8 rows past its entry value */
-+        .endfunc
-+
-+        .align 4
-+const:  .short W1, W2, W3, W4, W5, W6, W7, W4c
-+
-+        .macro idct_start data          /* common prologue; \data = register holding the coefficient pointer */
-+        pld [\data]
-+        pld [\data, #64]
-+        push {v1-v3, lr}                /* v1-v3 are used as scratch by the helpers; lr because they are called with bl */
-+        vpush {d8-d15}                  /* the helpers overwrite these NEON registers */
-+        adr a4, const
-+        vld1.64 {d0,d1}, [a4,:128]      /* d0,d1 = W1..W7 and W4c, addressed through the w1..w4c lane macros */
-+        .endm
-+
-+        .macro idct_end                 /* common epilogue, undoing idct_start */
-+        vpop {d8-d15}
-+        pop {v1-v3, pc}                 /* loading the saved lr into pc returns to the caller */
-+        .endm
-+
-+        .align
-+        .global ff_simple_idct_neon
-+        .type ff_simple_idct_neon, %function
-+        .func ff_simple_idct_neon
-+/* void ff_simple_idct_neon(DCTELEM *data); */
-+ff_simple_idct_neon:
-+        idct_start a1
-+
-+        mov a3, a1                      /* the helpers take the coefficient pointer in a3 */
-+        bl idct_row4_neon               /* row pass over rows 0-3 (a3 advances 64 bytes) */
-+        bl idct_row4_neon               /* row pass over rows 4-7 */
-+        add a3, a3, #-128               /* back to the start of the block */
-+        bl idct_col4_neon               /* column pass over columns 0-3 */
-+        add a3, a3, #-128
-+        idct_col4_st16
-+        add a3, a3, #-120               /* -128 + 8 bytes: start of the block, columns 4-7 */
-+        bl idct_col4_neon
-+        add a3, a3, #-128
-+        idct_col4_st16
-+
-+        idct_end
-+        .endfunc
-+
-+        .align
-+        .global ff_simple_idct_put_neon
-+        .type ff_simple_idct_put_neon, %function
-+        .func ff_simple_idct_put_neon
-+/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); */
-+ff_simple_idct_put_neon:
-+        idct_start a3
-+
-+        bl idct_row4_neon               /* row pass over rows 0-3 (a3 advances 64 bytes) */
-+        bl idct_row4_neon               /* row pass over rows 4-7 */
-+        add a3, a3, #-128               /* back to the start of the block */
-+        bl idct_col4_neon               /* column pass over columns 0-3 */
-+        bl idct_col4_st8                /* saturate and store 8 rows of 4 bytes at a1 */
-+        sub a1, a1, a2, lsl #3          /* rewind dst by the 8 rows just written */
-+        add a1, a1, #4                  /* move to the right-hand 4 pixels */
-+        add a3, a3, #-120               /* -128 + 8 bytes: start of the block, columns 4-7 */
-+        bl idct_col4_neon
-+        bl idct_col4_st8
-+
-+        idct_end
-+        .endfunc
-+
-+        .align
-+        .global ff_simple_idct_add_neon
-+        .type ff_simple_idct_add_neon, %function
-+        .func ff_simple_idct_add_neon
-+/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
-+ff_simple_idct_add_neon:
-+        idct_start a3
-+
-+        bl idct_row4_neon               /* row pass over rows 0-3 (a3 advances 64 bytes) */
-+        bl idct_row4_neon               /* row pass over rows 4-7 */
-+        add a3, a3, #-128               /* back to the start of the block */
-+        bl idct_col4_neon               /* column pass over columns 0-3 */
-+        bl idct_col4_add8               /* adds the existing dst pixels, then runs on into idct_col4_st8, which returns */
-+        sub a1, a1, a2, lsl #3          /* rewind dst by the 8 rows just written */
-+        add a1, a1, #4                  /* move to the right-hand 4 pixels */
-+        add a3, a3, #-120               /* -128 + 8 bytes: start of the block, columns 4-7 */
-+        bl idct_col4_neon
-+        bl idct_col4_add8
-+
-+        idct_end
-+        .endfunc
-diff -Nurd mythtv.orig/libs/libavcodec/avcodec.h mythtv/libs/libavcodec/avcodec.h
---- mythtv.orig/libs/libavcodec/avcodec.h	2008-07-23 12:19:11.000000000 +0200
-+++ mythtv/libs/libavcodec/avcodec.h	2008-07-24 19:56:46.953198000 +0200
-@@ -1328,6 +1328,8 @@
- #define FF_IDCT_SIMPLEARMV6   17
- #define FF_IDCT_SIMPLEVIS     18
- #define FF_IDCT_WMV2          19
-+#define FF_IDCT_FAAN          20
-+#define FF_IDCT_SIMPLENEON    21
- 
-     /**
-      * slice count
-diff -Nurd mythtv.orig/libs/libavcodec/libavcodec.pro mythtv/libs/libavcodec/libavcodec.pro
---- mythtv.orig/libs/libavcodec/libavcodec.pro	2008-07-23 12:19:10.000000000 +0200
-+++ mythtv/libs/libavcodec/libavcodec.pro	2008-07-24 19:54:01.503198000 +0200
-@@ -413,6 +413,8 @@
- 
- contains( HAVE_ARMV6, yes )      { SOURCES += armv4l/simple_idct_armv6.S }
- 
-+contains( HAVE_NEON, yes )      { SOURCES += armv4l/float_arm_vfp.c armv4l/simple_idct_neon.S armv4l/dsputil_neon.c armv4l/h264dsp_neon.S }
-+
- contains( HAVE_VIS, yes ) {
-     SOURCES += sparc/dsputil_vis.c
-     SOURCES += sparc/simple_idct_vis.c
-diff -Nurd mythtv.orig/libs/libavcodec/utils.c mythtv/libs/libavcodec/utils.c
---- mythtv.orig/libs/libavcodec/utils.c	2008-07-23 12:19:10.000000000 +0200
-+++ mythtv/libs/libavcodec/utils.c	2008-07-24 19:58:12.403198000 +0200
-@@ -594,6 +594,7 @@
- {"sh4", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SH4, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"simplearm", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARM, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"simplearmv5te", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARMV5TE, INT_MIN, INT_MAX, V|E|D, "idct"},
-+{"simpleneon", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLENEON, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"h264", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_H264, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"vp3", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_VP3, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"ipp", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_IPP, INT_MIN, INT_MAX, V|E|D, "idct"},