mythtv 0.21: enable the appropriate ARM optimizations depending on the CPU and patch in NEON support for video

author: Koen Kooi <koen@openembedded.org> 2008-07-24 18:19:14 +0000
committer: Koen Kooi <koen@openembedded.org> 2008-07-24 18:19:14 +0000
commit: 45f4f7874cb1ff999638eab0561ba310bebfd11d (patch)
tree: 5e5fecf7041e6193b3bdbdc720441d905cff305a
parent: cb8ecbc2191f9a29bf4c669bd7161b24b44513ba (diff)
13 files changed, 1721 insertions, 5 deletions
diff --git a/packages/mythtv/files/armv5te/.mtn2git_empty b/packages/mythtv/files/armv5te/.mtn2git_empty
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/packages/mythtv/files/armv5te/.mtn2git_empty
diff --git a/packages/mythtv/files/armv5te/configh b/packages/mythtv/files/armv5te/configh
new file mode 100644
index 0000000000..46c647e2d5
--- /dev/null
+++ b/packages/mythtv/files/armv5te/configh
@@ -0,0 +1,6 @@
+#define HAVE_LLRINT 1
+#define HAVE_ROUNDF 1
+#define ARCH_ARMV4L 1
+#define ENABLE_ARMV4L 1
+#define HAVE_ARMV5TE 1
+#define ENABLE_ARMV5TE 1
diff --git a/packages/mythtv/files/armv5te/configmak b/packages/mythtv/files/armv5te/configmak
new file mode 100644
index 0000000000..aa9978515d
--- /dev/null
+++ b/packages/mythtv/files/armv5te/configmak
@@ -0,0 +1,3 @@
+ARCH_ARMV4L=yes
+HAVE_ARMV5TE=yes
+
diff --git a/packages/mythtv/files/armv6/.mtn2git_empty b/packages/mythtv/files/armv6/.mtn2git_empty
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/packages/mythtv/files/armv6/.mtn2git_empty
diff --git a/packages/mythtv/files/armv6/configh b/packages/mythtv/files/armv6/configh
new file mode 100644
index 0000000000..2301e723d6
--- /dev/null
+++ b/packages/mythtv/files/armv6/configh
@@ -0,0 +1,8 @@
+#define HAVE_LLRINT 1
+#define HAVE_ROUNDF 1
+#define ARCH_ARMV4L 1
+#define ENABLE_ARMV4L 1
+#define HAVE_ARMV5TE 1
+#define ENABLE_ARMV5TE 1
+#define HAVE_ARMV6 1
+#define ENABLE_ARMV6 1
diff --git a/packages/mythtv/files/armv6/configmak b/packages/mythtv/files/armv6/configmak
new file mode 100644
index 0000000000..4db5dc0dfd
--- /dev/null
+++ b/packages/mythtv/files/armv6/configmak
@@ -0,0 +1,3 @@
+ARCH_ARMV4L=yes
+HAVE_ARMV5TE=yes
+HAVE_ARMV6=yes
diff --git a/packages/mythtv/files/armv7a/.mtn2git_empty b/packages/mythtv/files/armv7a/.mtn2git_empty
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/packages/mythtv/files/armv7a/.mtn2git_empty
diff --git a/packages/mythtv/files/armv7a/configh b/packages/mythtv/files/armv7a/configh
new file mode 100644
index 0000000000..245e40f56a
--- /dev/null
+++ b/packages/mythtv/files/armv7a/configh
@@ -0,0 +1,14 @@
+#define HAVE_LLRINT 1
+#define HAVE_ROUNDF 1
+#define ARCH_ARMV4L 1
+#define ENABLE_ARMV4L 1
+#define HAVE_ARMV5TE 1
+#define ENABLE_ARMV5TE 1
+#define HAVE_ARMV6 1
+#define ENABLE_ARMV6 1
+#define HAVE_ARMV6T2 1
+#define ENABLE_ARMV6T2 1
+#define HAVE_ARMVFP 1
+#define ENABLE_ARMVFP 1
+#define HAVE_NEON 1
+#define ENABLE_NEON 1
diff --git a/packages/mythtv/files/armv7a/configmak b/packages/mythtv/files/armv7a/configmak
new file mode 100644
index 0000000000..50d549f794
--- /dev/null
+++ b/packages/mythtv/files/armv7a/configmak
@@ -0,0 +1,6 @@
+ARCH_ARMV4L=yes
+HAVE_ARMV5TE=yes
+HAVE_ARMV6=yes
+HAVE_ARMV6T2=yes
+HAVE_ARMVFP=yes
+HAVE_NEON=yes
diff --git a/packages/mythtv/files/configh b/packages/mythtv/files/configh
new file mode 100644
index 0000000000..2fe7658383
--- /dev/null
+++ b/packages/mythtv/files/configh
@@ -0,0 +1,2 @@
+#define HAVE_LLRINT 1
+#define HAVE_ROUNDF 1
diff --git a/packages/mythtv/files/configmak b/packages/mythtv/files/configmak
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/packages/mythtv/files/configmak
diff --git a/packages/mythtv/mythtv-0.21/ffmpeg-arm-update.diff b/packages/mythtv/mythtv-0.21/ffmpeg-arm-update.diff
new file mode 100644
index 0000000000..5abf52fcbb
--- /dev/null
+++ b/packages/mythtv/mythtv-0.21/ffmpeg-arm-update.diff
@@ -0,0 +1,1669 @@
+diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_arm.c mythtv/libs/libavcodec/armv4l/dsputil_arm.c
+--- mythtv.orig/libs/libavcodec/armv4l/dsputil_arm.c	2008-07-23 12:19:05.000000000 +0200
++++ mythtv/libs/libavcodec/armv4l/dsputil_arm.c	2008-07-24 19:54:00.753198000 +0200
+@@ -19,12 +19,14 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+-#include "dsputil.h"
++#include "libavcodec/dsputil.h"
+ #ifdef HAVE_IPP
+-#include "ipp.h"
++#include <ipp.h>
+ #endif
+ 
+ extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
++extern void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
++extern void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
+ 
+ extern void j_rev_dct_ARM(DCTELEM *data);
+ extern void simple_idct_ARM(DCTELEM *data);
+@@ -41,6 +43,12 @@
+ extern void ff_simple_idct_add_armv6(uint8_t *dest, int line_size,
+                                      DCTELEM *data);
+ 
++extern void ff_simple_idct_neon(DCTELEM *data);
++extern void ff_simple_idct_put_neon(uint8_t *dest, int line_size,
++                                    DCTELEM *data);
++extern void ff_simple_idct_add_neon(uint8_t *dest, int line_size,
++                                    DCTELEM *data);
++
+ /* XXX: local hack */
+ static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
+ static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
+@@ -202,6 +210,24 @@
+ }
+ #endif
+ 
++#ifdef HAVE_ARMV5TE
++static void prefetch_arm(void *mem, int stride, int h)
++{
++    asm volatile( /* issues one pld per row: h rows, stride bytes apart */
++        "1:              \n\t"
++        "subs %0, %0, #1 \n\t"
++        "pld  [%1]       \n\t"
++        "add  %1, %1, %2 \n\t"
++        "bgt  1b         \n\t"
++        : "+r"(h), "+r"(mem) : "r"(stride) : "cc"); /* subs updates flags */
++}
++#endif
++
++int mm_support(void)
++{
++    return ENABLE_IWMMXT * MM_IWMMXT;
++}
++
+ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
+ {
+     int idct_algo= avctx->idct_algo;
+@@ -209,49 +235,60 @@
+     ff_put_pixels_clamped = c->put_pixels_clamped;
+     ff_add_pixels_clamped = c->add_pixels_clamped;
+ 
+-    if(idct_algo == FF_IDCT_AUTO){
++    if (avctx->lowres == 0) {
++        if(idct_algo == FF_IDCT_AUTO){
+ #if defined(HAVE_IPP)
+-        idct_algo = FF_IDCT_IPP;
++            idct_algo = FF_IDCT_IPP;
++#elif defined(HAVE_NEON)
++            idct_algo = FF_IDCT_SIMPLENEON;
+ #elif defined(HAVE_ARMV6)
+-        idct_algo = FF_IDCT_SIMPLEARMV6;
++            idct_algo = FF_IDCT_SIMPLEARMV6;
+ #elif defined(HAVE_ARMV5TE)
+-        idct_algo = FF_IDCT_SIMPLEARMV5TE;
++            idct_algo = FF_IDCT_SIMPLEARMV5TE;
+ #else
+-        idct_algo = FF_IDCT_ARM;
++            idct_algo = FF_IDCT_ARM;
+ #endif
+-    }
++        }
+ 
+-    if(idct_algo==FF_IDCT_ARM){
+-        c->idct_put= j_rev_dct_ARM_put;
+-        c->idct_add= j_rev_dct_ARM_add;
+-        c->idct    = j_rev_dct_ARM;
+-        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
+-    } else if (idct_algo==FF_IDCT_SIMPLEARM){
+-        c->idct_put= simple_idct_ARM_put;
+-        c->idct_add= simple_idct_ARM_add;
+-        c->idct    = simple_idct_ARM;
+-        c->idct_permutation_type= FF_NO_IDCT_PERM;
++        if(idct_algo==FF_IDCT_ARM){
++            c->idct_put= j_rev_dct_ARM_put;
++            c->idct_add= j_rev_dct_ARM_add;
++            c->idct    = j_rev_dct_ARM;
++            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
++        } else if (idct_algo==FF_IDCT_SIMPLEARM){
++            c->idct_put= simple_idct_ARM_put;
++            c->idct_add= simple_idct_ARM_add;
++            c->idct    = simple_idct_ARM;
++            c->idct_permutation_type= FF_NO_IDCT_PERM;
+ #ifdef HAVE_ARMV6
+-    } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
+-        c->idct_put= ff_simple_idct_put_armv6;
+-        c->idct_add= ff_simple_idct_add_armv6;
+-        c->idct    = ff_simple_idct_armv6;
+-        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
++        } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
++            c->idct_put= ff_simple_idct_put_armv6;
++            c->idct_add= ff_simple_idct_add_armv6;
++            c->idct    = ff_simple_idct_armv6;
++            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+ #endif
+ #ifdef HAVE_ARMV5TE
+-    } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
+-        c->idct_put= simple_idct_put_armv5te;
+-        c->idct_add= simple_idct_add_armv5te;
+-        c->idct    = simple_idct_armv5te;
+-        c->idct_permutation_type = FF_NO_IDCT_PERM;
++        } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
++            c->idct_put= simple_idct_put_armv5te;
++            c->idct_add= simple_idct_add_armv5te;
++            c->idct    = simple_idct_armv5te;
++            c->idct_permutation_type = FF_NO_IDCT_PERM;
+ #endif
+ #ifdef HAVE_IPP
+-    } else if (idct_algo==FF_IDCT_IPP){
+-        c->idct_put= simple_idct_ipp_put;
+-        c->idct_add= simple_idct_ipp_add;
+-        c->idct    = simple_idct_ipp;
+-        c->idct_permutation_type= FF_NO_IDCT_PERM;
++        } else if (idct_algo==FF_IDCT_IPP){
++            c->idct_put= simple_idct_ipp_put;
++            c->idct_add= simple_idct_ipp_add;
++            c->idct    = simple_idct_ipp;
++            c->idct_permutation_type= FF_NO_IDCT_PERM;
++#endif
++#ifdef HAVE_NEON
++        } else if (idct_algo==FF_IDCT_SIMPLENEON){
++            c->idct_put= ff_simple_idct_put_neon;
++            c->idct_add= ff_simple_idct_add_neon;
++            c->idct    = ff_simple_idct_neon;
++            c->idct_permutation_type = FF_NO_IDCT_PERM;
+ #endif
++        }
+     }
+ 
+     c->put_pixels_tab[0][0] = put_pixels16_arm;
+@@ -271,7 +308,17 @@
+     c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; //OK
+     c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;
+ 
++#ifdef HAVE_ARMV5TE
++    c->prefetch = prefetch_arm;
++#endif
++
+ #ifdef HAVE_IWMMXT
+     dsputil_init_iwmmxt(c, avctx);
+ #endif
++#ifdef HAVE_ARMVFP
++    ff_float_init_arm_vfp(c, avctx);
++#endif
++#ifdef HAVE_NEON
++    ff_dsputil_init_neon(c, avctx);
++#endif
+ }
+diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_arm_s.S mythtv/libs/libavcodec/armv4l/dsputil_arm_s.S
+--- mythtv.orig/libs/libavcodec/armv4l/dsputil_arm_s.S	2008-07-23 12:19:05.000000000 +0200
++++ mythtv/libs/libavcodec/armv4l/dsputil_arm_s.S	2008-07-24 19:54:00.753198000 +0200
+@@ -19,6 +19,13 @@
+ @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ @
+ 
++#include "config.h"
++
++#ifndef HAVE_PLD
++.macro pld reg
++.endm
++#endif
++
+ .macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
+         mov \Rd0, \Rn0, lsr #(\shift * 8)
+         mov \Rd1, \Rn1, lsr #(\shift * 8)
+diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt.c mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt.c
+--- mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt.c	2008-07-23 12:19:05.000000000 +0200
++++ mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt.c	2008-07-24 19:54:00.753198000 +0200
+@@ -19,10 +19,10 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+-#include "dsputil.h"
++#include "libavcodec/dsputil.h"
+ 
+ #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
+-#define SET_RND(regd)  __asm__ __volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
++#define SET_RND(regd)  asm volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
+ #define WAVG2B "wavg2b"
+ #include "dsputil_iwmmxt_rnd.h"
+ #undef DEF
+@@ -30,7 +30,7 @@
+ #undef WAVG2B
+ 
+ #define DEF(x, y) x ## _ ## y ##_iwmmxt
+-#define SET_RND(regd)  __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
++#define SET_RND(regd)  asm volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
+ #define WAVG2B "wavg2br"
+ #include "dsputil_iwmmxt_rnd.h"
+ #undef DEF
+@@ -89,7 +89,7 @@
+ {
+     uint8_t *pixels2 = pixels + line_size;
+ 
+-    __asm__ __volatile__ (
++    asm volatile (
+         "mov            r12, #4                 \n\t"
+         "1:                                     \n\t"
+         "pld            [%[pixels], %[line_size2]]              \n\t"
+@@ -125,7 +125,7 @@
+ 
+ static void clear_blocks_iwmmxt(DCTELEM *blocks)
+ {
+-    __asm __volatile(
++    asm volatile(
+                 "wzero wr0                      \n\t"
+                 "mov r1, #(128 * 6 / 32)        \n\t"
+                 "1:                             \n\t"
+diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h
+--- mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h	2008-07-23 12:19:05.000000000 +0200
++++ mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h	2008-07-24 19:54:01.023198000 +0200
+@@ -19,13 +19,14 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+-#ifndef FFMPEG_DSPUTIL_IWMMXT_RND_H
+-#define FFMPEG_DSPUTIL_IWMMXT_RND_H
++/* This header intentionally has no multiple inclusion guards. It is meant to
++ * be included multiple times and generates different code depending on the
++ * value of certain #defines. */
+ 
+ void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+ {
+     int stride = line_size;
+-    __asm__ __volatile__ (
++    asm volatile (
+         "and r12, %[pixels], #7 \n\t"
+         "bic %[pixels], %[pixels], #7 \n\t"
+         "tmcr wcgr1, r12 \n\t"
+@@ -59,7 +60,7 @@
+ void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+ {
+     int stride = line_size;
+-    __asm__ __volatile__ (
++    asm volatile (
+         "and r12, %[pixels], #7 \n\t"
+         "bic %[pixels], %[pixels], #7 \n\t"
+         "tmcr wcgr1, r12 \n\t"
+@@ -101,7 +102,7 @@
+ void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+ {
+     int stride = line_size;
+-    __asm__ __volatile__ (
++    asm volatile (
+         "and r12, %[pixels], #7 \n\t"
+         "bic %[pixels], %[pixels], #7 \n\t"
+         "tmcr wcgr1, r12 \n\t"
+@@ -141,7 +142,7 @@
+ void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+ {
+     int stride = line_size;
+-    __asm__ __volatile__ (
++    asm volatile (
+         "pld [%[pixels]]                \n\t"
+         "pld [%[pixels], #32]           \n\t"
+         "pld [%[block]]                 \n\t"
+@@ -200,7 +201,7 @@
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[pixels]]                \n\t"
+         "pld [%[pixels], #32]           \n\t"
+         "and r12, %[pixels], #7         \n\t"
+@@ -249,7 +250,7 @@
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[pixels]]                \n\t"
+         "pld [%[pixels], #32]           \n\t"
+         "and r12, %[pixels], #7         \n\t"
+@@ -310,7 +311,7 @@
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[pixels]]                \n\t"
+         "pld [%[pixels], #32]           \n\t"
+         "pld [%[block]]                 \n\t"
+@@ -371,7 +372,7 @@
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[pixels]]                \n\t"
+         "pld [%[pixels], #32]           \n\t"
+         "pld [%[block]]                 \n\t"
+@@ -447,7 +448,7 @@
+     int stride = line_size;
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld            [%[pixels]]                             \n\t"
+         "pld            [%[pixels], #32]                        \n\t"
+         "and            r12, %[pixels], #7                      \n\t"
+@@ -501,7 +502,7 @@
+     int stride = line_size;
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[pixels]]                \n\t"
+         "pld [%[pixels], #32]           \n\t"
+         "and r12, %[pixels], #7         \n\t"
+@@ -558,7 +559,7 @@
+     int stride = line_size;
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[pixels]]                \n\t"
+         "pld [%[pixels], #32]           \n\t"
+         "and r12, %[pixels], #7         \n\t"
+@@ -626,7 +627,7 @@
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[pixels]]                \n\t"
+         "mov r12, #2                    \n\t"
+         "pld [%[pixels], #32]           \n\t"
+@@ -720,7 +721,7 @@
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[pixels]]                \n\t"
+         "mov r12, #2                    \n\t"
+         "pld [%[pixels], #32]           \n\t"
+@@ -862,7 +863,7 @@
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[block]]                 \n\t"
+         "pld [%[block], #32]            \n\t"
+         "pld [%[pixels]]                \n\t"
+@@ -966,7 +967,7 @@
+     // [wr0 wr1 wr2 wr3] for previous line
+     // [wr4 wr5 wr6 wr7] for current line
+     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+-    __asm__ __volatile__(
++    asm volatile(
+         "pld [%[block]]                 \n\t"
+         "pld [%[block], #32]            \n\t"
+         "pld [%[pixels]]                \n\t"
+@@ -1115,5 +1116,3 @@
+         : [line_size]"r"(line_size)
+         : "r12", "memory");
+ }
+-
+-#endif /* FFMPEG_DSPUTIL_IWMMXT_RND_H */
+diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_neon.c mythtv/libs/libavcodec/armv4l/dsputil_neon.c
+--- mythtv.orig/libs/libavcodec/armv4l/dsputil_neon.c	1970-01-01 01:00:00.000000000 +0100
++++ mythtv/libs/libavcodec/armv4l/dsputil_neon.c	2008-07-24 19:54:01.023198000 +0200
+@@ -0,0 +1,397 @@
++/*
++ * ARM NEON optimised DSP functions
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "libavcodec/avcodec.h"
++#include "libavcodec/dsputil.h"
++
++extern void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
++                                        int h, int x, int y);
++
++#define PUT_PIXELS_16_X2(vhadd)                                 \
++        "1:                                          \n\t"      \
++        "vld1.64   {d0,d1,d2}, [%[p]], %[line_size]  \n\t"      \
++        "vld1.64   {d4,d5,d6}, [%[p]], %[line_size]  \n\t"      \
++        "pld       [%[p]]                            \n\t"      \
++        "subs      %[h], %[h], #2                    \n\t"      \
++        "vext.8    q1, q0, q1, #1                    \n\t"      \
++        "vext.8    q3, q2, q3, #1                    \n\t"      \
++         vhadd".u8 q0, q0, q1                        \n\t"      \
++         vhadd".u8 q2, q2, q3                        \n\t"      \
++        "vst1.64   {d0,d1}, [%[b],:64], %[line_size] \n\t"      \
++        "vst1.64   {d4,d5}, [%[b],:64], %[line_size] \n\t"      \
++        "bne       1b                                \n\t"
++
++#define PUT_PIXELS_16_Y2(vhadd)                                 \
++        "add       %[p1], %[p0], %[line_size]         \n\t"     \
++        "lsl       %[l2], %[line_size], #1            \n\t"     \
++        "vld1.64   {d0,d1}, [%[p0]], %[l2]            \n\t"     \
++        "vld1.64   {d2,d3}, [%[p1]], %[l2]            \n\t"     \
++        "1:                                           \n\t"     \
++        "subs      %[h], %[h], #2                     \n\t"     \
++         vhadd".u8 q2, q0, q1                         \n\t"     \
++        "vst1.64   {d4,d5}, [%[b],:128], %[line_size] \n\t"     \
++        "vld1.64   {d0,d1}, [%[p0]],     %[l2]        \n\t"     \
++         vhadd".u8 q2, q0, q1                         \n\t"     \
++        "vst1.64   {d4,d5}, [%[b],:128], %[line_size] \n\t"     \
++        "vld1.64   {d2,d3}, [%[p1]],     %[l2]        \n\t"     \
++        "bne 1b                                       \n\t"
++
++#define PUT_PIXELS_16_XY2(vshrn, no_rnd)                        \
++        "lsl        %[l2], %[line_size], #1              \n\t"  \
++        "add        %[p1], %[p0], %[line_size]           \n\t"  \
++        "vld1.64    {d0,d1,d2}, [%[p0]], %[l2]           \n\t"  \
++        "vld1.64    {d4,d5,d6}, [%[p1]], %[l2]           \n\t"  \
++        "pld        [%[p0]]                              \n\t"  \
++        "pld        [%[p1]]                              \n\t"  \
++        "vext.8     q1,  q0, q1, #1                      \n\t"  \
++        "vext.8     q3,  q2, q3, #1                      \n\t"  \
++        "vaddl.u8   q8,  d0, d2                          \n\t"  \
++        "vaddl.u8   q10, d1, d3                          \n\t"  \
++        "vaddl.u8   q9,  d4, d6                          \n\t"  \
++        "vaddl.u8   q11, d5, d7                          \n\t"  \
++        "1:                                              \n\t"  \
++        "subs       %[h], %[h], #2                       \n\t"  \
++        "vld1.64    {d0,d1,d2}, [%[p0]], %[l2]           \n\t"  \
++        "vadd.u16   q12, q8, q9                          \n\t"  \
++        "pld        [%[p0]]                              \n\t"  \
++ no_rnd "vadd.u16   q12, q12, q13                        \n\t"  \
++        "vext.8     q15, q0, q1, #1                      \n\t"  \
++        "vadd.u16   q1, q10, q11                         \n\t"  \
++         vshrn".u16 d28, q12, #2                         \n\t"  \
++ no_rnd "vadd.u16   q1, q1, q13                          \n\t"  \
++         vshrn".u16 d29, q1, #2                          \n\t"  \
++        "vaddl.u8   q8, d0, d30                          \n\t"  \
++        "vld1.64    {d2,d3,d4}, [%[p1]], %[l2]           \n\t"  \
++        "vaddl.u8   q10, d1, d31                         \n\t"  \
++        "vst1.64    {d28,d29}, [%[b],:128], %[line_size] \n\t"  \
++        "vadd.u16   q12, q8, q9                          \n\t"  \
++        "pld        [%[p1]]                              \n\t"  \
++ no_rnd "vadd.u16   q12, q12, q13                        \n\t"  \
++        "vext.8     q2, q1, q2, #1                       \n\t"  \
++        "vadd.u16   q0, q10, q11                         \n\t"  \
++         vshrn".u16 d30, q12, #2                         \n\t"  \
++ no_rnd "vadd.u16   q0, q0, q13                          \n\t"  \
++         vshrn".u16 d31, q0, #2                          \n\t"  \
++        "vaddl.u8   q9, d2, d4                           \n\t"  \
++        "vst1.64    {d30,d31}, [%[b],:128], %[line_size] \n\t"  \
++        "vaddl.u8   q11, d3, d5                          \n\t"  \
++        "bgt     1b                                      \n\t"
++
++#define PUT_PIXELS_8_X2(vhadd)                          \
++        "1:                                       \n\t" \
++        "vld1.64   {d0,d1}, [%[p]], %[line_size]  \n\t" \
++        "vld1.64   {d2,d3}, [%[p]], %[line_size]  \n\t" \
++        "pld       [%[p]]                         \n\t" \
++        "subs      %[h], %[h], #2                 \n\t" \
++        "vext.8    d1, d0, d1, #1                 \n\t" \
++        "vext.8    d3, d2, d3, #1                 \n\t" \
++        "vswp      d1, d2                         \n\t" \
++         vhadd".u8 q0, q0, q1                     \n\t" \
++        "vst1.64   {d0}, [%[b],:64], %[line_size] \n\t" \
++        "vst1.64   {d1}, [%[b],:64], %[line_size] \n\t" \
++        "bne       1b                             \n\t"
++
++#define PUT_PIXELS_8_Y2(vhadd)                          \
++        "add       %[p1], %[p0], %[line_size]     \n\t" \
++        "lsl       %[l2], %[line_size], #1        \n\t" \
++        "vld1.64   {d0}, [%[p0]], %[l2]           \n\t" \
++        "vld1.64   {d1}, [%[p1]], %[l2]           \n\t" \
++        "1:                                       \n\t" \
++        "subs      %[h], %[h], #2                 \n\t" \
++         vhadd".u8 d4, d0, d1                     \n\t" \
++        "vst1.64   {d4}, [%[b],:64], %[line_size] \n\t" \
++        "vld1.64   {d0}, [%[p0]],    %[l2]        \n\t" \
++         vhadd".u8 d4, d0, d1                     \n\t" \
++        "vst1.64   {d4}, [%[b],:64], %[line_size] \n\t" \
++        "vld1.64   {d1}, [%[p1]],     %[l2]       \n\t" \
++        "bne 1b                                   \n\t"
++
++#define PUT_PIXELS8_XY2(vshrn, no_rnd)                          \
++        "lsl        %[l2],   %[line_size], #1       \n\t"       \
++        "add        %[p1],   %[p0], %[line_size]    \n\t"       \
++        "vld1.64    {d0,d1}, [%[p0]], %[l2]         \n\t"       \
++        "vld1.64    {d2,d3}, [%[p1]], %[l2]         \n\t"       \
++        "pld        [%[p0]]                         \n\t"       \
++        "pld        [%[p1]]                         \n\t"       \
++        "vext.8     d4, d0, d1, #1                  \n\t"       \
++        "vext.8     d6, d2, d3, #1                  \n\t"       \
++        "vaddl.u8   q8, d0, d4                      \n\t"       \
++        "vaddl.u8   q9, d2, d6                      \n\t"       \
++        "1:                                         \n\t"       \
++        "subs       %[h], %[h], #2                  \n\t"       \
++        "vld1.64    {d0,d1}, [%[p0]], %[l2]         \n\t"       \
++        "pld        [%[p0]]                         \n\t"       \
++        "vadd.u16   q10, q8, q9                     \n\t"       \
++        "vext.8     d4, d0, d1, #1                  \n\t"       \
++ no_rnd "vadd.u16   q10, q10, q11                   \n\t"       \
++        "vaddl.u8   q8, d0, d4                      \n\t"       \
++         vshrn".u16 d5, q10, #2                     \n\t"       \
++        "vld1.64    {d2,d3}, [%[p1]], %[l2]         \n\t"       \
++        "vadd.u16   q10, q8, q9                     \n\t"       \
++        "pld        [%[p1]]                         \n\t"       \
++ no_rnd "vadd.u16   q10, q10, q11                   \n\t"       \
++        "vst1.64    {d5}, [%[b],:64], %[line_size]  \n\t"       \
++         vshrn".u16 d7, q10, #2                     \n\t"       \
++        "vext.8     d6, d2, d3, #1                  \n\t"       \
++        "vaddl.u8   q9, d2, d6                      \n\t"       \
++        "vst1.64    {d7}, [%[b],:64], %[line_size]  \n\t"       \
++        "bgt     1b                                 \n\t"
++
++static void put_pixels16_neon(uint8_t *block, const uint8_t *pixels,
++                              int line_size, int h)
++{
++    asm volatile( /* copies 4 rows of 16 bytes per pass; exits only at h == 0 */
++        "1:                                         \n\t"
++        "vld1.64 {d0,d1}, [%[pixels]], %[line_size] \n\t"
++        "vld1.64 {d2,d3}, [%[pixels]], %[line_size] \n\t"
++        "vld1.64 {d4,d5}, [%[pixels]], %[line_size] \n\t"
++        "vld1.64 {d6,d7}, [%[pixels]], %[line_size] \n\t"
++        "pld     [%[pixels]]                        \n\t"
++        "subs    %[h], %[h], #4                     \n\t"
++        "vst1.64 {d0,d1}, [%[block],:128], %[line_size]  \n\t"
++        "vst1.64 {d2,d3}, [%[block],:128], %[line_size]  \n\t"
++        "vst1.64 {d4,d5}, [%[block],:128], %[line_size]  \n\t"
++        "vst1.64 {d6,d7}, [%[block],:128], %[line_size]  \n\t"
++        "bne     1b                                 \n\t"
++        : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
++}
++
++static void put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
++                                 int line_size, int h)
++{
++    asm volatile( /* rounding variant (vrhadd); 2 rows per pass of the macro loop */
++        PUT_PIXELS_16_X2("vrhadd")
++        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
++        : [line_size]"r"(line_size) /* vext.8 q3 writes d6+d7; subs sets flags */
++        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
++}
++
++static void put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
++                                 int line_size, int h)
++{
++    const uint8_t *p1; /* scratch: set by the macro to p0 + line_size */
++    int l2;            /* scratch: set by the macro to 2 * line_size */
++
++    asm volatile( /* rounding variant (vrhadd) */
++        PUT_PIXELS_16_Y2("vrhadd")
++        : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h),
++          [l2]"=&r"(l2)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d2", "d3", "d4", "d5", "cc", "memory");
++}
++
++static void put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
++                                  int line_size, int h)
++{
++    const uint8_t *p1; /* scratch: set by the macro to p0 + line_size */
++    int l2;            /* scratch: set by the macro to 2 * line_size */
++
++    asm volatile( /* "@" comments out the q13 bias adds; vrshrn rounds instead */
++        PUT_PIXELS_16_XY2("vrshrn", "@")
++        : [b]"+r"(block),
++          [p0]"+r"(pixels),
++          [p1]"=&r"(p1), [h]"+r"(h),
++          [l2]"=&r"(l2)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
++          "d28", "d29", "d30", "d31",
++          "q8", "q9", "q10", "q11", "q12", "cc", "memory");
++}
++
++static void put_pixels8_neon(uint8_t *block, const uint8_t *pixels,
++                             int line_size, int h)
++{
++    asm volatile( /* copies 4 rows of 8 bytes per pass; exits only at h == 0 */
++        "1:                                 \n\t"
++        "vld1.64 {d0}, [%[p]], %[line_size] \n\t"
++        "vld1.64 {d1}, [%[p]], %[line_size] \n\t"
++        "vld1.64 {d2}, [%[p]], %[line_size] \n\t"
++        "vld1.64 {d3}, [%[p]], %[line_size] \n\t"
++        "subs    %[h], %[h], #4             \n\t"
++        "vst1.64 {d0}, [%[b],:64], %[line_size] \n\t"
++        "vst1.64 {d1}, [%[b],:64], %[line_size] \n\t"
++        "vst1.64 {d2}, [%[b],:64], %[line_size] \n\t"
++        "vst1.64 {d3}, [%[b],:64], %[line_size] \n\t"
++        "bne     1b                         \n\t"
++        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d2", "d3", "cc", "memory");
++}
++
++static void put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
++                                int line_size, int h)
++{
++    asm volatile( /* rounding variant (vrhadd); 2 rows per pass of the macro loop */
++        PUT_PIXELS_8_X2("vrhadd")
++        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d2", "d3", "cc", "memory");
++}
++
++static void put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
++                                int line_size, int h)
++{
++    const uint8_t *p1; /* scratch: set by the macro to p0 + line_size */
++    int l2;            /* scratch: set by the macro to 2 * line_size */
++
++    asm volatile( /* rounding variant (vrhadd) */
++        PUT_PIXELS_8_Y2("vrhadd")
++        : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h),
++          [l2]"=&r"(l2)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d4", "cc", "memory");
++}
++
++static void put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
++                                 int line_size, int h)
++{
++    const uint8_t *p1; /* scratch: set by the macro to p0 + line_size */
++    int l2;            /* scratch: set by the macro to 2 * line_size */
++
++    asm volatile( /* "@" comments out the q11 bias adds; vrshrn rounds instead */
++        PUT_PIXELS8_XY2("vrshrn", "@")
++        : [b]"+r"(block),
++          [p0]"+r"(pixels),
++          [p1]"=&r"(p1), [h]"+r"(h),
++          [l2]"=&r"(l2)
++        : [line_size]"r"(line_size) /* macro narrows into d5 and d7; subs sets flags */
++        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
++          "q8", "q9", "q10", "cc", "memory");
++}
++
++static void put_no_rnd_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
++                                        int line_size, int h)
++{
++    asm volatile( /* truncating variant (vhadd); 2 rows per pass of the macro loop */
++        PUT_PIXELS_16_X2("vhadd")
++        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
++        : [line_size]"r"(line_size) /* vext.8 q3 writes d6+d7; subs sets flags */
++        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
++}
++
++static void put_no_rnd_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
++                                        int line_size, int h)
++{
++    const uint8_t *p1; /* scratch: set by the macro to p0 + line_size */
++    int l2;            /* scratch: set by the macro to 2 * line_size */
++
++    asm volatile( /* truncating variant (vhadd) */
++        PUT_PIXELS_16_Y2("vhadd")
++        : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h),
++          [l2]"=&r"(l2)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d2", "d3", "d4", "d5", "cc", "memory");
++}
++
++static void put_no_rnd_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
++                                         int line_size, int h)
++{
++    const uint8_t *p1; /* scratch: set by the macro to p0 + line_size */
++    int l2;            /* scratch: set by the macro to 2 * line_size */
++
++    asm volatile( /* q13 holds 1 per lane; the macro adds it before each vshrn #2 */
++        "vmov.i16   q13, #1                         \n\t"
++        PUT_PIXELS_16_XY2("vshrn", "")
++        : [b]"+r"(block),
++          [p0]"+r"(pixels),
++          [p1]"=&r"(p1), [h]"+r"(h),
++          [l2]"=&r"(l2)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
++          "d28", "d29", "d30", "d31",
++          "q8", "q9", "q10", "q11", "q12", "q13", "cc", "memory");
++}
++
++static void put_no_rnd_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
++                                       int line_size, int h)
++{
++    asm volatile( /* truncating variant (vhadd); 2 rows per pass of the macro loop */
++        PUT_PIXELS_8_X2("vhadd")
++        : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d2", "d3", "cc", "memory");
++}
++
++static void put_no_rnd_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
++                                       int line_size, int h)
++{
++    const uint8_t *p1; /* scratch: set by the macro to p0 + line_size */
++    int l2;            /* scratch: set by the macro to 2 * line_size */
++
++    asm volatile( /* truncating variant (vhadd) */
++        PUT_PIXELS_8_Y2("vhadd")
++        : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h),
++          [l2]"=&r"(l2)
++        : [line_size]"r"(line_size) /* "cc" below: subs updates the flags */
++        : "d0", "d1", "d4", "cc", "memory");
++}
++
author	Koen Kooi <koen@openembedded.org>	2008-07-24 18:19:14 +0000
committer	Koen Kooi <koen@openembedded.org>	2008-07-24 18:19:14 +0000
commit	45f4f7874cb1ff999638eab0561ba310bebfd11d (patch)
tree	5e5fecf7041e6193b3bdbdc720441d905cff305a
parent	cb8ecbc2191f9a29bf4c669bd7161b24b44513ba (diff)