diff options
| author | Koen Kooi <koen@openembedded.org> | 2008-07-24 18:19:14 +0000 |
|---|---|---|
| committer | Koen Kooi <koen@openembedded.org> | 2008-07-24 18:19:14 +0000 |
| commit | 45f4f7874cb1ff999638eab0561ba310bebfd11d (patch) | |
| tree | 5e5fecf7041e6193b3bdbdc720441d905cff305a /packages | |
| parent | cb8ecbc2191f9a29bf4c669bd7161b24b44513ba (diff) | |
mythtv 0.21: enable appropriate ARM optimization dependent on CPU and patch in NEON support for video
Diffstat (limited to 'packages')
| -rw-r--r-- | packages/mythtv/files/armv5te/.mtn2git_empty | 0 | ||||
| -rw-r--r-- | packages/mythtv/files/armv5te/configh | 6 | ||||
| -rw-r--r-- | packages/mythtv/files/armv5te/configmak | 3 | ||||
| -rw-r--r-- | packages/mythtv/files/armv6/.mtn2git_empty | 0 | ||||
| -rw-r--r-- | packages/mythtv/files/armv6/configh | 8 | ||||
| -rw-r--r-- | packages/mythtv/files/armv6/configmak | 3 | ||||
| -rw-r--r-- | packages/mythtv/files/armv7a/.mtn2git_empty | 0 | ||||
| -rw-r--r-- | packages/mythtv/files/armv7a/configh | 14 | ||||
| -rw-r--r-- | packages/mythtv/files/armv7a/configmak | 6 | ||||
| -rw-r--r-- | packages/mythtv/files/configh | 2 | ||||
| -rw-r--r-- | packages/mythtv/files/configmak | 0 | ||||
| -rw-r--r-- | packages/mythtv/mythtv-0.21/ffmpeg-arm-update.diff | 1669 | ||||
| -rw-r--r-- | packages/mythtv/mythtv_0.21.bb | 15 |
13 files changed, 1721 insertions, 5 deletions
diff --git a/packages/mythtv/files/armv5te/.mtn2git_empty b/packages/mythtv/files/armv5te/.mtn2git_empty new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/packages/mythtv/files/armv5te/.mtn2git_empty diff --git a/packages/mythtv/files/armv5te/configh b/packages/mythtv/files/armv5te/configh new file mode 100644 index 0000000000..46c647e2d5 --- /dev/null +++ b/packages/mythtv/files/armv5te/configh @@ -0,0 +1,6 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 +#define ARCH_ARMV4L 1 +#define ENABLE_ARMV4L 1 +#define HAVE_ARMV5TE 1 +#define ENABLE_ARMV5TE 1 diff --git a/packages/mythtv/files/armv5te/configmak b/packages/mythtv/files/armv5te/configmak new file mode 100644 index 0000000000..aa9978515d --- /dev/null +++ b/packages/mythtv/files/armv5te/configmak @@ -0,0 +1,3 @@ +ARCH_ARMV4L=yes +HAVE_ARMV5TE=yes + diff --git a/packages/mythtv/files/armv6/.mtn2git_empty b/packages/mythtv/files/armv6/.mtn2git_empty new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/packages/mythtv/files/armv6/.mtn2git_empty diff --git a/packages/mythtv/files/armv6/configh b/packages/mythtv/files/armv6/configh new file mode 100644 index 0000000000..2301e723d6 --- /dev/null +++ b/packages/mythtv/files/armv6/configh @@ -0,0 +1,8 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 +#define ARCH_ARMV4L 1 +#define ENABLE_ARMV4L 1 +#define HAVE_ARMV5TE 1 +#define ENABLE_ARMV5TE 1 +#define HAVE_ARMV6 1 +#define ENABLE_ARMV6 1 diff --git a/packages/mythtv/files/armv6/configmak b/packages/mythtv/files/armv6/configmak new file mode 100644 index 0000000000..4db5dc0dfd --- /dev/null +++ b/packages/mythtv/files/armv6/configmak @@ -0,0 +1,3 @@ +ARCH_ARMV4L=yes +HAVE_ARMV5TE=yes +HAVE_ARMV6=yes diff --git a/packages/mythtv/files/armv7a/.mtn2git_empty b/packages/mythtv/files/armv7a/.mtn2git_empty new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/packages/mythtv/files/armv7a/.mtn2git_empty diff --git a/packages/mythtv/files/armv7a/configh 
b/packages/mythtv/files/armv7a/configh new file mode 100644 index 0000000000..245e40f56a --- /dev/null +++ b/packages/mythtv/files/armv7a/configh @@ -0,0 +1,14 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 +#define ARCH_ARMV4L 1 +#define ENABLE_ARMV4L 1 +#define HAVE_ARMV5TE 1 +#define ENABLE_ARMV5TE 1 +#define HAVE_ARMV6 1 +#define ENABLE_ARMV6 1 +#define HAVE_ARMV6T2 1 +#define ENABLE_ARMV6T2 1 +#define HAVE_ARMVFP 1 +#define ENABLE_ARMVFP 1 +#define HAVE_NEON 1 +#define ENABLE_NEON 1 diff --git a/packages/mythtv/files/armv7a/configmak b/packages/mythtv/files/armv7a/configmak new file mode 100644 index 0000000000..50d549f794 --- /dev/null +++ b/packages/mythtv/files/armv7a/configmak @@ -0,0 +1,6 @@ +ARCH_ARMV4L=yes +HAVE_ARMV5TE=yes +HAVE_ARMV6=yes +HAVE_ARMV6T2=yes +HAVE_ARMVFP=yes +HAVE_NEON=yes diff --git a/packages/mythtv/files/configh b/packages/mythtv/files/configh new file mode 100644 index 0000000000..2fe7658383 --- /dev/null +++ b/packages/mythtv/files/configh @@ -0,0 +1,2 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 diff --git a/packages/mythtv/files/configmak b/packages/mythtv/files/configmak new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/packages/mythtv/files/configmak diff --git a/packages/mythtv/mythtv-0.21/ffmpeg-arm-update.diff b/packages/mythtv/mythtv-0.21/ffmpeg-arm-update.diff new file mode 100644 index 0000000000..5abf52fcbb --- /dev/null +++ b/packages/mythtv/mythtv-0.21/ffmpeg-arm-update.diff @@ -0,0 +1,1669 @@ +diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_arm.c mythtv/libs/libavcodec/armv4l/dsputil_arm.c +--- mythtv.orig/libs/libavcodec/armv4l/dsputil_arm.c 2008-07-23 12:19:05.000000000 +0200 ++++ mythtv/libs/libavcodec/armv4l/dsputil_arm.c 2008-07-24 19:54:00.753198000 +0200 +@@ -19,12 +19,14 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#include "dsputil.h" ++#include "libavcodec/dsputil.h" + #ifdef HAVE_IPP +-#include "ipp.h" ++#include <ipp.h> + #endif 
+ + extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx); ++extern void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx); ++extern void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); + + extern void j_rev_dct_ARM(DCTELEM *data); + extern void simple_idct_ARM(DCTELEM *data); +@@ -41,6 +43,12 @@ + extern void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, + DCTELEM *data); + ++extern void ff_simple_idct_neon(DCTELEM *data); ++extern void ff_simple_idct_put_neon(uint8_t *dest, int line_size, ++ DCTELEM *data); ++extern void ff_simple_idct_add_neon(uint8_t *dest, int line_size, ++ DCTELEM *data); ++ + /* XXX: local hack */ + static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); + static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); +@@ -202,6 +210,24 @@ + } + #endif + ++#ifdef HAVE_ARMV5TE ++static void prefetch_arm(void *mem, int stride, int h) ++{ ++ asm volatile( ++ "1: \n\t" ++ "subs %0, %0, #1 \n\t" ++ "pld [%1] \n\t" ++ "add %1, %1, %2 \n\t" ++ "bgt 1b \n\t" ++ : "+r"(h), "+r"(mem) : "r"(stride)); ++} ++#endif ++ ++int mm_support(void) ++{ ++ return ENABLE_IWMMXT * MM_IWMMXT; ++} ++ + void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) + { + int idct_algo= avctx->idct_algo; +@@ -209,49 +235,60 @@ + ff_put_pixels_clamped = c->put_pixels_clamped; + ff_add_pixels_clamped = c->add_pixels_clamped; + +- if(idct_algo == FF_IDCT_AUTO){ ++ if (avctx->lowres == 0) { ++ if(idct_algo == FF_IDCT_AUTO){ + #if defined(HAVE_IPP) +- idct_algo = FF_IDCT_IPP; ++ idct_algo = FF_IDCT_IPP; ++#elif defined(HAVE_NEON) ++ idct_algo = FF_IDCT_SIMPLENEON; + #elif defined(HAVE_ARMV6) +- idct_algo = FF_IDCT_SIMPLEARMV6; ++ idct_algo = FF_IDCT_SIMPLEARMV6; + #elif defined(HAVE_ARMV5TE) +- idct_algo = FF_IDCT_SIMPLEARMV5TE; ++ idct_algo = FF_IDCT_SIMPLEARMV5TE; + #else +- idct_algo = FF_IDCT_ARM; ++ idct_algo = FF_IDCT_ARM; + #endif +- } ++ } + +- 
if(idct_algo==FF_IDCT_ARM){ +- c->idct_put= j_rev_dct_ARM_put; +- c->idct_add= j_rev_dct_ARM_add; +- c->idct = j_rev_dct_ARM; +- c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */ +- } else if (idct_algo==FF_IDCT_SIMPLEARM){ +- c->idct_put= simple_idct_ARM_put; +- c->idct_add= simple_idct_ARM_add; +- c->idct = simple_idct_ARM; +- c->idct_permutation_type= FF_NO_IDCT_PERM; ++ if(idct_algo==FF_IDCT_ARM){ ++ c->idct_put= j_rev_dct_ARM_put; ++ c->idct_add= j_rev_dct_ARM_add; ++ c->idct = j_rev_dct_ARM; ++ c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */ ++ } else if (idct_algo==FF_IDCT_SIMPLEARM){ ++ c->idct_put= simple_idct_ARM_put; ++ c->idct_add= simple_idct_ARM_add; ++ c->idct = simple_idct_ARM; ++ c->idct_permutation_type= FF_NO_IDCT_PERM; + #ifdef HAVE_ARMV6 +- } else if (idct_algo==FF_IDCT_SIMPLEARMV6){ +- c->idct_put= ff_simple_idct_put_armv6; +- c->idct_add= ff_simple_idct_add_armv6; +- c->idct = ff_simple_idct_armv6; +- c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; ++ } else if (idct_algo==FF_IDCT_SIMPLEARMV6){ ++ c->idct_put= ff_simple_idct_put_armv6; ++ c->idct_add= ff_simple_idct_add_armv6; ++ c->idct = ff_simple_idct_armv6; ++ c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + #endif + #ifdef HAVE_ARMV5TE +- } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){ +- c->idct_put= simple_idct_put_armv5te; +- c->idct_add= simple_idct_add_armv5te; +- c->idct = simple_idct_armv5te; +- c->idct_permutation_type = FF_NO_IDCT_PERM; ++ } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){ ++ c->idct_put= simple_idct_put_armv5te; ++ c->idct_add= simple_idct_add_armv5te; ++ c->idct = simple_idct_armv5te; ++ c->idct_permutation_type = FF_NO_IDCT_PERM; + #endif + #ifdef HAVE_IPP +- } else if (idct_algo==FF_IDCT_IPP){ +- c->idct_put= simple_idct_ipp_put; +- c->idct_add= simple_idct_ipp_add; +- c->idct = simple_idct_ipp; +- c->idct_permutation_type= FF_NO_IDCT_PERM; ++ } else if (idct_algo==FF_IDCT_IPP){ ++ c->idct_put= simple_idct_ipp_put; ++ 
c->idct_add= simple_idct_ipp_add; ++ c->idct = simple_idct_ipp; ++ c->idct_permutation_type= FF_NO_IDCT_PERM; ++#endif ++#ifdef HAVE_NEON ++ } else if (idct_algo==FF_IDCT_SIMPLENEON){ ++ c->idct_put= ff_simple_idct_put_neon; ++ c->idct_add= ff_simple_idct_add_neon; ++ c->idct = ff_simple_idct_neon; ++ c->idct_permutation_type = FF_NO_IDCT_PERM; + #endif ++ } + } + + c->put_pixels_tab[0][0] = put_pixels16_arm; +@@ -271,7 +308,17 @@ + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; //OK + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm; + ++#ifdef HAVE_ARMV5TE ++ c->prefetch = prefetch_arm; ++#endif ++ + #ifdef HAVE_IWMMXT + dsputil_init_iwmmxt(c, avctx); + #endif ++#ifdef HAVE_ARMVFP ++ ff_float_init_arm_vfp(c, avctx); ++#endif ++#ifdef HAVE_NEON ++ ff_dsputil_init_neon(c, avctx); ++#endif + } +diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_arm_s.S mythtv/libs/libavcodec/armv4l/dsputil_arm_s.S +--- mythtv.orig/libs/libavcodec/armv4l/dsputil_arm_s.S 2008-07-23 12:19:05.000000000 +0200 ++++ mythtv/libs/libavcodec/armv4l/dsputil_arm_s.S 2008-07-24 19:54:00.753198000 +0200 +@@ -19,6 +19,13 @@ + @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + @ + ++#include "config.h" ++ ++#ifndef HAVE_PLD ++.macro pld reg ++.endm ++#endif ++ + .macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 + mov \Rd0, \Rn0, lsr #(\shift * 8) + mov \Rd1, \Rn1, lsr #(\shift * 8) +diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt.c mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt.c +--- mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt.c 2008-07-23 12:19:05.000000000 +0200 ++++ mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt.c 2008-07-24 19:54:00.753198000 +0200 +@@ -19,10 +19,10 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#include "dsputil.h" ++#include "libavcodec/dsputil.h" + + #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt +-#define SET_RND(regd) __asm__ 
__volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); ++#define SET_RND(regd) asm volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); + #define WAVG2B "wavg2b" + #include "dsputil_iwmmxt_rnd.h" + #undef DEF +@@ -30,7 +30,7 @@ + #undef WAVG2B + + #define DEF(x, y) x ## _ ## y ##_iwmmxt +-#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); ++#define SET_RND(regd) asm volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); + #define WAVG2B "wavg2br" + #include "dsputil_iwmmxt_rnd.h" + #undef DEF +@@ -89,7 +89,7 @@ + { + uint8_t *pixels2 = pixels + line_size; + +- __asm__ __volatile__ ( ++ asm volatile ( + "mov r12, #4 \n\t" + "1: \n\t" + "pld [%[pixels], %[line_size2]] \n\t" +@@ -125,7 +125,7 @@ + + static void clear_blocks_iwmmxt(DCTELEM *blocks) + { +- __asm __volatile( ++ asm volatile( + "wzero wr0 \n\t" + "mov r1, #(128 * 6 / 32) \n\t" + "1: \n\t" +diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h +--- mythtv.orig/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h 2008-07-23 12:19:05.000000000 +0200 ++++ mythtv/libs/libavcodec/armv4l/dsputil_iwmmxt_rnd.h 2008-07-24 19:54:01.023198000 +0200 +@@ -19,13 +19,14 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#ifndef FFMPEG_DSPUTIL_IWMMXT_RND_H +-#define FFMPEG_DSPUTIL_IWMMXT_RND_H ++/* This header intentionally has no multiple inclusion guards. It is meant to ++ * be included multiple times and generates different code depending on the ++ * value of certain #defines. 
*/ + + void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) + { + int stride = line_size; +- __asm__ __volatile__ ( ++ asm volatile ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" +@@ -59,7 +60,7 @@ + void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) + { + int stride = line_size; +- __asm__ __volatile__ ( ++ asm volatile ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" +@@ -101,7 +102,7 @@ + void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) + { + int stride = line_size; +- __asm__ __volatile__ ( ++ asm volatile ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" +@@ -141,7 +142,7 @@ + void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) + { + int stride = line_size; +- __asm__ __volatile__ ( ++ asm volatile ( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" +@@ -200,7 +201,7 @@ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version +- __asm__ __volatile__( ++ asm volatile( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" +@@ -249,7 +250,7 @@ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version +- __asm__ __volatile__( ++ asm volatile( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" +@@ -310,7 +311,7 @@ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version +- __asm__ __volatile__( ++ asm volatile( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" +@@ -371,7 +372,7 @@ + // [wr0 wr1 wr2 wr3] for 
previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version +- __asm__ __volatile__( ++ asm volatile( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" +@@ -447,7 +448,7 @@ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line +- __asm__ __volatile__( ++ asm volatile( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" +@@ -501,7 +502,7 @@ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line +- __asm__ __volatile__( ++ asm volatile( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" +@@ -558,7 +559,7 @@ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line +- __asm__ __volatile__( ++ asm volatile( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" +@@ -626,7 +627,7 @@ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version +- __asm__ __volatile__( ++ asm volatile( + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" +@@ -720,7 +721,7 @@ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version +- __asm__ __volatile__( ++ asm volatile( + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" +@@ -862,7 +863,7 @@ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version +- __asm__ __volatile__( ++ asm volatile( + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [%[pixels]] \n\t" +@@ -966,7 +967,7 @@ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version +- 
__asm__ __volatile__( ++ asm volatile( + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [%[pixels]] \n\t" +@@ -1115,5 +1116,3 @@ + : [line_size]"r"(line_size) + : "r12", "memory"); + } +- +-#endif /* FFMPEG_DSPUTIL_IWMMXT_RND_H */ +diff -Nurd mythtv.orig/libs/libavcodec/armv4l/dsputil_neon.c mythtv/libs/libavcodec/armv4l/dsputil_neon.c +--- mythtv.orig/libs/libavcodec/armv4l/dsputil_neon.c 1970-01-01 01:00:00.000000000 +0100 ++++ mythtv/libs/libavcodec/armv4l/dsputil_neon.c 2008-07-24 19:54:01.023198000 +0200 +@@ -0,0 +1,397 @@ ++/* ++ * ARM NEON optimised DSP functions ++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include <stdint.h> ++ ++#include "libavcodec/avcodec.h" ++#include "libavcodec/dsputil.h" ++ ++extern void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, ++ int h, int x, int y); ++ ++#define PUT_PIXELS_16_X2(vhadd) \ ++ "1: \n\t" \ ++ "vld1.64 {d0,d1,d2}, [%[p]], %[line_size] \n\t" \ ++ "vld1.64 {d4,d5,d6}, [%[p]], %[line_size] \n\t" \ ++ "pld [%[p]] \n\t" \ ++ "subs %[h], %[h], #2 \n\t" \ ++ "vext.8 q1, q0, q1, #1 \n\t" \ ++ "vext.8 q3, q2, q3, #1 \n\t" \ ++ vhadd".u8 q0, q0, q1 \n\t" \ ++ vhadd".u8 q2, q2, q3 \n\t" \ ++ "vst1.64 {d0,d1}, [%[b],:64], %[line_size] \n\t" \ ++ "vst1.64 {d4,d5}, [%[b],:64], %[line_size] \n\t" \ ++ "bne 1b \n\t" ++ ++#define PUT_PIXELS_16_Y2(vhadd) \ ++ "add %[p1], %[p0], %[line_size] \n\t" \ ++ "lsl %[l2], %[line_size], #1 \n\t" \ ++ "vld1.64 {d0,d1}, [%[p0]], %[l2] \n\t" \ ++ "vld1.64 {d2,d3}, [%[p1]], %[l2] \n\t" \ ++ "1: \n\t" \ ++ "subs %[h], %[h], #2 \n\t" \ ++ vhadd".u8 q2, q0, q1 \n\t" \ ++ "vst1.64 {d4,d5}, [%[b],:128], %[line_size] \n\t" \ ++ "vld1.64 {d0,d1}, [%[p0]], %[l2] \n\t" \ ++ vhadd".u8 q2, q0, q1 \n\t" \ ++ "vst1.64 {d4,d5}, [%[b],:128], %[line_size] \n\t" \ ++ "vld1.64 {d2,d3}, [%[p1]], %[l2] \n\t" \ ++ "bne 1b \n\t" ++ ++#define PUT_PIXELS_16_XY2(vshrn, no_rnd) \ ++ "lsl %[l2], %[line_size], #1 \n\t" \ ++ "add %[p1], %[p0], %[line_size] \n\t" \ ++ "vld1.64 {d0,d1,d2}, [%[p0]], %[l2] \n\t" \ ++ "vld1.64 {d4,d5,d6}, [%[p1]], %[l2] \n\t" \ ++ "pld [%[p0]] \n\t" \ ++ "pld [%[p1]] \n\t" \ ++ "vext.8 q1, q0, q1, #1 \n\t" \ ++ "vext.8 q3, q2, q3, #1 \n\t" \ ++ "vaddl.u8 q8, d0, d2 \n\t" \ ++ "vaddl.u8 q10, d1, d3 \n\t" \ ++ "vaddl.u8 q9, d4, d6 \n\t" \ ++ "vaddl.u8 q11, d5, d7 \n\t" \ ++ "1: \n\t" \ ++ "subs %[h], %[h], #2 \n\t" \ ++ "vld1.64 {d0,d1,d2}, [%[p0]], 
%[l2] \n\t" \ ++ "vadd.u16 q12, q8, q9 \n\t" \ ++ "pld [%[p0]] \n\t" \ ++ no_rnd "vadd.u16 q12, q12, q13 \n\t" \ ++ "vext.8 q15, q0, q1, #1 \n\t" \ ++ "vadd.u16 q1, q10, q11 \n\t" \ ++ vshrn".u16 d28, q12, #2 \n\t" \ ++ no_rnd "vadd.u16 q1, q1, q13 \n\t" \ ++ vshrn".u16 d29, q1, #2 \n\t" \ ++ "vaddl.u8 q8, d0, d30 \n\t" \ ++ "vld1.64 {d2,d3,d4}, [%[p1]], %[l2] \n\t" \ ++ "vaddl.u8 q10, d1, d31 \n\t" \ ++ "vst1.64 {d28,d29}, [%[b],:128], %[line_size] \n\t" \ ++ "vadd.u16 q12, q8, q9 \n\t" \ ++ "pld [%[p1]] \n\t" \ ++ no_rnd "vadd.u16 q12, q12, q13 \n\t" \ ++ "vext.8 q2, q1, q2, #1 \n\t" \ ++ "vadd.u16 q0, q10, q11 \n\t" \ ++ vshrn".u16 d30, q12, #2 \n\t" \ ++ no_rnd "vadd.u16 q0, q0, q13 \n\t" \ ++ vshrn".u16 d31, q0, #2 \n\t" \ ++ "vaddl.u8 q9, d2, d4 \n\t" \ ++ "vst1.64 {d30,d31}, [%[b],:128], %[line_size] \n\t" \ ++ "vaddl.u8 q11, d3, d5 \n\t" \ ++ "bgt 1b \n\t" ++ ++#define PUT_PIXELS_8_X2(vhadd) \ ++ "1: \n\t" \ ++ "vld1.64 {d0,d1}, [%[p]], %[line_size] \n\t" \ ++ "vld1.64 {d2,d3}, [%[p]], %[line_size] \n\t" \ ++ "pld [%[p]] \n\t" \ ++ "subs %[h], %[h], #2 \n\t" \ ++ "vext.8 d1, d0, d1, #1 \n\t" \ ++ "vext.8 d3, d2, d3, #1 \n\t" \ ++ "vswp d1, d2 \n\t" \ ++ vhadd".u8 q0, q0, q1 \n\t" \ ++ "vst1.64 {d0}, [%[b],:64], %[line_size] \n\t" \ ++ "vst1.64 {d1}, [%[b],:64], %[line_size] \n\t" \ ++ "bne 1b \n\t" ++ ++#define PUT_PIXELS_8_Y2(vhadd) \ ++ "add %[p1], %[p0], %[line_size] \n\t" \ ++ "lsl %[l2], %[line_size], #1 \n\t" \ ++ "vld1.64 {d0}, [%[p0]], %[l2] \n\t" \ ++ "vld1.64 {d1}, [%[p1]], %[l2] \n\t" \ ++ "1: \n\t" \ ++ "subs %[h], %[h], #2 \n\t" \ ++ vhadd".u8 d4, d0, d1 \n\t" \ ++ "vst1.64 {d4}, [%[b],:64], %[line_size] \n\t" \ ++ "vld1.64 {d0}, [%[p0]], %[l2] \n\t" \ ++ vhadd".u8 d4, d0, d1 \n\t" \ ++ "vst1.64 {d4}, [%[b],:64], %[line_size] \n\t" \ ++ "vld1.64 {d1}, [%[p1]], %[l2] \n\t" \ ++ "bne 1b \n\t" ++ ++#define PUT_PIXELS8_XY2(vshrn, no_rnd) \ ++ "lsl %[l2], %[line_size], #1 \n\t" \ ++ "add %[p1], %[p0], %[line_size] \n\t" \ ++ "vld1.64 {d0,d1}, 
[%[p0]], %[l2] \n\t" \ ++ "vld1.64 {d2,d3}, [%[p1]], %[l2] \n\t" \ ++ "pld [%[p0]] \n\t" \ ++ "pld [%[p1]] \n\t" \ ++ "vext.8 d4, d0, d1, #1 \n\t" \ ++ "vext.8 d6, d2, d3, #1 \n\t" \ ++ "vaddl.u8 q8, d0, d4 \n\t" \ ++ "vaddl.u8 q9, d2, d6 \n\t" \ ++ "1: \n\t" \ ++ "subs %[h], %[h], #2 \n\t" \ ++ "vld1.64 {d0,d1}, [%[p0]], %[l2] \n\t" \ ++ "pld [%[p0]] \n\t" \ ++ "vadd.u16 q10, q8, q9 \n\t" \ ++ "vext.8 d4, d0, d1, #1 \n\t" \ ++ no_rnd "vadd.u16 q10, q10, q11 \n\t" \ ++ "vaddl.u8 q8, d0, d4 \n\t" \ ++ vshrn".u16 d5, q10, #2 \n\t" \ ++ "vld1.64 {d2,d3}, [%[p1]], %[l2] \n\t" \ ++ "vadd.u16 q10, q8, q9 \n\t" \ ++ "pld [%[p1]] \n\t" \ ++ no_rnd "vadd.u16 q10, q10, q11 \n\t" \ ++ "vst1.64 {d5}, [%[b],:64], %[line_size] \n\t" \ ++ vshrn".u16 d7, q10, #2 \n\t" \ ++ "vext.8 d6, d2, d3, #1 \n\t" \ ++ "vaddl.u8 q9, d2, d6 \n\t" \ ++ "vst1.64 {d7}, [%[b],:64], %[line_size] \n\t" \ ++ "bgt 1b \n\t" ++ ++static void put_pixels16_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ asm volatile( ++ "1: \n\t" ++ "vld1.64 {d0,d1}, [%[pixels]], %[line_size] \n\t" ++ "vld1.64 {d2,d3}, [%[pixels]], %[line_size] \n\t" ++ "vld1.64 {d4,d5}, [%[pixels]], %[line_size] \n\t" ++ "vld1.64 {d6,d7}, [%[pixels]], %[line_size] \n\t" ++ "pld [%[pixels]] \n\t" ++ "subs %[h], %[h], #4 \n\t" ++ "vst1.64 {d0,d1}, [%[block],:128], %[line_size] \n\t" ++ "vst1.64 {d2,d3}, [%[block],:128], %[line_size] \n\t" ++ "vst1.64 {d4,d5}, [%[block],:128], %[line_size] \n\t" ++ "vst1.64 {d6,d7}, [%[block],:128], %[line_size] \n\t" ++ "bne 1b \n\t" ++ : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "memory"); ++} ++ ++static void put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ asm volatile( ++ PUT_PIXELS_16_X2("vrhadd") ++ : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "memory"); ++} ++ 
++static void put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ const uint8_t *p1; ++ int l2; ++ ++ asm volatile( ++ PUT_PIXELS_16_Y2("vrhadd") ++ : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h), ++ [l2]"=&r"(l2) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "d4", "d5", "memory"); ++} ++ ++static void put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ const uint8_t *p1; ++ int l2; ++ ++ asm volatile( ++ PUT_PIXELS_16_XY2("vrshrn", "@") ++ : [b]"+r"(block), ++ [p0]"+r"(pixels), ++ [p1]"=&r"(p1), [h]"+r"(h), ++ [l2]"=&r"(l2) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", ++ "d28", "d29", "d30", "d31", ++ "q8", "q9", "q10", "q11", "q12", "memory"); ++} ++ ++static void put_pixels8_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ asm volatile( ++ "1: \n\t" ++ "vld1.64 {d0}, [%[p]], %[line_size] \n\t" ++ "vld1.64 {d1}, [%[p]], %[line_size] \n\t" ++ "vld1.64 {d2}, [%[p]], %[line_size] \n\t" ++ "vld1.64 {d3}, [%[p]], %[line_size] \n\t" ++ "subs %[h], %[h], #4 \n\t" ++ "vst1.64 {d0}, [%[b],:64], %[line_size] \n\t" ++ "vst1.64 {d1}, [%[b],:64], %[line_size] \n\t" ++ "vst1.64 {d2}, [%[b],:64], %[line_size] \n\t" ++ "vst1.64 {d3}, [%[b],:64], %[line_size] \n\t" ++ "bne 1b \n\t" ++ : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "memory"); ++} ++ ++static void put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ asm volatile( ++ PUT_PIXELS_8_X2("vrhadd") ++ : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "memory"); ++} ++ ++static void put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ const uint8_t *p1; ++ int l2; ++ ++ asm volatile( ++ PUT_PIXELS_8_Y2("vrhadd") ++ : [b]"+r"(block), [p0]"+r"(pixels), 
[p1]"=&r"(p1), [h]"+r"(h), ++ [l2]"=&r"(l2) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d4", "memory"); ++} ++ ++static void put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ const uint8_t *p1; ++ int l2; ++ ++ asm volatile( ++ PUT_PIXELS8_XY2("vrshrn", "@") ++ : [b]"+r"(block), ++ [p0]"+r"(pixels), ++ [p1]"=&r"(p1), [h]"+r"(h), ++ [l2]"=&r"(l2) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "d4", "d6", "d7", ++ "q8", "q9", "q10", "memory"); ++} ++ ++static void put_no_rnd_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ asm volatile( ++ PUT_PIXELS_16_X2("vhadd") ++ : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "memory"); ++} ++ ++static void put_no_rnd_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ const uint8_t *p1; ++ int l2; ++ ++ asm volatile( ++ PUT_PIXELS_16_Y2("vhadd") ++ : [b]"+r"(block), [p0]"+r"(pixels), [p1]"=&r"(p1), [h]"+r"(h), ++ [l2]"=&r"(l2) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "d4", "d5", "memory"); ++} ++ ++static void put_no_rnd_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ const uint8_t *p1; ++ int l2; ++ ++ asm volatile( ++ "vmov.i16 q13, #1 \n\t" ++ PUT_PIXELS_16_XY2("vshrn", "") ++ : [b]"+r"(block), ++ [p0]"+r"(pixels), ++ [p1]"=&r"(p1), [h]"+r"(h), ++ [l2]"=&r"(l2) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", ++ "d28", "d29", "d30", "d31", ++ "q8", "q9", "q10", "q11", "q12", "q13", "memory"); ++} ++ ++static void put_no_rnd_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ asm volatile( ++ PUT_PIXELS_8_X2("vhadd") ++ : [b]"+r"(block), [p]"+r"(pixels), [h]"+r"(h) ++ : [line_size]"r"(line_size) ++ : "d0", "d1", "d2", "d3", "memory"); ++} ++ ++static void put_no_rnd_pixels8_y2_neon(uint8_t 
*block, const uint8_t *pixels, ++ int line_size, int h) ++{ ++ const uint8_t *p1; ++ int l2; |
