summary | refs | log | tree | commit | diff
path: root/packages/mplayer/files
diff options
context:
space:
mode:
Diffstat (limited to 'packages/mplayer/files')
-rw-r--r--  packages/mplayer/files/mru-neon-float-to-int16.diff      |  107
-rw-r--r--  packages/mplayer/files/mru-neon-h264-chrome.diff         |  364
-rw-r--r--  packages/mplayer/files/mru-neon-h264-loopfilter.diff     |  346
-rw-r--r--  packages/mplayer/files/mru-neon-h264-qpel.diff           | 1040
-rw-r--r--  packages/mplayer/files/mru-neon-h264idct-dc.diff         |   55
-rw-r--r--  packages/mplayer/files/mru-neon-h264idctadd.diff         |  123
-rw-r--r--  packages/mplayer/files/mru-neon-put-pixels.diff          |  376
-rw-r--r--  packages/mplayer/files/mru-neon-simple-idct.diff         |  501
-rw-r--r--  packages/mplayer/files/mru-neon-vector-fmul-window.diff  |   86
-rw-r--r--  packages/mplayer/files/mru-neon-vector-fmul.diff         |   56
-rw-r--r--  packages/mplayer/files/mru-neon-vorbis-inverse.diff      |   68
-rw-r--r--  packages/mplayer/files/omapfb.patch                      |   23
-rw-r--r--  packages/mplayer/files/pld-onlyarm5-svn.patch            |  405
13 files changed, 407 insertions, 3143 deletions
diff --git a/packages/mplayer/files/mru-neon-float-to-int16.diff b/packages/mplayer/files/mru-neon-float-to-int16.diff
deleted file mode 100644
index 7a874cab30..0000000000
--- a/packages/mplayer/files/mru-neon-float-to-int16.diff
+++ /dev/null
@@ -1,107 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Thu, 31 Jul 2008 02:35:42 +0000 (+0100)
-Subject: ARM: NEON optimised float_to_int16
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=f16a738cfc3307cbcba2f9c8aff4b5aa43144731
-
-ARM: NEON optimised float_to_int16
----
-
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-index 6dbe835..b584e5b 100644
---- a/libavcodec/armv4l/dsputil_neon.c
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -91,6 +91,9 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
- void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
- void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
-
-+void ff_float_to_int16_neon(int16_t *, const float *, long);
-+void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
-+
- void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- {
- c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
-@@ -158,4 +161,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
-
- c->h264_idct_add = ff_h264_idct_add_neon;
- c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
-+
-+ c->float_to_int16 = ff_float_to_int16_neon;
-+ c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
- }
-diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
-index fc5e401..44f75ba 100644
---- a/libavcodec/armv4l/dsputil_neon_s.S
-+++ b/libavcodec/armv4l/dsputil_neon_s.S
-@@ -252,3 +252,72 @@
- defun2 put_pixels8_x2, _no_rnd, vhadd.u8
- defun2 put_pixels8_y2, _no_rnd, vhadd.u8
- defun2 put_pixels8_xy2, _no_rnd, vshrn.u16, 1
-+
-+extern ff_float_to_int16_neon
-+ dmb
-+1: vld1.64 {d0-d3}, [r1,:128]!
-+ vcvt.s32.f32 q2, q0
-+ vcvt.s32.f32 q3, q1
-+ subs r2, r2, #8
-+ vqmovn.s32 d4, q2
-+ vqmovn.s32 d5, q3
-+ vst1.64 {d4-d5}, [r0,:128]!
-+ bgt 1b
-+ bx lr
-+ .endfunc
-+
-+extern ff_float_to_int16_interleave_neon
-+ cmp r3, #2
-+ ldrlt r1, [r1]
-+ blt ff_float_to_int16_neon
-+ bne 2f
-+
-+ ldr ip, [r1]
-+ ldr r1, [r1, #4]
-+ vld1.64 {d0-d3}, [ip,:128]!
-+ vld1.64 {d4-d7}, [r1,:128]!
-+ dmb
-+1: vcvt.s32.f32 q8, q0
-+ vcvt.s32.f32 q9, q1
-+ vcvt.s32.f32 q10, q2
-+ vcvt.s32.f32 q11, q3
-+ subs r2, r2, #8
-+ vqmovn.s32 d16, q8
-+ vqmovn.s32 d17, q9
-+ vqmovn.s32 d18, q10
-+ vqmovn.s32 d19, q11
-+ beq 1f
-+ vld1.64 {d0-d3}, [ip,:128]!
-+ vld1.64 {d4-d7}, [r1,:128]!
-+ vst2.16 {d16-d19}, [r0,:64]!
-+ b 1b
-+1: vst2.16 {d16-d19}, [r0,:64]!
-+ bx lr
-+
-+2: push {r4,r5,lr}
-+ lsls r4, r3, #1
-+ dmb
-+ b 4f
-+3: vld1.64 {d0-d3}, [ip,:128]!
-+ vcvt.s32.f32 q2, q0
-+ vcvt.s32.f32 q3, q1
-+ subs lr, lr, #8
-+ vqmovn.s32 d4, q2
-+ vqmovn.s32 d5, q3
-+ vst1.16 {d4[0]}, [r5,:16], r4
-+ vst1.16 {d4[1]}, [r5,:16], r4
-+ vst1.16 {d4[2]}, [r5,:16], r4
-+ vst1.16 {d4[3]}, [r5,:16], r4
-+ vst1.16 {d5[0]}, [r5,:16], r4
-+ vst1.16 {d5[1]}, [r5,:16], r4
-+ vst1.16 {d5[2]}, [r5,:16], r4
-+ vst1.16 {d5[3]}, [r5,:16], r4
-+ bgt 3b
-+ subs r3, r3, #1
-+4: ldr ip, [r1], #4
-+ mov lr, r2
-+ mov r5, r0
-+ add r0, r0, #2
-+ bne 3b
-+ pop {r4,r5,pc}
-+ .endfunc
diff --git a/packages/mplayer/files/mru-neon-h264-chrome.diff b/packages/mplayer/files/mru-neon-h264-chrome.diff
deleted file mode 100644
index cb6c4ff991..0000000000
--- a/packages/mplayer/files/mru-neon-h264-chrome.diff
+++ /dev/null
@@ -1,364 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Fri, 11 Jul 2008 01:20:07 +0000 (+0100)
-Subject: ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=d3aa8f93b8a0061e0c3ac12aeed055961abfc113
-
-ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
----
-
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index 7fa02fa..36ba158 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -437,6 +437,7 @@ OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \
-
- ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \
- armv4l/simple_idct_neon.o \
-+ armv4l/h264dsp_neon.o \
-
- OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \
- sparc/simple_idct_vis.o \
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-index 8a10dde..a6d86cd 100644
---- a/libavcodec/armv4l/dsputil_neon.c
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -42,6 +42,12 @@ void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
- void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
- void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
-
-+void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
-+void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
-+
-+void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
-+void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
-+
- void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- {
- c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
-@@ -62,6 +68,12 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
- c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
-
-+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
-+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
-+
-+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
-+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
-+
- c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon;
- c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon;
- }
-diff --git a/libavcodec/armv4l/h264dsp_neon.S b/libavcodec/armv4l/h264dsp_neon.S
-new file mode 100644
-index 0000000..28d9aa7
---- /dev/null
-+++ b/libavcodec/armv4l/h264dsp_neon.S
-@@ -0,0 +1,308 @@
-+/*
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+ .fpu neon
-+
-+/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
-+ .macro h264_chroma_mc8 avg=0
-+ push {r4-r7, lr}
-+ ldrd r4, [sp, #20]
-+.if \avg
-+ mov lr, r0
-+.endif
-+ pld [r1]
-+ pld [r1, r2]
-+
-+ muls r7, r4, r5
-+ rsb r6, r7, r5, lsl #3
-+ rsb ip, r7, r4, lsl #3
-+ sub r4, r7, r4, lsl #3
-+ sub r4, r4, r5, lsl #3
-+ add r4, r4, #64
-+
-+ dmb
-+
-+ beq 2f
-+
-+ add r5, r1, r2
-+
-+ vdup.8 d0, r4
-+ lsl r4, r2, #1
-+ vdup.8 d1, ip
-+ vld1.64 {d4, d5}, [r1], r4
-+ vdup.8 d2, r6
-+ vld1.64 {d6, d7}, [r5], r4
-+ vdup.8 d3, r7
-+
-+ vext.8 d5, d4, d5, #1
-+ vext.8 d7, d6, d7, #1
-+
-+1: pld [r5]
-+ vmull.u8 q8, d4, d0
-+ vmlal.u8 q8, d5, d1
-+ vld1.64 {d4, d5}, [r1], r4
-+ vmlal.u8 q8, d6, d2
-+ vext.8 d5, d4, d5, #1
-+ vmlal.u8 q8, d7, d3
-+ vmull.u8 q9, d6, d0
-+ subs r3, r3, #2
-+ vmlal.u8 q9, d7, d1
-+ vmlal.u8 q9, d4, d2
-+ vmlal.u8 q9, d5, d3
-+ vrshrn.u16 d16, q8, #6
-+ vld1.64 {d6, d7}, [r5], r4
-+ pld [r1]
-+ vrshrn.u16 d17, q9, #6
-+.if \avg
-+ vld1.64 {d20}, [lr,:64], r2
-+ vld1.64 {d21}, [lr,:64], r2
-+ vrhadd.u8 q8, q8, q10
-+.endif
-+ vext.8 d7, d6, d7, #1
-+ vst1.64 {d16}, [r0,:64], r2
-+ vst1.64 {d17}, [r0,:64], r2
-+ bgt 1b
-+
-+ pop {r4-r7, pc}
-+
-+2: tst r6, r6
-+ add ip, ip, r6
-+ vdup.8 d0, r4
-+ vdup.8 d1, ip
-+
-+ beq 4f
-+
-+ add r5, r1, r2
-+ lsl r4, r2, #1
-+ vld1.64 {d4}, [r1], r4
-+ vld1.64 {d6}, [r5], r4
-+
-+3: pld [r5]
-+ vmull.u8 q8, d4, d0
-+ vmlal.u8 q8, d6, d1
-+ vld1.64 {d4}, [r1], r4
-+ vmull.u8 q9, d6, d0
-+ vmlal.u8 q9, d4, d1
-+ vld1.64 {d6}, [r5], r4
-+ vrshrn.u16 d16, q8, #6
-+ vrshrn.u16 d17, q9, #6
-+.if \avg
-+ vld1.64 {d20}, [lr,:64], r2
-+ vld1.64 {d21}, [lr,:64], r2
-+ vrhadd.u8 q8, q8, q10
-+.endif
-+ subs r3, r3, #2
-+ pld [r1]
-+ vst1.64 {d16}, [r0,:64], r2
-+ vst1.64 {d17}, [r0,:64], r2
-+ bgt 3b
-+
-+ pop {r4-r7, pc}
-+
-+4: vld1.64 {d4, d5}, [r1], r2
-+ vld1.64 {d6, d7}, [r1], r2
-+ vext.8 d5, d4, d5, #1
-+ vext.8 d7, d6, d7, #1
-+
-+5: pld [r1]
-+ subs r3, r3, #2
-+ vmull.u8 q8, d4, d0
-+ vmlal.u8 q8, d5, d1
-+ vld1.64 {d4, d5}, [r1], r2
-+ vmull.u8 q9, d6, d0
-+ vmlal.u8 q9, d7, d1
-+ pld [r1]
-+ vext.8 d5, d4, d5, #1
-+ vrshrn.u16 d16, q8, #6
-+ vrshrn.u16 d17, q9, #6
-+.if \avg
-+ vld1.64 {d20}, [lr,:64], r2
-+ vld1.64 {d21}, [lr,:64], r2
-+ vrhadd.u8 q8, q8, q10
-+.endif
-+ vld1.64 {d6, d7}, [r1], r2
-+ vext.8 d7, d6, d7, #1
-+ vst1.64 {d16}, [r0,:64], r2
-+ vst1.64 {d17}, [r0,:64], r2
-+ bgt 5b
-+
-+ pop {r4-r7, pc}
-+ .endm
-+
-+/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
-+ .macro h264_chroma_mc4 avg=0
-+ push {r4-r7, lr}
-+ ldrd r4, [sp, #20]
-+.if \avg
-+ mov lr, r0
-+.endif
-+ pld [r1]
-+ pld [r1, r2]
-+
-+ muls r7, r4, r5
-+ rsb r6, r7, r5, lsl #3
-+ rsb ip, r7, r4, lsl #3
-+ sub r4, r7, r4, lsl #3
-+ sub r4, r4, r5, lsl #3
-+ add r4, r4, #64
-+
-+ dmb
-+
-+ beq 2f
-+
-+ add r5, r1, r2
-+
-+ vdup.8 d0, r4
-+ lsl r4, r2, #1
-+ vdup.8 d1, ip
-+ vld1.64 {d4}, [r1], r4
-+ vdup.8 d2, r6
-+ vld1.64 {d6}, [r5], r4
-+ vdup.8 d3, r7
-+
-+ vext.8 d5, d4, d5, #1
-+ vext.8 d7, d6, d7, #1
-+ vtrn.32 d4, d5
-+ vtrn.32 d6, d7
-+
-+ vtrn.32 d0, d1
-+ vtrn.32 d2, d3
-+
-+1: pld [r5]
-+ vmull.u8 q8, d4, d0
-+ vmlal.u8 q8, d6, d2
-+ vld1.64 {d4}, [r1], r4
-+ vext.8 d5, d4, d5, #1
-+ vtrn.32 d4, d5
-+ vmull.u8 q9, d6, d0
-+ vmlal.u8 q9, d4, d2
-+ vld1.64 {d6}, [r5], r4
-+ vadd.i16 d16, d16, d17
-+ vadd.i16 d17, d18, d19
-+ vrshrn.u16 d16, q8, #6
-+ subs r3, r3, #2
-+ pld [r1]
-+.if \avg
-+ vld1.32 {d20[0]}, [lr,:32], r2
-+ vld1.32 {d20[1]}, [lr,:32], r2
-+ vrhadd.u8 d16, d16, d20
-+.endif
-+ vext.8 d7, d6, d7, #1
-+ vtrn.32 d6, d7
-+ vst1.32 {d16[0]}, [r0,:32], r2
-+ vst1.32 {d16[1]}, [r0,:32], r2
-+ bgt 1b
-+
-+ pop {r4-r7, pc}
-+
-+2: tst r6, r6
-+ add ip, ip, r6
-+ vdup.8 d0, r4
-+ vdup.8 d1, ip
-+ vtrn.32 d0, d1
-+
-+ beq 4f
-+
-+ vext.32 d1, d0, d1, #1
-+ add r5, r1, r2
-+ lsl r4, r2, #1
-+ vld1.32 {d4[0]}, [r1], r4
-+ vld1.32 {d4[1]}, [r5], r4
-+
-+3: pld [r5]
-+ vmull.u8 q8, d4, d0
-+ vld1.32 {d4[0]}, [r1], r4
-+ vmull.u8 q9, d4, d1
-+ vld1.32 {d4[1]}, [r5], r4
-+ vadd.i16 d16, d16, d17
-+ vadd.i16 d17, d18, d19
-+ vrshrn.u16 d16, q8, #6
-+.if \avg
-+ vld1.32 {d20[0]}, [lr,:32], r2
-+ vld1.32 {d20[1]}, [lr,:32], r2
-+ vrhadd.u8 d16, d16, d20
-+.endif
-+ subs r3, r3, #2
-+ pld [r1]
-+ vst1.32 {d16[0]}, [r0,:32], r2
-+ vst1.32 {d16[1]}, [r0,:32], r2
-+ bgt 3b
-+
-+ pop {r4-r7, pc}
-+
-+4: vld1.64 {d4}, [r1], r2
-+ vld1.64 {d6}, [r1], r2
-+ vext.8 d5, d4, d5, #1
-+ vext.8 d7, d6, d7, #1
-+ vtrn.32 d4, d5
-+ vtrn.32 d6, d7
-+
-+5: vmull.u8 q8, d4, d0
-+ vmull.u8 q9, d6, d0
-+ subs r3, r3, #2
-+ vld1.64 {d4}, [r1], r2
-+ vext.8 d5, d4, d5, #1
-+ vtrn.32 d4, d5
-+ vadd.i16 d16, d16, d17
-+ vadd.i16 d17, d18, d19
-+ pld [r1]
-+ vrshrn.u16 d16, q8, #6
-+.if \avg
-+ vld1.32 {d20[0]}, [lr,:32], r2
-+ vld1.32 {d20[1]}, [lr,:32], r2
-+ vrhadd.u8 d16, d16, d20
-+.endif
-+ vld1.64 {d6}, [r1], r2
-+ vext.8 d7, d6, d7, #1
-+ vtrn.32 d6, d7
-+ pld [r1]
-+ vst1.32 {d16[0]}, [r0,:32], r2
-+ vst1.32 {d16[1]}, [r0,:32], r2
-+ bgt 5b
-+
-+ pop {r4-r7, pc}
-+ .endm
-+
-+ .text
-+ .align
-+
-+ .global ff_put_h264_chroma_mc8_neon
-+ .func ff_put_h264_chroma_mc8_neon
-+ff_put_h264_chroma_mc8_neon:
-+ h264_chroma_mc8
-+ .endfunc
-+
-+ .global ff_avg_h264_chroma_mc8_neon
-+ .func ff_avg_h264_chroma_mc8_neon
-+ff_avg_h264_chroma_mc8_neon:
-+ h264_chroma_mc8 avg=1
-+ .endfunc
-+
-+ .global ff_put_h264_chroma_mc4_neon
-+ .func ff_put_h264_chroma_mc4_neon
-+ff_put_h264_chroma_mc4_neon:
-+ h264_chroma_mc4
-+ .endfunc
-+
-+ .global ff_avg_h264_chroma_mc4_neon
-+ .func ff_avg_h264_chroma_mc4_neon
-+ff_avg_h264_chroma_mc4_neon:
-+ h264_chroma_mc4 avg=1
-+ .endfunc
diff --git a/packages/mplayer/files/mru-neon-h264-loopfilter.diff b/packages/mplayer/files/mru-neon-h264-loopfilter.diff
deleted file mode 100644
index 056702517b..0000000000
--- a/packages/mplayer/files/mru-neon-h264-loopfilter.diff
+++ /dev/null
@@ -1,346 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Fri, 15 Aug 2008 00:02:55 +0000 (+0100)
-Subject: ARM: NEON optimised H.264 loop filter
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=0c1b6bb0814587bd4c8a895c6d7dc2dd4cc2841a
-
-ARM: NEON optimised H.264 loop filter
----
-
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-index a6d86cd..68ecbe8 100644
---- a/libavcodec/armv4l/dsputil_neon.c
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -48,6 +48,15 @@ void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
- void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
- void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
-
-+void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
-+ int beta, int8_t *tc0);
-+void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
-+ int beta, int8_t *tc0);
-+void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
-+ int beta, int8_t *tc0);
-+void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
-+ int beta, int8_t *tc0);
-+
- void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- {
- c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
-@@ -76,4 +85,9 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
-
- c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon;
- c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon;
-+
-+ c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
-+ c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
-+ c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
-+ c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
- }
-diff --git a/libavcodec/armv4l/h264dsp_neon.S b/libavcodec/armv4l/h264dsp_neon.S
-index 28d9aa7..ac793b2 100644
---- a/libavcodec/armv4l/h264dsp_neon.S
-+++ b/libavcodec/armv4l/h264dsp_neon.S
-@@ -306,3 +306,303 @@ ff_put_h264_chroma_mc4_neon:
- ff_avg_h264_chroma_mc4_neon:
- h264_chroma_mc4 avg=1
- .endfunc
-+
-+ /* H.264 loop filter */
-+
-+ .macro h264_loop_filter_start
-+ ldr ip, [sp]
-+ tst r2, r2
-+ ldr ip, [ip]
-+ tstne r3, r3
-+ vmov.32 d24[0], ip
-+ and ip, ip, ip, lsl #16
-+ bxeq lr
-+ ands ip, ip, ip, lsl #8
-+ bxlt lr
-+ .endm
-+
-+ .macro align_push_regs
-+ and ip, sp, #15
-+ add ip, ip, #32
-+ sub sp, sp, ip
-+ dmb
-+ vst1.64 {d12-d15}, [sp,:128]
-+ sub sp, sp, #32
-+ vst1.64 {d8-d11}, [sp,:128]
-+ .endm
-+
-+ .macro align_pop_regs
-+ vld1.64 {d8-d11}, [sp,:128]!
-+ vld1.64 {d12-d15}, [sp,:128], ip
-+ .endm
-+
-+ .macro h264_loop_filter_luma
-+ vdup.8 q11, r2 @ alpha
-+ vmovl.u8 q12, d24
-+ vabd.u8 q6, q8, q0 @ abs(p0 - q0)
-+ vmovl.u16 q12, d24
-+ vabd.u8 q14, q9, q8 @ abs(p1 - p0)
-+ vsli.16 q12, q12, #8
-+ vabd.u8 q15, q1, q0 @ abs(q1 - q0)
-+ vsli.32 q12, q12, #16
-+ vclt.u8 q6, q6, q11 @ < alpha
-+ vdup.8 q11, r3 @ beta
-+ vclt.s8 q7, q12, #0
-+ vclt.u8 q14, q14, q11 @ < beta
-+ vclt.u8 q15, q15, q11 @ < beta
-+ vbic q6, q6, q7
-+ vabd.u8 q4, q10, q8 @ abs(p2 - p0)
-+ vand q6, q6, q14
-+ vabd.u8 q5, q2, q0 @ abs(q2 - q0)
-+ vclt.u8 q4, q4, q11 @ < beta
-+ vand q6, q6, q15
-+ vclt.u8 q5, q5, q11 @ < beta
-+ vand q4, q4, q6
-+ vand q5, q5, q6
-+ vand q12, q12, q6
-+ vrhadd.u8 q14, q8, q0
-+ vsub.i8 q6, q12, q4
-+ vqadd.u8 q7, q9, q12
-+ vhadd.u8 q10, q10, q14
-+ vsub.i8 q6, q6, q5
-+ vhadd.u8 q14, q2, q14
-+ vmin.u8 q7, q7, q10
-+ vqsub.u8 q11, q9, q12
-+ vqadd.u8 q2, q1, q12
-+ vmax.u8 q7, q7, q11
-+ vqsub.u8 q11, q1, q12
-+ vmin.u8 q14, q2, q14
-+ vmovl.u8 q2, d0
-+ vmax.u8 q14, q14, q11
-+ vmovl.u8 q10, d1
-+ vsubw.u8 q2, q2, d16
-+ vsubw.u8 q10, q10, d17
-+ vshl.i16 q2, q2, #2
-+ vshl.i16 q10, q10, #2
-+ vaddw.u8 q2, q2, d18
-+ vaddw.u8 q10, q10, d19
-+ vsubw.u8 q2, q2, d2
-+ vsubw.u8 q10, q10, d3
-+ vrshrn.i16 d4, q2, #3
-+ vrshrn.i16 d5, q10, #3
-+ vbsl q4, q7, q9
-+ vbsl q5, q14, q1
-+ vneg.s8 q7, q6
-+ vmovl.u8 q14, d16
-+ vmin.s8 q2, q2, q6
-+ vmovl.u8 q6, d17
-+ vmax.s8 q2, q2, q7
-+ vmovl.u8 q11, d0
-+ vmovl.u8 q12, d1
-+ vaddw.s8 q14, q14, d4
-+ vaddw.s8 q6, q6, d5
-+ vsubw.s8 q11, q11, d4
-+ vsubw.s8 q12, q12, d5
-+ vqmovun.s16 d16, q14
-+ vqmovun.s16 d17, q6
-+ vqmovun.s16 d0, q11
-+ vqmovun.s16 d1, q12
-+ .endm
-+
-+ .global ff_h264_v_loop_filter_luma_neon
-+ .func ff_h264_v_loop_filter_luma_neon
-+ff_h264_v_loop_filter_luma_neon:
-+ h264_loop_filter_start
-+
-+ vld1.64 {d0, d1}, [r0,:128], r1
-+ vld1.64 {d2, d3}, [r0,:128], r1
-+ vld1.64 {d4, d5}, [r0,:128], r1
-+ sub r0, r0, r1, lsl #2
-+ sub r0, r0, r1, lsl #1
-+ vld1.64 {d20,d21}, [r0,:128], r1
-+ vld1.64 {d18,d19}, [r0,:128], r1
-+ vld1.64 {d16,d17}, [r0,:128], r1
-+
-+ align_push_regs
-+
-+ h264_loop_filter_luma
-+
-+ sub r0, r0, r1, lsl #1
-+ vst1.64 {d8, d9}, [r0,:128], r1
-+ vst1.64 {d16,d17}, [r0,:128], r1
-+ vst1.64 {d0, d1}, [r0,:128], r1
-+ vst1.64 {d10,d11}, [r0,:128]
-+
-+ align_pop_regs
-+ bx lr
-+ .endfunc
-+
-+ .global ff_h264_h_loop_filter_luma_neon
-+ .func ff_h264_h_loop_filter_luma_neon
-+ff_h264_h_loop_filter_luma_neon:
-+ h264_loop_filter_start
-+
-+ sub r0, r0, #4
-+ vld1.64 {d6}, [r0], r1
-+ vld1.64 {d20}, [r0], r1
-+ vld1.64 {d18}, [r0], r1
-+ vld1.64 {d16}, [r0], r1
-+ vld1.64 {d0}, [r0], r1
-+ vld1.64 {d2}, [r0], r1
-+ vld1.64 {d4}, [r0], r1
-+ vld1.64 {d26}, [r0], r1
-+ vld1.64 {d7}, [r0], r1
-+ vld1.64 {d21}, [r0], r1
-+ vld1.64 {d19}, [r0], r1
-+ vld1.64 {d17}, [r0], r1
-+ vld1.64 {d1}, [r0], r1
-+ vld1.64 {d3}, [r0], r1
-+ vld1.64 {d5}, [r0], r1
-+ vld1.64 {d27}, [r0], r1
-+
-+ vtrn.32 q3, q0
-+ vtrn.32 q10, q1
-+ vtrn.32 q9, q2
-+ vtrn.32 q8, q13
-+ vtrn.16 q3, q9
-+ vtrn.16 q10, q8
-+ vtrn.16 q0, q2
-+ vtrn.16 q1, q13
-+ vtrn.8 q3, q10
-+ vtrn.8 q9, q8
-+ vtrn.8 q0, q1
-+ vtrn.8 q2, q13
-+
-+ align_push_regs
-+ sub sp, sp, #16
-+ vst1.64 {d4, d5}, [sp,:128]
-+ sub sp, sp, #16
-+ vst1.64 {d20,d21}, [sp,:128]
-+
-+ h264_loop_filter_luma
-+
-+ vld1.64 {d20,d21}, [sp,:128]!
-+ vld1.64 {d4, d5}, [sp,:128]!
-+
-+ vtrn.32 q3, q0
-+ vtrn.32 q10, q5
-+ vtrn.32 q4, q2
-+ vtrn.32 q8, q13
-+ vtrn.16 q3, q4
-+ vtrn.16 q10, q8
-+ vtrn.16 q0, q2
-+ vtrn.16 q5, q13
-+ vtrn.8 q3, q10
-+ vtrn.8 q4, q8
-+ vtrn.8 q0, q5
-+ vtrn.8 q2, q13
-+
-+ sub r0, r0, r1, lsl #4
-+ vst1.64 {d6}, [r0], r1
-+ vst1.64 {d20}, [r0], r1
-+ vst1.64 {d8}, [r0], r1
-+ vst1.64 {d16}, [r0], r1
-+ vst1.64 {d0}, [r0], r1
-+ vst1.64 {d10}, [r0], r1
-+ vst1.64 {d4}, [r0], r1
-+ vst1.64 {d26}, [r0], r1
-+ vst1.64 {d7}, [r0], r1
-+ vst1.64 {d21}, [r0], r1
-+ vst1.64 {d9}, [r0], r1
-+ vst1.64 {d17}, [r0], r1
-+ vst1.64 {d1}, [r0], r1
-+ vst1.64 {d11}, [r0], r1
-+ vst1.64 {d5}, [r0], r1
-+ vst1.64 {d27}, [r0], r1
-+
-+ align_pop_regs
-+ bx lr
-+ .endfunc
-+
-+ .macro h264_loop_filter_chroma
-+ vdup.8 d22, r2 @ alpha
-+ vmovl.u8 q12, d24
-+ vabd.u8 d26, d16, d0 @ abs(p0 - q0)
-+ vmovl.u8 q2, d0
-+ vabd.u8 d28, d18, d16 @ abs(p1 - p0)
-+ vsubw.u8 q2, q2, d16
-+ vsli.16 d24, d24, #8
-+ vshl.i16 q2, q2, #2
-+ vabd.u8 d30, d2, d0 @ abs(q1 - q0)
-+ vaddw.u8 q2, q2, d18
-+ vclt.u8 d26, d26, d22 @ < alpha
-+ vsubw.u8 q2, q2, d2
-+ vdup.8 d22, r3 @ beta
-+ vclt.s8 d25, d24, #0
-+ vrshrn.i16 d4, q2, #3
-+ vclt.u8 d28, d28, d22 @ < beta
-+ vbic d26, d26, d25
-+ vclt.u8 d30, d30, d22 @ < beta
-+ vand d26, d26, d28
-+ vneg.s8 d25, d24
-+ vand d26, d26, d30
-+ vmin.s8 d4, d4, d24
-+ vmovl.u8 q14, d16
-+ vand d4, d4, d26
-+ vmax.s8 d4, d4, d25
-+ vmovl.u8 q11, d0
-+ vaddw.s8 q14, q14, d4
-+ vsubw.s8 q11, q11, d4
-+ vqmovun.s16 d16, q14
-+ vqmovun.s16 d0, q11
-+ .endm
-+
-+ .global ff_h264_v_loop_filter_chroma_neon
-+ .func ff_h264_v_loop_filter_chroma_neon
-+ff_h264_v_loop_filter_chroma_neon:
-+ h264_loop_filter_start
-+
-+ sub r0, r0, r1, lsl #1
-+ vld1.64 {d18}, [r0,:64], r1
-+ vld1.64 {d16}, [r0,:64], r1
-+ vld1.64 {d0}, [r0,:64], r1
-+ vld1.64 {d2}, [r0,:64]
-+
-+ h264_loop_filter_chroma
-+
-+ sub r0, r0, r1, lsl #1
-+ vst1.64 {d16}, [r0,:64], r1
-+ vst1.64 {d0}, [r0,:64], r1
-+
-+ bx lr
-+ .endfunc
-+
-+ .global ff_h264_h_loop_filter_chroma_neon
-+ .func ff_h264_h_loop_filter_chroma_neon
-+ff_h264_h_loop_filter_chroma_neon:
-+ h264_loop_filter_start
-+
-+ sub r0, r0, #2
-+ vld1.32 {d18[0]}, [r0], r1
-+ vld1.32 {d16[0]}, [r0], r1
-+ vld1.32 {d0[0]}, [r0], r1
-+ vld1.32 {d2[0]}, [r0], r1
-+ vld1.32 {d18[1]}, [r0], r1
-+ vld1.32 {d16[1]}, [r0], r1
-+ vld1.32 {d0[1]}, [r0], r1
-+ vld1.32 {d2[1]}, [r0], r1
-+
-+ vtrn.16 d18, d0
-+ vtrn.16 d16, d2
-+ vtrn.8 d18, d16
-+ vtrn.8 d0, d2
-+
-+ h264_loop_filter_chroma
-+
-+ vtrn.16 d18, d0
-+ vtrn.16 d16, d2
-+ vtrn.8 d18, d16
-+ vtrn.8 d0, d2
-+
-+ sub r0, r0, r1, lsl #3
-+ vst1.32 {d18[0]}, [r0], r1
-+ vst1.32 {d16[0]}, [r0], r1
-+ vst1.32 {d0[0]}, [r0], r1
-+ vst1.32 {d2[0]}, [r0], r1
-+ vst1.32 {d18[1]}, [r0], r1
-+ vst1.32 {d16[1]}, [r0], r1
-+ vst1.32 {d0[1]}, [r0], r1
-+ vst1.32 {d2[1]}, [r0], r1
-+
-+ bx lr
-+ .endfunc
diff --git a/packages/mplayer/files/mru-neon-h264-qpel.diff b/packages/mplayer/files/mru-neon-h264-qpel.diff
deleted file mode 100644
index 6ed479b19b..0000000000
--- a/packages/mplayer/files/mru-neon-h264-qpel.diff
+++ /dev/null
@@ -1,1040 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Sat, 23 Aug 2008 00:24:04 +0000 (+0100)
-Subject: ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=55661fd933572f67248c0730f6c75a6db0f0eb6a
-
-ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
----
-
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-index 68ecbe8..a932aa9 100644
---- a/libavcodec/armv4l/dsputil_neon.c
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -40,7 +40,38 @@ void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
- void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-
- void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, int);
-+
- void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
-
- void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
- void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
-@@ -83,8 +114,39 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
-
-- c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon;
-- c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon;
-+ c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon;
-+ c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon;
-+ c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon;
-+ c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon;
-+ c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon;
-+ c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon;
-+ c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon;
-+
-+ c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
-+ c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
-+ c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
-+ c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
-+ c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
-+ c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
-+ c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon;
-+ c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon;
-+ c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon;
-+ c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon;
-+ c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon;
-+ c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon;
-+ c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon;
-+ c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon;
-+ c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon;
-+ c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon;
-
- c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
- c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
-diff --git a/libavcodec/armv4l/h264dsp_neon.S b/libavcodec/armv4l/h264dsp_neon.S
-index ac793b2..398e9c8 100644
---- a/libavcodec/armv4l/h264dsp_neon.S
-+++ b/libavcodec/armv4l/h264dsp_neon.S
-@@ -20,6 +20,39 @@
-
- .fpu neon
-
-+ .macro transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7
-+ vtrn.32 \r0, \r4
-+ vtrn.32 \r1, \r5
-+ vtrn.32 \r2, \r6
-+ vtrn.32 \r3, \r7
-+ vtrn.16 \r0, \r2
-+ vtrn.16 \r1, \r3
-+ vtrn.16 \r4, \r6
-+ vtrn.16 \r5, \r7
-+ vtrn.8 \r0, \r1
-+ vtrn.8 \r2, \r3
-+ vtrn.8 \r4, \r5
-+ vtrn.8 \r6, \r7
-+ .endm
-+
-+ .macro swap4 r0 r1 r2 r3 r4 r5 r6 r7
-+ vswp \r0, \r4
-+ vswp \r1, \r5
-+ vswp \r2, \r6
-+ vswp \r3, \r7
-+ .endm
-+
-+ .macro transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7
-+ vtrn.32 \r0, \r2
-+ vtrn.32 \r1, \r3
-+ vtrn.32 \r4, \r6
-+ vtrn.32 \r5, \r7
-+ vtrn.16 \r0, \r1
-+ vtrn.16 \r2, \r3
-+ vtrn.16 \r4, \r5
-+ vtrn.16 \r6, \r7
-+ .endm
-+
- /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
- .macro h264_chroma_mc8 avg=0
- push {r4-r7, lr}
-@@ -455,18 +488,7 @@ ff_h264_h_loop_filter_luma_neon:
- vld1.64 {d5}, [r0], r1
- vld1.64 {d27}, [r0], r1
-
-- vtrn.32 q3, q0
-- vtrn.32 q10, q1
-- vtrn.32 q9, q2
-- vtrn.32 q8, q13
-- vtrn.16 q3, q9
-- vtrn.16 q10, q8
-- vtrn.16 q0, q2
-- vtrn.16 q1, q13
-- vtrn.8 q3, q10
-- vtrn.8 q9, q8
-- vtrn.8 q0, q1
-- vtrn.8 q2, q13
-+ transpose_8x8 q3, q10, q9, q8, q0, q1, q2, q13
-
- align_push_regs
- sub sp, sp, #16
-@@ -479,18 +501,7 @@ ff_h264_h_loop_filter_luma_neon:
- vld1.64 {d20,d21}, [sp,:128]!
- vld1.64 {d4, d5}, [sp,:128]!
-
-- vtrn.32 q3, q0
-- vtrn.32 q10, q5
-- vtrn.32 q4, q2
-- vtrn.32 q8, q13
-- vtrn.16 q3, q4
-- vtrn.16 q10, q8
-- vtrn.16 q0, q2
-- vtrn.16 q5, q13
-- vtrn.8 q3, q10
-- vtrn.8 q4, q8
-- vtrn.8 q0, q5
-- vtrn.8 q2, q13
-+ transpose_8x8 q3, q10, q4, q8, q0, q5, q2, q13
-
- sub r0, r0, r1, lsl #4
- vst1.64 {d6}, [r0], r1
-@@ -606,3 +617,862 @@ ff_h264_h_loop_filter_chroma_neon:
-
- bx lr
- .endfunc
-+
-+ /* H.264 qpel MC */
-+
-+ .macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
-+ vext.8 d4, \r0, \r1, #1
-+ vext.8 d2, \r0, \r1, #2
-+ vext.8 d3, \r0, \r1, #3
-+ vext.8 d5, \r0, \r1, #4
-+ vext.8 d6, \r0, \r1, #5
-+
-+ vext.8 d20, \r2, \r3, #1
-+ vext.8 d18, \r2, \r3, #2
-+ vext.8 d19, \r2, \r3, #3
-+ vext.8 d21, \r2, \r3, #4
-+ vext.8 d7, \r2, \r3, #5
-+
-+ vaddl.u8 q1, d2, d3
-+ vaddl.u8 q2, d4, d5
-+ vaddl.u8 q0, \r0, d6
-+ vaddl.u8 q9, d18, d19
-+ vaddl.u8 q10, d20, d21
-+ vaddl.u8 q8, \r2, d7
-+
-+ vshl.i16 q3, q1, #4
-+ vshl.i16 q1, q1, #2
-+ vshl.i16 q15, q2, #2
-+ vadd.i16 q1, q1, q3
-+ vadd.i16 q2, q2, q15
-+
-+ vshl.i16 q3, q9, #4
-+ vshl.i16 q9, q9, #2
-+ vshl.i16 q15, q10, #2
-+ vadd.i16 q9, q9, q3
-+ vadd.i16 q10, q10, q15
-+
-+ vsub.i16 q1, q1, q2
-+ vsub.i16 q9, q9, q10
-+.if \narrow
-+ vadd.i16 q1, q1, q0
-+ vadd.i16 q9, q9, q8
-+ vqrshrun.s16 \d0, q1, #5
-+ vqrshrun.s16 \d1, q9, #5
-+.else
-+ vadd.i16 \d0, q1, q0
-+ vadd.i16 \d1, q9, q8
-+.endif
-+ .endm
-+
-+ .macro lowpass_8_1 r0, r1, d0, narrow=1
-+ vext.8 d4, \r0, \r1, #1
-+ vext.8 d2, \r0, \r1, #2
-+ vext.8 d3, \r0, \r1, #3
-+ vext.8 d5, \r0, \r1, #4
-+ vext.8 d6, \r0, \r1, #5
-+
-+ vaddl.u8 q1, d2, d3
-+ vaddl.u8 q2, d4, d5
-+ vaddl.u8 q0, \r0, d6
-+
-+ vshl.i16 q3, q1, #4
-+ vshl.i16 q1, q1, #2
-+ vshl.i16 q15, q2, #2
-+ vadd.i16 q1, q1, q3
-+ vadd.i16 q2, q2, q15
-+
-+ vadd.i16 q1, q1, q0
-+.if \narrow
-+ vsub.i16 q1, q1, q2
-+ vqrshrun.s16 \d0, q1, #5
-+.else
-+ vsub.i16 \d0, q1, q2
-+.endif
-+ .endm
-+
-+ .macro lowpass_8.16 r0, r1, l0, h0, l1, h1, d
-+ vext.16 q2, \r0, \r1, #1
-+ vext.16 q1, \r0, \r1, #2
-+ vext.16 q0, \r0, \r1, #3
-+ vext.16 q3, \r0, \r1, #4
-+ vext.16 \r1, \r0, \r1, #5
-+
-+ vaddl.s16 q9, d2, d0
-+ vaddl.s16 q1, d3, d1
-+ vaddl.s16 q10, d4, d6
-+ vaddl.s16 q2, d5, d7
-+ vaddl.s16 q0, \h0, \h1
-+ vaddl.s16 q8, \l0, \l1
-+
-+ vshl.i32 q3, q9, #4
-+ vshl.i32 q9, q9, #2
-+ vshl.i32 q15, q10, #2
-+ vadd.i32 q9, q9, q3
-+ vadd.i32 q10, q10, q15
-+
-+ vshl.i32 q3, q1, #4
-+ vshl.i32 q1, q1, #2
-+ vshl.i32 q15, q2, #2
-+ vadd.i32 q1, q1, q3
-+ vadd.i32 q2, q2, q15
-+
-+ vadd.i32 q9, q9, q8
-+ vsub.i32 q9, q9, q10
-+
-+ vadd.i32 q1, q1, q0
-+ vsub.i32 q1, q1, q2
-+
-+ vrshrn.s32 d18, q9, #10
-+ vrshrn.s32 d19, q1, #10
-+
-+ vqmovun.s16 \d, q9
-+ .endm
-+
-+ .func put_h264_qpel16_h_lowpass_neon_packed
-+put_h264_qpel16_h_lowpass_neon_packed:
-+ mov r4, lr
-+ mov ip, #16
-+ mov r3, #8
-+ bl put_h264_qpel8_h_lowpass_neon
-+ sub r1, r1, r2, lsl #4
-+ add r1, r1, #8
-+ mov ip, #16
-+ mov lr, r4
-+ b put_h264_qpel8_h_lowpass_neon
-+ .endfunc
-+
-+ .func put_h264_qpel16_h_lowpass_neon
-+put_h264_qpel16_h_lowpass_neon:
-+ push {lr}
-+ mov ip, #16
-+ dmb
-+ bl put_h264_qpel8_h_lowpass_neon
-+ sub r0, r0, r3, lsl #4
-+ sub r1, r1, r2, lsl #4
-+ add r0, r0, #8
-+ add r1, r1, #8
-+ mov ip, #16
-+ pop {lr}
-+ .endfunc
-+
-+ .func put_h264_qpel8_h_lowpass_neon
-+put_h264_qpel8_h_lowpass_neon:
-+1: vld1.64 {d0, d1}, [r1], r2
-+ vld1.64 {d16,d17}, [r1], r2
-+ subs ip, ip, #2
-+ lowpass_8 d0, d1, d16, d17, d0, d16
-+ vst1.64 {d0}, [r0,:64], r3
-+ vst1.64 {d16}, [r0,:64], r3
-+ bne 1b
-+ bx lr
-+ .endfunc
-+
-+ .func put_h264_qpel16_h_lowpass_l2_neon
-+put_h264_qpel16_h_lowpass_l2_neon:
-+ push {lr}
-+ mov ip, #16
-+ dmb
-+ bl put_h264_qpel8_h_lowpass_l2_neon
-+ sub r0, r0, r2, lsl #4
-+ sub r1, r1, r2, lsl #4
-+ sub r3, r3, r2, lsl #4
-+ add r0, r0, #8
-+ add r1, r1, #8
-+ add r3, r3, #8
-+ mov ip, #16
-+ pop {lr}
-+ .endfunc
-+
-+ .func put_h264_qpel8_h_lowpass_l2_neon
-+put_h264_qpel8_h_lowpass_l2_neon:
-+1: vld1.64 {d0, d1}, [r1], r2
-+ vld1.64 {d16,d17}, [r1], r2
-+ vld1.64 {d28}, [r3], r2
-+ vld1.64 {d29}, [r3], r2
-+ subs ip, ip, #2
-+ lowpass_8 d0, d1, d16, d17, d0, d1
-+ vrhadd.u8 q0, q0, q14
-+ vst1.64 {d0}, [r0,:64], r2
-+ vst1.64 {d1}, [r0,:64], r2
-+ bne 1b
-+ bx lr
-+ .endfunc
-+
-+ .func put_h264_qpel16_v_lowpass_neon_packed
-+put_h264_qpel16_v_lowpass_neon_packed:
-+ mov r4, lr
-+ mov r2, #8
-+ bl put_h264_qpel8_v_lowpass_neon
-+ sub r1, r1, r3, lsl #2
-+ bl put_h264_qpel8_v_lowpass_neon
-+ sub r1, r1, r3, lsl #4
-+ sub r1, r1, r3, lsl #2
-+ add r1, r1, #8
-+ bl put_h264_qpel8_v_lowpass_neon
-+ sub r1, r1, r3, lsl #2
-+ mov lr, r4
-+ b put_h264_qpel8_v_lowpass_neon
-+ .endfunc
-+
-+ .func put_h264_qpel16_v_lowpass_neon
-+put_h264_qpel16_v_lowpass_neon:
-+ mov r4, lr
-+ bl put_h264_qpel8_v_lowpass_neon
-+ sub r1, r1, r3, lsl #2
-+ bl put_h264_qpel8_v_lowpass_neon
-+ sub r0, r0, r2, lsl #4
-+ add r0, r0, #8
-+ sub r1, r1, r3, lsl #4
-+ sub r1, r1, r3, lsl #2
-+ add r1, r1, #8
-+ bl put_h264_qpel8_v_lowpass_neon
-+ sub r1, r1, r3, lsl #2
-+ mov lr, r4
-+ .endfunc
-+
-+ .func put_h264_qpel8_v_lowpass_neon
-+put_h264_qpel8_v_lowpass_neon:
-+ vld1.64 {d8}, [r1], r3
-+ vld1.64 {d10}, [r1], r3
-+ vld1.64 {d12}, [r1], r3
-+ vld1.64 {d14}, [r1], r3
-+ vld1.64 {d22}, [r1], r3
-+ vld1.64 {d24}, [r1], r3
-+ vld1.64 {d26}, [r1], r3
-+ vld1.64 {d28}, [r1], r3
-+ vld1.64 {d9}, [r1], r3
-+ vld1.64 {d11}, [r1], r3
-+ vld1.64 {d13}, [r1], r3
-+ vld1.64 {d15}, [r1], r3
-+ vld1.64 {d23}, [r1]
-+
-+ transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14
-+ lowpass_8 d8, d9, d10, d11, d8, d10
-+ lowpass_8 d12, d13, d14, d15, d12, d14
-+ lowpass_8 d22, d23, d24, d25, d22, d24
-+ lowpass_8 d26, d27, d28, d29, d26, d28
-+ transpose_8x8 d8, d10, d12, d14, d22, d24, d26, d28
-+
-+ vst1.64 {d8}, [r0,:64], r2
-+ vst1.64 {d10}, [r0,:64], r2
-+ vst1.64 {d12}, [r0,:64], r2
-+ vst1.64 {d14}, [r0,:64], r2
-+ vst1.64 {d22}, [r0,:64], r2
-+ vst1.64 {d24}, [r0,:64], r2
-+ vst1.64 {d26}, [r0,:64], r2
-+ vst1.64 {d28}, [r0,:64], r2
-+
-+ bx lr
-+ .endfunc
-+
-+ .func put_h264_qpel16_v_lowpass_l2_neon
-+put_h264_qpel16_v_lowpass_l2_neon:
-+ mov r4, lr
-+ bl put_h264_qpel8_v_lowpass_l2_neon
-+ sub r1, r1, r3, lsl #2
-+ bl put_h264_qpel8_v_lowpass_l2_neon
-+ sub r0, r0, r3, lsl #4
-+ sub ip, ip, r2, lsl #4
-+ add r0, r0, #8
-+ add ip, ip, #8
-+ sub r1, r1, r3, lsl #4
-+ sub r1, r1, r3, lsl #2
-+ add r1, r1, #8
-+ bl put_h264_qpel8_v_lowpass_l2_neon
-+ sub r1, r1, r3, lsl #2
-+ mov lr, r4
-+ .endfunc
-+
-+ .func put_h264_qpel8_v_lowpass_l2_neon
-+put_h264_qpel8_v_lowpass_l2_neon:
-+ vld1.64 {d8}, [r1], r3
-+ vld1.64 {d10}, [r1], r3
-+ vld1.64 {d12}, [r1], r3
-+ vld1.64 {d14}, [r1], r3
-+ vld1.64 {d22}, [r1], r3
-+ vld1.64 {d24}, [r1], r3
-+ vld1.64 {d26}, [r1], r3
-+ vld1.64 {d28}, [r1], r3
-+ vld1.64 {d9}, [r1], r3
-+ vld1.64 {d11}, [r1], r3
-+ vld1.64 {d13}, [r1], r3
-+ vld1.64 {d15}, [r1], r3
-+ vld1.64 {d23}, [r1]
-+
-+ transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14
-+ lowpass_8 d8, d9, d10, d11, d8, d9
-+ lowpass_8 d12, d13, d14, d15, d12, d13
-+ lowpass_8 d22, d23, d24, d25, d22, d23
-+ lowpass_8 d26, d27, d28, d29, d26, d27
-+ transpose_8x8 d8, d9, d12, d13, d22, d23, d26, d27
-+
-+ vld1.64 {d0}, [ip], r2
-+ vld1.64 {d1}, [ip], r2
-+ vld1.64 {d2}, [ip], r2
-+ vld1.64 {d3}, [ip], r2
-+ vld1.64 {d4}, [ip], r2
-+ vrhadd.u8 q0, q0, q4
-+ vld1.64 {d5}, [ip], r2
-+ vrhadd.u8 q1, q1, q6
-+ vld1.64 {d6}, [ip], r2
-+ vrhadd.u8 q2, q2, q11
-+ vld1.64 {d7}, [ip], r2
-+
-+ vst1.64 {d0}, [r0,:64], r3
-+ vst1.64 {d1}, [r0,:64], r3
-+ vrhadd.u8 q3, q3, q13
-+ vst1.64 {d2}, [r0,:64], r3
-+ vst1.64 {d3}, [r0,:64], r3
-+ vst1.64 {d4}, [r0,:64], r3
-+ vst1.64 {d5}, [r0,:64], r3
-+ vst1.64 {d6}, [r0,:64], r3
-+ vst1.64 {d7}, [r0,:64], r3
-+
-+ bx lr
-+ .endfunc
-+
-+ .func put_h264_qpel8_hv_lowpass_neon_top
-+put_h264_qpel8_hv_lowpass_neon_top:
-+ mov ip, #12
-+1: vld1.64 {d0, d1}, [r1], r3
-+ vld1.64 {d16,d17}, [r1], r3
-+ subs ip, ip, #2
-+ lowpass_8 d0, d1, d16, d17, q0, q1, narrow=0
-+ vst1.64 {d0-d3}, [r4,:128]!
-+ bne 1b
-+
-+ vld1.64 {d0, d1}, [r1]
-+ lowpass_8_1 d0, d1, q12, narrow=0
-+
-+ mov ip, #-16
-+ add r4, r4, ip
-+ vld1.64 {d30,d31}, [r4,:128], ip
-+ vld1.64 {d20,d21}, [r4,:128], ip
-+ vld1.64 {d18,d19}, [r4,:128], ip
-+ vld1.64 {d16,d17}, [r4,:128], ip
-+ vld1.64 {d14,d15}, [r4,:128], ip
-+ vld1.64 {d12,d13}, [r4,:128], ip
-+ vld1.64 {d10,d11}, [r4,:128], ip
-+ vld1.64 {d8, d9}, [r4,:128], ip
-+ vld1.64 {d6, d7}, [r4,:128], ip
-+ vld1.64 {d4, d5}, [r4,:128], ip
-+ vld1.64 {d2, d3}, [r4,:128], ip
-+ vld1.64 {d0, d1}, [r4,:128]
-+
-+ swap4 d1, d3, d5, d7, d8, d10, d12, d14
-+ transpose16_4x4 q0, q1, q2, q3, q4, q5, q6, q7
-+
-+ swap4 d17, d19, d21, d31, d24, d26, d28, d22
-+ transpose16_4x4 q8, q9, q10, q15, q12, q13, q14, q11
-+
-+ vst1.64 {d30,d31}, [r4,:128]!
-+ vst1.64 {d6, d7}, [r4,:128]!
-+ vst1.64 {d20,d21}, [r4,:128]!
-+ vst1.64 {d4, d5}, [r4,:128]!
-+ vst1.64 {d18,d19}, [r4,:128]!
-+ vst1.64 {d2, d3}, [r4,:128]!
-+ vst1.64 {d16,d17}, [r4,:128]!
-+ vst1.64 {d0, d1}, [r4,:128]
-+
-+ lowpass_8.16 q4, q12, d8, d9, d24, d25, d8
-+ lowpass_8.16 q5, q13, d10, d11, d26, d27, d9
-+ lowpass_8.16 q6, q14, d12, d13, d28, d29, d10
-+ lowpass_8.16 q7, q11, d14, d15, d22, d23, d11
-+
-+ vld1.64 {d16,d17}, [r4,:128], ip
-+ vld1.64 {d30,d31}, [r4,:128], ip
-+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d12
-+ vld1.64 {d16,d17}, [r4,:128], ip
-+ vld1.64 {d30,d31}, [r4,:128], ip
-+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d13
-+ vld1.64 {d16,d17}, [r4,:128], ip
-+ vld1.64 {d30,d31}, [r4,:128], ip
-+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d14
-+ vld1.64 {d16,d17}, [r4,:128], ip
-+ vld1.64 {d30,d31}, [r4,:128]
-+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d15
-+
-+ transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11
-+
-+ bx lr
-+ .endfunc
-+
-+ .func put_h264_qpel8_hv_lowpass_neon
-+put_h264_qpel8_hv_lowpass_neon:
-+ mov r10, lr
-+ bl put_h264_qpel8_hv_lowpass_neon_top
-+ vst1.64 {d12}, [r0,:64], r2
-+ vst1.64 {d13}, [r0,:64], r2
-+ vst1.64 {d14}, [r0,:64], r2
-+ vst1.64 {d15}, [r0,:64], r2
-+ vst1.64 {d8}, [r0,:64], r2
-+ vst1.64 {d9}, [r0,:64], r2
-+ vst1.64 {d10}, [r0,:64], r2
-+ vst1.64 {d11}, [r0,:64], r2
-+
-+ mov lr, r10
-+ bx lr
-+ .endfunc
-+
-+ .func put_h264_qpel8_hv_lowpass_l2_neon
-+put_h264_qpel8_hv_lowpass_l2_neon:
-+ mov r10, lr
-+ bl put_h264_qpel8_hv_lowpass_neon_top
-+
-+ vld1.64 {d0, d1}, [r2,:128]!
-+ vld1.64 {d2, d3}, [r2,:128]!
-+ vrhadd.u8 q0, q0, q6
-+ vld1.64 {d4, d5}, [r2,:128]!
-+ vrhadd.u8 q1, q1, q7
-+ vld1.64 {d6, d7}, [r2,:128]!
-+ vrhadd.u8 q2, q2, q4
-+
-+ vst1.64 {d0}, [r0,:64], r3
-+ vrhadd.u8 q3, q3, q5
-+ vst1.64 {d1}, [r0,:64], r3
-+ vst1.64 {d2}, [r0,:64], r3
-+ vst1.64 {d3}, [r0,:64], r3
-+ vst1.64 {d4}, [r0,:64], r3
-+ vst1.64 {d5}, [r0,:64], r3
-+ vst1.64 {d6}, [r0,:64], r3
-+ vst1.64 {d7}, [r0,:64], r3
-+
-+ mov lr, r10
-+ bx lr
-+ .endfunc
-+
-+ .func put_h264_qpel16_hv_lowpass_neon
-+put_h264_qpel16_hv_lowpass_neon:
-+ mov r9, lr
-+ bl put_h264_qpel8_hv_lowpass_neon
-+ sub r1, r1, r3, lsl #2
-+ bl put_h264_qpel8_hv_lowpass_neon
-+ sub r1, r1, r3, lsl #4
-+ sub r1, r1, r3, lsl #2
-+ add r1, r1, #8
-+ sub r0, r0, r2, lsl #4
-+ add r0, r0, #8
-+ bl put_h264_qpel8_hv_lowpass_neon
-+ sub r1, r1, r3, lsl #2
-+ mov lr, r9
-+ b put_h264_qpel8_hv_lowpass_neon
-+ .endfunc
-+
-+ .func put_h264_qpel16_hv_lowpass_l2_neon
-+put_h264_qpel16_hv_lowpass_l2_neon:
-+ mov r9, lr
-+ sub r2, r4, #256
-+ bl put_h264_qpel8_hv_lowpass_l2_neon
-+ sub r1, r1, r3, lsl #2
-+ bl put_h264_qpel8_hv_lowpass_l2_neon
-+ sub r1, r1, r3, lsl #4
-+ sub r1, r1, r3, lsl #2
-+ add r1, r1, #8
-+ sub r0, r0, r3, lsl #4
-+ add r0, r0, #8
-+ bl put_h264_qpel8_hv_lowpass_l2_neon
-+ sub r1, r1, r3, lsl #2
-+ mov lr, r9
-+ b put_h264_qpel8_hv_lowpass_l2_neon
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc10_neon
-+ .func ff_put_h264_qpel8_mc10_neon
-+ff_put_h264_qpel8_mc10_neon:
-+ mov r3, r1
-+ sub r1, r1, #2
-+ mov ip, #8
-+ dmb
-+ b put_h264_qpel8_h_lowpass_l2_neon
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc20_neon
-+ .func ff_put_h264_qpel8_mc20_neon
-+ff_put_h264_qpel8_mc20_neon:
-+ sub r1, r1, #2
-+ mov r3, r2
-+ mov ip, #8
-+ dmb
-+ b put_h264_qpel8_h_lowpass_neon
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc30_neon
-+ .func ff_put_h264_qpel8_mc30_neon
-+ff_put_h264_qpel8_mc30_neon:
-+ add r3, r1, #1
-+ sub r1, r1, #2
-+ mov ip, #8
-+ dmb
-+ b put_h264_qpel8_h_lowpass_l2_neon
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc01_neon
-+ .func ff_put_h264_qpel8_mc01_neon
-+ff_put_h264_qpel8_mc01_neon:
-+ push {lr}
-+ mov ip, r1
-+put_h264_qpel8_mc01:
-+ mov r3, r2
-+ sub r1, r1, r2, lsl #1
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel8_v_lowpass_l2_neon
-+ vpop {d8-d15}
-+ pop {pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc11_neon
-+ .func ff_put_h264_qpel8_mc11_neon
-+ff_put_h264_qpel8_mc11_neon:
-+ push {r0, r1, r2, lr}
-+put_h264_qpel8_mc11:
-+ sub sp, sp, #64
-+ mov r0, sp
-+ sub r1, r1, #2
-+ mov r3, #8
-+ mov ip, #8
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel8_h_lowpass_neon
-+ ldrd r0, [sp, #128]
-+ mov r3, r2
-+ add ip, sp, #64
-+ sub r1, r1, r2, lsl #1
-+ mov r2, #8
-+ bl put_h264_qpel8_v_lowpass_l2_neon
-+ vpop {d8-d15}
-+ add sp, sp, #76
-+ pop {pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc21_neon
-+ .func ff_put_h264_qpel8_mc21_neon
-+ff_put_h264_qpel8_mc21_neon:
-+ push {r0, r1, r4, r10, r11, lr}
-+put_h264_qpel8_mc21:
-+ mov r11, sp
-+ bic sp, sp, #15
-+ sub sp, sp, #(8*8+16*12)
-+ sub r1, r1, #2
-+ mov r3, #8
-+ mov r0, sp
-+ mov ip, #8
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel8_h_lowpass_neon
-+ mov r4, r0
-+ ldrd r0, [r11]
-+ sub r1, r1, r2, lsl #1
-+ sub r1, r1, #2
-+ mov r3, r2
-+ sub r2, r4, #64
-+ bl put_h264_qpel8_hv_lowpass_l2_neon
-+ vpop {d8-d15}
-+ add sp, r11, #8
-+ pop {r4, r10, r11, pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc31_neon
-+ .func ff_put_h264_qpel8_mc31_neon
-+ff_put_h264_qpel8_mc31_neon:
-+ add r1, r1, #1
-+ push {r0, r1, r2, lr}
-+ sub r1, r1, #1
-+ b put_h264_qpel8_mc11
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc02_neon
-+ .func ff_put_h264_qpel8_mc02_neon
-+ff_put_h264_qpel8_mc02_neon:
-+ push {lr}
-+ sub r1, r1, r2, lsl #1
-+ mov r3, r2
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel8_v_lowpass_neon
-+ vpop {d8-d15}
-+ pop {pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc12_neon
-+ .func ff_put_h264_qpel8_mc12_neon
-+ff_put_h264_qpel8_mc12_neon:
-+ push {r0, r1, r4, r10, r11, lr}
-+put_h264_qpel8_mc12:
-+ mov r11, sp
-+ bic sp, sp, #15
-+ sub sp, sp, #(8*8+16*12)
-+ sub r1, r1, r2, lsl #1
-+ mov r3, r2
-+ mov r2, #8
-+ mov r0, sp
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel8_v_lowpass_neon
-+ mov r4, r0
-+ ldrd r0, [r11]
-+ sub r1, r1, r3, lsl #1
-+ sub r1, r1, #2
-+ sub r2, r4, #64
-+ bl put_h264_qpel8_hv_lowpass_l2_neon
-+ vpop {d8-d15}
-+ add sp, r11, #8
-+ pop {r4, r10, r11, pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc22_neon
-+ .func ff_put_h264_qpel8_mc22_neon
-+ff_put_h264_qpel8_mc22_neon:
-+ push {r4, r10, r11, lr}
-+ mov r11, sp
-+ bic sp, sp, #15
-+ sub r1, r1, r2, lsl #1
-+ sub r1, r1, #2
-+ mov r3, r2
-+ sub sp, sp, #(16*12)
-+ mov r4, sp
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel8_hv_lowpass_neon
-+ vpop {d8-d15}
-+ mov sp, r11
-+ pop {r4, r10, r11, pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc32_neon
-+ .func ff_put_h264_qpel8_mc32_neon
-+ff_put_h264_qpel8_mc32_neon:
-+ push {r0, r1, r4, r10, r11, lr}
-+ add r1, r1, #1
-+ b put_h264_qpel8_mc12
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc03_neon
-+ .func ff_put_h264_qpel8_mc03_neon
-+ff_put_h264_qpel8_mc03_neon:
-+ push {lr}
-+ add ip, r1, r2
-+ b put_h264_qpel8_mc01
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc13_neon
-+ .func ff_put_h264_qpel8_mc13_neon
-+ff_put_h264_qpel8_mc13_neon:
-+ push {r0, r1, r2, lr}
-+ add r1, r1, r2
-+ b put_h264_qpel8_mc11
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc23_neon
-+ .func ff_put_h264_qpel8_mc23_neon
-+ff_put_h264_qpel8_mc23_neon:
-+ push {r0, r1, r4, r10, r11, lr}
-+ add r1, r1, r2
-+ b put_h264_qpel8_mc21
-+ .endfunc
-+
-+ .global ff_put_h264_qpel8_mc33_neon
-+ .func ff_put_h264_qpel8_mc33_neon
-+ff_put_h264_qpel8_mc33_neon:
-+ add r1, r1, #1
-+ push {r0, r1, r2, lr}
-+ add r1, r1, r2
-+ sub r1, r1, #1
-+ b put_h264_qpel8_mc11
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc10_neon
-+ .func ff_put_h264_qpel16_mc10_neon
-+ff_put_h264_qpel16_mc10_neon:
-+ mov r3, r1
-+ sub r1, r1, #2
-+ b put_h264_qpel16_h_lowpass_l2_neon
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc20_neon
-+ .func ff_put_h264_qpel16_mc20_neon
-+ff_put_h264_qpel16_mc20_neon:
-+ sub r1, r1, #2
-+ mov r3, r2
-+ b put_h264_qpel16_h_lowpass_neon
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc30_neon
-+ .func ff_put_h264_qpel16_mc30_neon
-+ff_put_h264_qpel16_mc30_neon:
-+ add r3, r1, #1
-+ sub r1, r1, #2
-+ b put_h264_qpel16_h_lowpass_l2_neon
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc01_neon
-+ .func ff_put_h264_qpel16_mc01_neon
-+ff_put_h264_qpel16_mc01_neon:
-+ push {r4, lr}
-+ mov ip, r1
-+put_h264_qpel16_mc01:
-+ mov r3, r2
-+ sub r1, r1, r2, lsl #1
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel16_v_lowpass_l2_neon
-+ vpop {d8-d15}
-+ pop {r4, pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc11_neon
-+ .func ff_put_h264_qpel16_mc11_neon
-+ff_put_h264_qpel16_mc11_neon:
-+ push {r0, r1, r4, lr}
-+put_h264_qpel16_mc11:
-+ sub sp, sp, #256
-+ mov r0, sp
-+ sub r1, r1, #2
-+ mov r3, #16
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel16_h_lowpass_neon
-+ add r0, sp, #256
-+ ldrd r0, [r0, #64]
-+ mov r3, r2
-+ add ip, sp, #64
-+ sub r1, r1, r2, lsl #1
-+ mov r2, #16
-+ bl put_h264_qpel16_v_lowpass_l2_neon
-+ vpop {d8-d15}
-+ add sp, sp, #(256+8)
-+ pop {r4, pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc21_neon
-+ .func ff_put_h264_qpel16_mc21_neon
-+ff_put_h264_qpel16_mc21_neon:
-+ push {r0, r1, r4-r5, r9-r11, lr}
-+put_h264_qpel16_mc21:
-+ mov r11, sp
-+ bic sp, sp, #15
-+ sub sp, sp, #(16*16+16*12)
-+ sub r1, r1, #2
-+ mov r0, sp
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel16_h_lowpass_neon_packed
-+ mov r4, r0
-+ ldrd r0, [r11]
-+ sub r1, r1, r2, lsl #1
-+ sub r1, r1, #2
-+ mov r3, r2
-+ bl put_h264_qpel16_hv_lowpass_l2_neon
-+ vpop {d8-d15}
-+ add sp, r11, #8
-+ pop {r4-r5, r9-r11, pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc31_neon
-+ .func ff_put_h264_qpel16_mc31_neon
-+ff_put_h264_qpel16_mc31_neon:
-+ add r1, r1, #1
-+ push {r0, r1, r4, lr}
-+ sub r1, r1, #1
-+ b put_h264_qpel16_mc11
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc02_neon
-+ .func ff_put_h264_qpel16_mc02_neon
-+ff_put_h264_qpel16_mc02_neon:
-+ push {r4, lr}
-+ sub r1, r1, r2, lsl #1
-+ mov r3, r2
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel16_v_lowpass_neon
-+ vpop {d8-d15}
-+ pop {r4, pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc12_neon
-+ .func ff_put_h264_qpel16_mc12_neon
-+ff_put_h264_qpel16_mc12_neon:
-+ push {r0, r1, r4-r5, r9-r11, lr}
-+put_h264_qpel16_mc12:
-+ mov r11, sp
-+ bic sp, sp, #15
-+ sub sp, sp, #(16*16+16*12)
-+ sub r1, r1, r2, lsl #1
-+ mov r0, sp
-+ mov r3, r2
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel16_v_lowpass_neon_packed
-+ mov r4, r0
-+ ldrd r0, [r11]
-+ sub r1, r1, r3, lsl #1
-+ sub r1, r1, #2
-+ mov r2, r3
-+ bl put_h264_qpel16_hv_lowpass_l2_neon
-+ vpop {d8-d15}
-+ add sp, r11, #8
-+ pop {r4-r5, r9-r11, pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc22_neon
-+ .func ff_put_h264_qpel16_mc22_neon
-+ff_put_h264_qpel16_mc22_neon:
-+ push {r4, r9-r11, lr}
-+ mov r11, sp
-+ bic sp, sp, #15
-+ sub r1, r1, r2, lsl #1
-+ sub r1, r1, #2
-+ mov r3, r2
-+ sub sp, sp, #(16*12)
-+ mov r4, sp
-+ dmb
-+ vpush {d8-d15}
-+ bl put_h264_qpel16_hv_lowpass_neon
-+ vpop {d8-d15}
-+ mov sp, r11
-+ pop {r4, r9-r11, pc}
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc32_neon
-+ .func ff_put_h264_qpel16_mc32_neon
-+ff_put_h264_qpel16_mc32_neon:
-+ push {r0, r1, r4-r5, r9-r11, lr}
-+ add r1, r1, #1
-+ b put_h264_qpel16_mc12
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc03_neon
-+ .func ff_put_h264_qpel16_mc03_neon
-+ff_put_h264_qpel16_mc03_neon:
-+ push {r4, lr}
-+ add ip, r1, r2
-+ b put_h264_qpel16_mc01
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc13_neon
-+ .func ff_put_h264_qpel16_mc13_neon
-+ff_put_h264_qpel16_mc13_neon:
-+ push {r0, r1, r4, lr}
-+ add r1, r1, r2
-+ b put_h264_qpel16_mc11
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc23_neon
-+ .func ff_put_h264_qpel16_mc23_neon
-+ff_put_h264_qpel16_mc23_neon:
-+ push {r0, r1, r4-r5, r9-r11, lr}
-+ add r1, r1, r2
-+ b put_h264_qpel16_mc21
-+ .endfunc
-+
-+ .global ff_put_h264_qpel16_mc33_neon
-+ .func ff_put_h264_qpel16_mc33_neon
-+ff_put_h264_qpel16_mc33_neon:
-+ add r1, r1, #1
-+ push {r0, r1, r4, lr}
-+ add r1, r1, r2
-+ sub r1, r1, #1
-+ b put_h264_qpel16_mc11
-+ .endfunc
diff --git a/packages/mplayer/files/mru-neon-h264idct-dc.diff b/packages/mplayer/files/mru-neon-h264idct-dc.diff
deleted file mode 100644
index 9f316b1b5b..0000000000
--- a/packages/mplayer/files/mru-neon-h264idct-dc.diff
+++ /dev/null
@@ -1,55 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Mon, 25 Aug 2008 00:05:54 +0000 (+0100)
-Subject: ARM: NEON optimised h264_idct_dc_add
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=1097c36b47b5019b2a8668f82796ffe76f482408
-
-ARM: NEON optimised h264_idct_dc_add
----
-
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-index 74f9b4d..6dbe835 100644
---- a/libavcodec/armv4l/dsputil_neon.c
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -89,6 +89,7 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
- int beta, int8_t *tc0);
-
- void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
-+void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
-
- void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- {
-@@ -156,4 +157,5 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
-
- c->h264_idct_add = ff_h264_idct_add_neon;
-+ c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
- }
-diff --git a/libavcodec/armv4l/h264idct_neon.S b/libavcodec/armv4l/h264idct_neon.S
-index 8f456f3..34e217f 100644
---- a/libavcodec/armv4l/h264idct_neon.S
-+++ b/libavcodec/armv4l/h264idct_neon.S
-@@ -75,3 +75,24 @@ ff_h264_idct_add_neon:
-
- bx lr
- .endfunc
-+
-+ .global ff_h264_idct_dc_add_neon
-+ .func ff_h264_idct_dc_add_neon
-+ff_h264_idct_dc_add_neon:
-+ vld1.16 {d2[],d3[]}, [r1,:16]
-+ vrshr.s16 q1, q1, #6
-+ vld1.32 {d0[0]}, [r0,:32], r2
-+ vld1.32 {d0[1]}, [r0,:32], r2
-+ vaddw.u8 q2, q1, d0
-+ vld1.32 {d1[0]}, [r0,:32], r2
-+ vld1.32 {d1[1]}, [r0,:32], r2
-+ vaddw.u8 q1, q1, d1
-+ vqmovun.s16 d0, q2
-+ vqmovun.s16 d1, q1
-+ sub r0, r0, r2, lsl #2
-+ vst1.32 {d0[0]}, [r0,:32], r2
-+ vst1.32 {d0[1]}, [r0,:32], r2
-+ vst1.32 {d1[0]}, [r0,:32], r2
-+ vst1.32 {d1[1]}, [r0,:32], r2
-+ bx lr
-+ .endfunc
diff --git a/packages/mplayer/files/mru-neon-h264idctadd.diff b/packages/mplayer/files/mru-neon-h264idctadd.diff
deleted file mode 100644
index 0f0931fbff..0000000000
--- a/packages/mplayer/files/mru-neon-h264idctadd.diff
+++ /dev/null
@@ -1,123 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Sun, 24 Aug 2008 21:27:49 +0000 (+0100)
-Subject: ARM: NEON optimised h264_idct_add
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=ebfab90234268bb35600a06e9982ca1358ea43f3
-
-ARM: NEON optimised h264_idct_add
----
-
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index 36ba158..053e752 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -438,6 +438,7 @@ OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \
- ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \
- armv4l/simple_idct_neon.o \
- armv4l/h264dsp_neon.o \
-+ armv4l/h264idct_neon.o \
-
- OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \
- sparc/simple_idct_vis.o \
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-index a932aa9..74f9b4d 100644
---- a/libavcodec/armv4l/dsputil_neon.c
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -88,6 +88,8 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
- void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
- int beta, int8_t *tc0);
-
-+void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
-+
- void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- {
- c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
-@@ -152,4 +154,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
- c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
- c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
-+
-+ c->h264_idct_add = ff_h264_idct_add_neon;
- }
-diff --git a/libavcodec/armv4l/h264idct_neon.S b/libavcodec/armv4l/h264idct_neon.S
-new file mode 100644
-index 0000000..8f456f3
---- /dev/null
-+++ b/libavcodec/armv4l/h264idct_neon.S
-@@ -0,0 +1,77 @@
-+/*
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+ .fpu neon
-+
-+ .text
-+
-+ .global ff_h264_idct_add_neon
-+ .func ff_h264_idct_add_neon
-+ff_h264_idct_add_neon:
-+ mov r3, #(1<<5)
-+ vmov.i16 d16, #0
-+ vmov.16 d16[0], r3
-+ vld1.64 {d0-d3}, [r1,:128]
-+ vadd.i16 d0, d0, d16
-+
-+ vswp d1, d2
-+ vadd.i16 d4, d0, d1
-+ vshr.s16 q8, q1, #1
-+ vsub.i16 d5, d0, d1
-+ vadd.i16 d6, d2, d17
-+ vsub.i16 d7, d16, d3
-+ vadd.i16 q0, q2, q3
-+ vsub.i16 q1, q2, q3
-+
-+ vtrn.16 d0, d1
-+ vtrn.16 d3, d2
-+ vtrn.32 d0, d3
-+ vtrn.32 d1, d2
-+
-+ vadd.i16 d4, d0, d3
-+ vld1.32 {d18[0]}, [r0,:32], r2
-+ vswp d1, d3
-+ vshr.s16 q8, q1, #1
-+ vld1.32 {d19[1]}, [r0,:32], r2
-+ vsub.i16 d5, d0, d1
-+ vld1.32 {d18[1]}, [r0,:32], r2
-+ vadd.i16 d6, d16, d3
-+ vld1.32 {d19[0]}, [r0,:32], r2
-+ vsub.i16 d7, d2, d17
-+ sub r0, r0, r2, lsl #2
-+ vadd.i16 q0, q2, q3
-+ vsub.i16 q1, q2, q3
-+
-+ vshr.s16 q0, q0, #6
-+ vshr.s16 q1, q1, #6
-+
-+ vaddw.u8 q0, q0, d18
-+ vaddw.u8 q1, q1, d19
-+
-+ vqmovun.s16 d0, q0
-+ vqmovun.s16 d1, q1
-+
-+ vst1.32 {d0[0]}, [r0,:32], r2
-+ vst1.32 {d1[1]}, [r0,:32], r2
-+ vst1.32 {d0[1]}, [r0,:32], r2
-+ vst1.32 {d1[0]}, [r0,:32], r2
-+
-+ bx lr
-+ .endfunc
diff --git a/packages/mplayer/files/mru-neon-put-pixels.diff b/packages/mplayer/files/mru-neon-put-pixels.diff
deleted file mode 100644
index 85650d913b..0000000000
--- a/packages/mplayer/files/mru-neon-put-pixels.diff
+++ /dev/null
@@ -1,376 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Fri, 13 Jun 2008 01:21:58 +0000 (+0100)
-Subject: ARM: NEON optimised put_pixels functions
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=86410ed1948118a29c70946d5294df9feb04dfef
-
-ARM: NEON optimised put_pixels functions
----
-
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index d91185e..27746df 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -433,6 +433,10 @@ ASM_OBJS-$(HAVE_ARMV5TE) += armv4l/simple_idct_armv5te.o \
-
- ASM_OBJS-$(HAVE_ARMV6) += armv4l/simple_idct_armv6.o \
-
-+OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \
-+
-+ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \
-+
- OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \
- sparc/simple_idct_vis.o \
-
-diff --git a/libavcodec/armv4l/dsputil_arm.c b/libavcodec/armv4l/dsputil_arm.c
-index 100b89e..89b51e7 100644
---- a/libavcodec/armv4l/dsputil_arm.c
-+++ b/libavcodec/armv4l/dsputil_arm.c
-@@ -26,6 +26,7 @@
-
- extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
- extern void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
-+extern void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
-
- extern void j_rev_dct_ARM(DCTELEM *data);
- extern void simple_idct_ARM(DCTELEM *data);
-@@ -302,4 +303,7 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
- #ifdef HAVE_ARMVFP
- ff_float_init_arm_vfp(c, avctx);
- #endif
-+#ifdef HAVE_NEON
-+ ff_dsputil_init_neon(c, avctx);
-+#endif
- }
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-new file mode 100644
-index 0000000..8a10dde
---- /dev/null
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -0,0 +1,67 @@
-+/*
-+ * ARM NEON optimised DSP functions
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+#include <stdint.h>
-+
-+#include "libavcodec/avcodec.h"
-+#include "libavcodec/dsputil.h"
-+
-+void ff_put_pixels16_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+
-+void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
-+void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
-+
-+void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
-+{
-+ c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
-+ c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
-+ c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
-+ c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
-+ c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
-+ c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
-+ c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
-+ c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
-+
-+ c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
-+ c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
-+ c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
-+
-+ c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon;
-+ c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon;
-+}
-diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
-new file mode 100644
-index 0000000..fc5e401
---- /dev/null
-+++ b/libavcodec/armv4l/dsputil_neon_s.S
-@@ -0,0 +1,254 @@
-+/*
-+ * ARM NEON optimised DSP functions
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+ .fpu neon
-+ .text
-+
-+ .macro put_pixels16
-+ dmb
-+1: vld1.64 {d0, d1}, [r1], r2
-+ vld1.64 {d2, d3}, [r1], r2
-+ vld1.64 {d4, d5}, [r1], r2
-+ vld1.64 {d6, d7}, [r1], r2
-+ pld [r1]
-+ subs r3, r3, #4
-+ vst1.64 {d0, d1}, [r0,:128], r2
-+ vst1.64 {d2, d3}, [r0,:128], r2
-+ vst1.64 {d4, d5}, [r0,:128], r2
-+ vst1.64 {d6, d7}, [r0,:128], r2
-+ bne 1b
-+ bx lr
-+ .endm
-+
-+ .macro put_pixels16_x2 vhadd=vrhadd.u8
-+ dmb
-+1: vld1.64 {d0-d2}, [r1], r2
-+ vld1.64 {d4-d6}, [r1], r2
-+ pld [r1]
-+ subs r3, r3, #2
-+ vext.8 q1, q0, q1, #1
-+ vext.8 q3, q2, q3, #1
-+ \vhadd q0, q0, q1
-+ \vhadd q2, q2, q3
-+ vst1.64 {d0, d1}, [r0,:128], r2
-+ vst1.64 {d4, d5}, [r0,:128], r2
-+ bne 1b
-+ bx lr
-+ .endm
-+
-+ .macro put_pixels16_y2 vhadd=vrhadd.u8
-+ push {lr}
-+ add ip, r1, r2
-+ lsl lr, r2, #1
-+ vld1.64 {d0, d1}, [r1], lr
-+ vld1.64 {d2, d3}, [ip], lr
-+ dmb
-+1: subs r3, r3, #2
-+ \vhadd q2, q0, q1
-+ vld1.64 {d0, d1}, [r1], lr
-+ vst1.64 {d4, d5}, [r0,:128], r2
-+ \vhadd q2, q0, q1
-+ vld1.64 {d2, d3}, [ip], lr
-+ vst1.64 {d4, d5}, [r0,:128], r2
-+ bne 1b
-+ pop {pc}
-+ .endm
-+
-+ .macro put_pixels16_xy2 vshrn=vrshrn.u16 no_rnd=0
-+ push {lr}
-+ lsl lr, r2, #1
-+ add ip, r1, r2
-+ vld1.64 {d0-d2}, [r1], lr
-+ vld1.64 {d4-d6}, [ip], lr
-+ .if \no_rnd
-+ vmov.i16 q13, #1
-+ .endif
-+ pld [r1]
-+ pld [ip]
-+ vext.8 q1, q0, q1, #1
-+ vext.8 q3, q2, q3, #1
-+ vaddl.u8 q8, d0, d2
-+ vaddl.u8 q10, d1, d3
-+ vaddl.u8 q9, d4, d6
-+ vaddl.u8 q11, d5, d7
-+ dmb
-+1: subs r3, r3, #2
-+ vld1.64 {d0-d2}, [r1], lr
-+ vadd.u16 q12, q8, q9
-+ pld [r1]
-+ .if \no_rnd
-+ vadd.u16 q12, q12, q13
-+ .endif
-+ vext.8 q15, q0, q1, #1
-+ vadd.u16 q1 , q10, q11
-+ \vshrn d28, q12, #2
-+ .if \no_rnd
-+ vadd.u16 q1, q1, q13
-+ .endif
-+ \vshrn d29, q1, #2
-+ vaddl.u8 q8, d0, d30
-+ vld1.64 {d2-d4}, [ip], lr
-+ vaddl.u8 q10, d1, d31
-+ vst1.64 {d28,d29}, [r0,:128], r2
-+ vadd.u16 q12, q8, q9
-+ pld [ip]
-+ .if \no_rnd
-+ vadd.u16 q12, q12, q13
-+ .endif
-+ vext.8 q2, q1, q2, #1
-+ vadd.u16 q0, q10, q11
-+ \vshrn d30, q12, #2
-+ .if \no_rnd
-+ vadd.u16 q0, q0, q13
-+ .endif
-+ \vshrn d31, q0, #2
-+ vaddl.u8 q9, d2, d4
-+ vaddl.u8 q11, d3, d5
-+ vst1.64 {d30,d31}, [r0,:128], r2
-+ bgt 1b
-+ pop {pc}
-+ .endm
-+
-+ .macro put_pixels8
-+ dmb
-+1: vld1.64 {d0}, [r1], r2
-+ vld1.64 {d1}, [r1], r2
-+ vld1.64 {d2}, [r1], r2
-+ vld1.64 {d3}, [r1], r2
-+ subs r3, r3, #4
-+ vst1.64 {d0}, [r0,:64], r2
-+ vst1.64 {d1}, [r0,:64], r2
-+ vst1.64 {d2}, [r0,:64], r2
-+ vst1.64 {d3}, [r0,:64], r2
-+ bne 1b
-+ bx lr
-+ .endm
-+
-+ .macro put_pixels8_x2 vhadd=vrhadd.u8
-+ dmb
-+1: vld1.64 {d0, d1}, [r1], r2
-+ vld1.64 {d2, d3}, [r1], r2
-+ pld [r1]
-+ subs r3, r3, #2
-+ vext.8 d1, d0, d1, #1
-+ vext.8 d3, d2, d3, #1
-+ vswp d1, d2
-+ \vhadd q0, q0, q1
-+ vst1.64 {d0}, [r0,:64], r2
-+ vst1.64 {d1}, [r0,:64], r2
-+ bne 1b
-+ bx lr
-+ .endm
-+
-+ .macro put_pixels8_y2 vhadd=vrhadd.u8
-+ push {lr}
-+ add ip, r1, r2
-+ lsl lr, r2, #1
-+ vld1.64 {d0}, [r1], lr
-+ vld1.64 {d1}, [ip], lr
-+ dmb
-+1: subs r3, r3, #2
-+ \vhadd d4, d0, d1
-+ vld1.64 {d0}, [r1], lr
-+ vst1.64 {d4}, [r0,:64], r2
-+ \vhadd d4, d0, d1
-+ vld1.64 {d1}, [ip], lr
-+ vst1.64 {d4}, [r0,:64], r2
-+ bne 1b
-+ pop {pc}
-+ .endm
-+
-+ .macro put_pixels8_xy2 vshrn=vrshrn.u16 no_rnd=0
-+ push {lr}
-+ lsl lr, r2, #1
-+ add ip, r1, r2
-+ vld1.64 {d0, d1}, [r1], lr
-+ vld1.64 {d2, d3}, [ip], lr
-+ .if \no_rnd
-+ vmov.i16 q11, #1
-+ .endif
-+ pld [r1]
-+ pld [ip]
-+ vext.8 d4, d0, d1, #1
-+ vext.8 d6, d2, d3, #1
-+ vaddl.u8 q8, d0, d4
-+ vaddl.u8 q9, d2, d6
-+ dmb
-+1: subs r3, r3, #2
-+ vld1.64 {d0, d1}, [r1], lr
-+ pld [r1]
-+ vadd.u16 q10, q8, q9
-+ vext.8 d4, d0, d1, #1
-+ .if \no_rnd
-+ vadd.u16 q10, q10, q11
-+ .endif
-+ vaddl.u8 q8, d0, d4
-+ \vshrn d5, q10, #2
-+ vld1.64 {d2, d3}, [ip], lr
-+ vadd.u16 q10, q8, q9
-+ pld [ip]
-+ .if \no_rnd
-+ vadd.u16 q10, q10, q11
-+ .endif
-+ vst1.64 {d5}, [r0,:64], r2
-+ \vshrn d7, q10, #2
-+ vext.8 d6, d2, d3, #1
-+ vaddl.u8 q9, d2, d6
-+ vst1.64 {d7}, [r0,:64], r2
-+ bgt 1b
-+ pop {pc}
-+ .endm
-+
-+ .macro extern name
-+ .global \name
-+ .type \name, %function
-+ .func \name
-+\name:
-+ .endm
-+
-+ .macro defun name suf rnd_op args:vararg
-+ extern ff_\name\suf\()_neon
-+ \name \rnd_op \args
-+ .endfunc
-+ .endm
-+
-+ .macro defun2 name args:vararg
-+ defun \name
-+ defun \name \args
-+ .endm
-+
-+ extern ff_put_h264_qpel16_mc00_neon
-+ mov r3, #16
-+ .endfunc
-+
-+ defun put_pixels16
-+ defun2 put_pixels16_x2, _no_rnd, vhadd.u8
-+ defun2 put_pixels16_y2, _no_rnd, vhadd.u8
-+ defun2 put_pixels16_xy2, _no_rnd, vshrn.u16, 1
-+
-+ extern ff_put_h264_qpel8_mc00_neon
-+ mov r3, #8
-+ .endfunc
-+
-+ defun put_pixels8
-+ defun2 put_pixels8_x2, _no_rnd, vhadd.u8
-+ defun2 put_pixels8_y2, _no_rnd, vhadd.u8
-+ defun2 put_pixels8_xy2, _no_rnd, vshrn.u16, 1
diff --git a/packages/mplayer/files/mru-neon-simple-idct.diff b/packages/mplayer/files/mru-neon-simple-idct.diff
deleted file mode 100644
index 772a1fd972..0000000000
--- a/packages/mplayer/files/mru-neon-simple-idct.diff
+++ /dev/null
@@ -1,501 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Thu, 26 Jun 2008 18:37:40 +0000 (+0100)
-Subject: ARM: NEON optimised simple_idct
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=215b9eaa8cf0195908c92f373c018320736ec106
-
-ARM: NEON optimised simple_idct
----
-
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index 27746df..7fa02fa 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -436,6 +436,7 @@ ASM_OBJS-$(HAVE_ARMV6) += armv4l/simple_idct_armv6.o \
- OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \
-
- ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \
-+ armv4l/simple_idct_neon.o \
-
- OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \
- sparc/simple_idct_vis.o \
-diff --git a/libavcodec/armv4l/dsputil_arm.c b/libavcodec/armv4l/dsputil_arm.c
-index 89b51e7..942c0de 100644
---- a/libavcodec/armv4l/dsputil_arm.c
-+++ b/libavcodec/armv4l/dsputil_arm.c
-@@ -43,6 +43,12 @@ extern void ff_simple_idct_put_armv6(uint8_t *dest, int line_size,
- extern void ff_simple_idct_add_armv6(uint8_t *dest, int line_size,
- DCTELEM *data);
-
-+extern void ff_simple_idct_neon(DCTELEM *data);
-+extern void ff_simple_idct_put_neon(uint8_t *dest, int line_size,
-+ DCTELEM *data);
-+extern void ff_simple_idct_add_neon(uint8_t *dest, int line_size,
-+ DCTELEM *data);
-+
- /* XXX: local hack */
- static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
- static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
-@@ -233,6 +239,8 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
- if(idct_algo == FF_IDCT_AUTO){
- #if defined(HAVE_IPP)
- idct_algo = FF_IDCT_IPP;
-+#elif defined(HAVE_NEON)
-+ idct_algo = FF_IDCT_SIMPLENEON;
- #elif defined(HAVE_ARMV6)
- idct_algo = FF_IDCT_SIMPLEARMV6;
- #elif defined(HAVE_ARMV5TE)
-@@ -273,6 +281,13 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
- c->idct = simple_idct_ipp;
- c->idct_permutation_type= FF_NO_IDCT_PERM;
- #endif
-+#ifdef HAVE_NEON
-+ } else if (idct_algo==FF_IDCT_SIMPLENEON){
-+ c->idct_put= ff_simple_idct_put_neon;
-+ c->idct_add= ff_simple_idct_add_neon;
-+ c->idct = ff_simple_idct_neon;
-+ c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
-+#endif
- }
- }
-
-diff --git a/libavcodec/armv4l/simple_idct_neon.S b/libavcodec/armv4l/simple_idct_neon.S
-new file mode 100644
-index 0000000..44701f8
---- /dev/null
-+++ b/libavcodec/armv4l/simple_idct_neon.S
-@@ -0,0 +1,411 @@
-+/*
-+ * ARM NEON IDCT
-+ *
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * Based on Simple IDCT
-+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-+#define W4c ((1<<(COL_SHIFT-1))/W4)
-+#define ROW_SHIFT 11
-+#define COL_SHIFT 20
-+
-+#define w1 d0[0]
-+#define w2 d0[1]
-+#define w3 d0[2]
-+#define w4 d0[3]
-+#define w5 d1[0]
-+#define w6 d1[1]
-+#define w7 d1[2]
-+#define w4c d1[3]
-+
-+ .fpu neon
-+
-+ .macro idct_col4_top
-+ vmull.s16 q7, d6, w2 /* q9 = W2 * col[2] */
-+ vmull.s16 q8, d6, w6 /* q10 = W6 * col[2] */
-+ vmull.s16 q9, d4, w1 /* q9 = W1 * col[1] */
-+ vadd.i32 q11, q15, q7
-+ vmull.s16 q10, d4, w3 /* q10 = W3 * col[1] */
-+ vadd.i32 q12, q15, q8
-+ vmull.s16 q5, d4, w5 /* q5 = W5 * col[1] */
-+ vsub.i32 q13, q15, q8
-+ vmull.s16 q6, d4, w7 /* q6 = W7 * col[1] */
-+ vsub.i32 q14, q15, q7
-+
-+ vmlal.s16 q9, d8, w3 /* q9 += W3 * col[3] */
-+ vmlsl.s16 q10, d8, w7 /* q10 -= W7 * col[3] */
-+ vmlsl.s16 q5, d8, w1 /* q5 -= W1 * col[3] */
-+ vmlsl.s16 q6, d8, w5 /* q6 -= W5 * col[3] */
-+ .endm
-+
-+ .text
-+ .align
-+ .type idct_row4_neon, %function
-+ .func idct_row4_neon
-+idct_row4_neon:
-+ vmov.i32 q15, #(1<<(ROW_SHIFT-1))
-+ vld1.64 {d2-d5}, [a3,:128]!
-+ vmlal.s16 q15, d2, w4 /* q15 += W4 * col[0] */
-+ vld1.64 {d6,d7}, [a3,:128]!
-+ vorr d10, d3, d5
-+ vld1.64 {d8,d9}, [a3,:128]!
-+ add a3, a3, #-64
-+
-+ vorr d11, d7, d9
-+ vorr d10, d10, d11
-+ vmov a4, v1, d10
-+
-+ idct_col4_top
-+
-+ orrs a4, a4, v1
-+ beq 1f
-+
-+ vmull.s16 q7, d3, w4 /* q7 = W4 * col[4] */
-+ vmlal.s16 q9, d5, w5 /* q9 += W5 * col[5] */
-+ vmlsl.s16 q10, d5, w1 /* q10 -= W1 * col[5] */
-+ vmull.s16 q8, d7, w2 /* q8 = W2 * col[6] */
-+ vmlal.s16 q5, d5, w7 /* q5 += W7 * col[5] */
-+ vadd.i32 q11, q11, q7
-+ vsub.i32 q12, q12, q7
-+ vsub.i32 q13, q13, q7
-+ vadd.i32 q14, q14, q7
-+ vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
-+ vmull.s16 q7, d7, w6 /* q7 = W6 * col[6] */
-+ vmlal.s16 q9, d9, w7
-+ vmlsl.s16 q10, d9, w5
-+ vmlal.s16 q5, d9, w3
-+ vmlsl.s16 q6, d9, w1
-+ vadd.i32 q11, q11, q7
-+ vsub.i32 q12, q12, q8
-+ vadd.i32 q13, q13, q8
-+ vsub.i32 q14, q14, q7
-+
-+1: vadd.i32 q3, q11, q9
-+ vadd.i32 q4, q12, q10
-+ vshrn.i32 d2, q3, #ROW_SHIFT
-+ vshrn.i32 d4, q4, #ROW_SHIFT
-+ vadd.i32 q7, q13, q5
-+ vadd.i32 q8, q14, q6
-+ vtrn.16 d2, d4
-+ vshrn.i32 d6, q7, #ROW_SHIFT
-+ vshrn.i32 d8, q8, #ROW_SHIFT
-+ vsub.i32 q14, q14, q6
-+ vsub.i32 q11, q11, q9
-+ vtrn.16 d6, d8
-+ vsub.i32 q13, q13, q5
-+ vshrn.i32 d3, q14, #ROW_SHIFT
-+ vtrn.32 d2, d6
-+ vsub.i32 q12, q12, q10
-+ vtrn.32 d4, d8
-+ vshrn.i32 d5, q13, #ROW_SHIFT
-+ vshrn.i32 d7, q12, #ROW_SHIFT
-+ vshrn.i32 d9, q11, #ROW_SHIFT
-+
-+ vtrn.16 d3, d5
-+ vtrn.16 d7, d9
-+ vtrn.32 d3, d7
-+ vtrn.32 d5, d9
-+
-+ vst1.64 {d2-d5}, [a3,:128]!
-+ vst1.64 {d6-d9}, [a3,:128]!
-+
-+ bx lr
-+ .endfunc
-+
-+ .align
-+ .type idct_col4_neon, %function
-+ .func idct_col4_neon
-+idct_col4_neon:
-+ mov ip, #16
-+ vld1.64 {d2}, [a3,:64], ip /* d2 = col[0] */
-+ vdup.16 d30, w4c
-+ vld1.64 {d4}, [a3,:64], ip /* d3 = col[1] */
-+ vadd.i16 d30, d30, d2
-+ vld1.64 {d6}, [a3,:64], ip /* d4 = col[2] */
-+ vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
-+ vld1.64 {d8}, [a3,:64], ip /* d5 = col[3] */
-+
-+ ldrd v1, [a3]
-+ ldrd v3, [a3, #16]
-+ orrs v1, v1, v2
-+
-+ idct_col4_top
-+ addeq a3, a3, #16
-+ beq 1f
-+
-+ vld1.64 {d3}, [a3,:64], ip /* d6 = col[4] */
-+ vmull.s16 q7, d3, w4 /* q7 = W4 * col[4] */
-+ vadd.i32 q11, q11, q7
-+ vsub.i32 q12, q12, q7
-+ vsub.i32 q13, q13, q7
-+ vadd.i32 q14, q14, q7
-+
-+1: orrs v3, v3, v4
-+ ldrd v1, [a3, #16]
-+ addeq a3, a3, #16
-+ beq 2f
-+
-+ vld1.64 {d5}, [a3,:64], ip /* d7 = col[5] */
-+ vmlal.s16 q9, d5, w5 /* q9 += W5 * col[5] */
-+ vmlsl.s16 q10, d5, w1 /* q10 -= W1 * col[5] */
-+ vmlal.s16 q5, d5, w7 /* q5 += W7 * col[5] */
-+ vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
-+
-+2: orrs v1, v1, v2
-+ ldrd v1, [a3, #16]
-+ addeq a3, a3, #16
-+ beq 3f
-+
-+ vld1.64 {d7}, [a3,:64], ip /* d8 = col[6] */
-+ vmull.s16 q7, d7, w6 /* q7 = W6 * col[6] */
-+ vmull.s16 q8, d7, w2 /* q8 = W2 * col[6] */
-+ vadd.i32 q11, q11, q7
-+ vsub.i32 q14, q14, q7
-+ vsub.i32 q12, q12, q8
-+ vadd.i32 q13, q13, q8
-+
-+3: orrs v1, v1, v2
-+ addeq a3, a3, #16
-+ beq 4f
-+
-+ vld1.64 {d9}, [a3,:64], ip /* d9 = col[7] */
-+ vmlal.s16 q9, d9, w7
-+ vmlsl.s16 q10, d9, w5
-+ vmlal.s16 q5, d9, w3
-+ vmlsl.s16 q6, d9, w1
-+
-+4: vadd.i32 q3, q11, q9
-+ vadd.i32 q4, q12, q10
-+ vadd.i32 q7, q13, q5
-+ vadd.i32 q8, q14, q6
-+ vsub.i32 q11, q11, q9
-+ vsub.i32 q12, q12, q10
-+ vsub.i32 q13, q13, q5
-+ vsub.i32 q14, q14, q6
-+
-+ bx lr
-+ .endfunc
-+
-+ .macro idct_col4_st16
-+ vshr.s32 q2, q3, #COL_SHIFT
-+ vshr.s32 q3, q4, #COL_SHIFT
-+ vmovn.i32 d2, q2
-+ vshr.s32 q4, q7, #COL_SHIFT
-+ vmovn.i32 d3, q3
-+ vshr.s32 q5, q8, #COL_SHIFT
-+ vmovn.i32 d4, q4
-+ vshr.s32 q6, q14, #COL_SHIFT
-+ vmovn.i32 d5, q5
-+ vshr.s32 q7, q13, #COL_SHIFT
-+ vmovn.i32 d6, q6
-+ vshr.s32 q8, q12, #COL_SHIFT
-+ vmovn.i32 d7, q7
-+ vshr.s32 q9, q11, #COL_SHIFT
-+ vmovn.i32 d8, q8
-+ vmovn.i32 d9, q9
-+
-+ mov ip, #16
-+ vst1.64 {d2}, [a3,:64], ip
-+ vst1.64 {d3}, [a3,:64], ip
-+ vst1.64 {d4}, [a3,:64], ip
-+ vst1.64 {d5}, [a3,:64], ip
-+ vst1.64 {d6}, [a3,:64], ip
-+ vst1.64 {d7}, [a3,:64], ip
-+ vst1.64 {d8}, [a3,:64], ip
-+ vst1.64 {d9}, [a3,:64], ip
-+ .endm
-+
-+ .align
-+ .type idct_col4_add8, %function
-+ .func idct_col4_add8
-+idct_col4_add8:
-+ mov ip, a1
-+
-+ vshr.s32 q2, q3, #COL_SHIFT
-+ vshr.s32 q3, q4, #COL_SHIFT
-+ vmovn.i32 d2, q2
-+ vshr.s32 q4, q7, #COL_SHIFT
-+ vmovn.i32 d3, q3
-+ vshr.s32 q5, q8, #COL_SHIFT
-+ vmovn.i32 d4, q4
-+ vshr.s32 q6, q14, #COL_SHIFT
-+ vmovn.i32 d5, q5
-+ vld1.32 {d10[0]}, [a1,:32], a2
-+ vshr.s32 q7, q13, #COL_SHIFT
-+ vld1.32 {d10[1]}, [a1,:32], a2
-+ vmovn.i32 d6, q6
-+ vld1.32 {d11[0]}, [a1,:32], a2
-+ vshr.s32 q8, q12, #COL_SHIFT
-+ vld1.32 {d11[1]}, [a1,:32], a2
-+ vaddw.u8 q1, q1, d10
-+ vld1.32 {d12[0]}, [a1,:32], a2
-+ vmovn.i32 d7, q7
-+ vld1.32 {d12[1]}, [a1,:32], a2
-+ vqmovun.s16 d2, q1
-+ vld1.32 {d13[0]}, [a1,:32], a2
-+ vshr.s32 q9, q11, #COL_SHIFT
-+ vaddw.u8 q2, q2, d11
-+ vld1.32 {d13[1]}, [a1,:32], a2
-+ vaddw.u8 q3, q3, d12
-+ vst1.32 {d2[0]}, [ip,:32], a2
-+ vqmovun.s16 d3, q2
-+ vst1.32 {d2[1]}, [ip,:32], a2
-+ vmovn.i32 d8, q8
-+ vmovn.i32 d9, q9
-+ vst1.32 {d3[0]}, [ip,:32], a2
-+ vqmovun.s16 d4, q3
-+ vst1.32 {d3[1]}, [ip,:32], a2
-+ vaddw.u8 q4, q4, d13
-+ vst1.32 {d4[0]}, [ip,:32], a2
-+ vqmovun.s16 d5, q4
-+ vst1.32 {d4[1]}, [ip,:32], a2
-+ vst1.32 {d5[0]}, [ip,:32], a2
-+ vst1.32 {d5[1]}, [ip,:32], a2
-+
-+ bx lr
-+ .endfunc
-+
-+ .type idct_col4_st8, %function
-+ .func idct_col4_st8
-+idct_col4_st8:
-+ vshr.s32 q2, q3, #COL_SHIFT
-+ vshr.s32 q3, q4, #COL_SHIFT
-+ vmovn.i32 d2, q2
-+ vshr.s32 q4, q7, #COL_SHIFT
-+ vmovn.i32 d3, q3
-+ vshr.s32 q5, q8, #COL_SHIFT
-+ vqmovun.s16 d2, q1
-+ vmovn.i32 d4, q4
-+ vshr.s32 q6, q14, #COL_SHIFT
-+ vst1.32 {d2[0]}, [a1,:32], a2
-+ vmovn.i32 d5, q5
-+ vshr.s32 q7, q13, #COL_SHIFT
-+ vst1.32 {d2[1]}, [a1,:32], a2
-+ vmovn.i32 d6, q6
-+ vqmovun.s16 d3, q2
-+ vshr.s32 q8, q12, #COL_SHIFT
-+ vmovn.i32 d7, q7
-+ vshr.s32 q9, q11, #COL_SHIFT
-+ vst1.32 {d3[0]}, [a1,:32], a2
-+ vqmovun.s16 d4, q3
-+ vst1.32 {d3[1]}, [a1,:32], a2
-+ vmovn.i32 d8, q8
-+ vmovn.i32 d9, q9
-+ vst1.32 {d4[0]}, [a1,:32], a2
-+ vst1.32 {d4[1]}, [a1,:32], a2
-+ vqmovun.s16 d5, q4
-+ vst1.32 {d5[0]}, [a1,:32], a2
-+ vst1.32 {d5[1]}, [a1,:32], a2
-+
-+ bx lr
-+ .endfunc
-+
-+ .align 4
-+const: .short W1, W2, W3, W4, W5, W6, W7, W4c
-+
-+ .macro idct_start data
-+ push {v1-v4, lr}
-+ pld [\data]
-+ pld [\data, #64]
-+ dmb
-+ vpush {d8-d15}
-+ adr a4, const
-+ vld1.64 {d0,d1}, [a4,:128]
-+ .endm
-+
-+ .macro idct_end
-+ vpop {d8-d15}
-+ pop {v1-v4, pc}
-+ .endm
-+
-+ .align
-+ .global ff_simple_idct_neon
-+ .type ff_simple_idct_neon, %function
-+ .func ff_simple_idct_neon
-+/* void ff_simple_idct_neon(DCTELEM *data); */
-+ff_simple_idct_neon:
-+ idct_start a1
-+
-+ mov a3, a1
-+ bl idct_row4_neon
-+ bl idct_row4_neon
-+ add a3, a3, #-128
-+ bl idct_col4_neon
-+ add a3, a3, #-128
-+ idct_col4_st16
-+ add a3, a3, #-120
-+ bl idct_col4_neon
-+ add a3, a3, #-128
-+ idct_col4_st16
-+
-+ idct_end
-+ .endfunc
-+
-+ .align
-+ .global ff_simple_idct_put_neon
-+ .type ff_simple_idct_put_neon, %function
-+ .func ff_simple_idct_put_neon
-+/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); */
-+ff_simple_idct_put_neon:
-+ idct_start a3
-+
-+ bl idct_row4_neon
-+ bl idct_row4_neon
-+ add a3, a3, #-128
-+ bl idct_col4_neon
-+ bl idct_col4_st8
-+ sub a1, a1, a2, lsl #3
-+ add a1, a1, #4
-+ add a3, a3, #-120
-+ bl idct_col4_neon
-+ bl idct_col4_st8
-+
-+ idct_end
-+ .endfunc
-+
-+ .align
-+ .global ff_simple_idct_add_neon
-+ .type ff_simple_idct_add_neon, %function
-+ .func ff_simple_idct_add_neon
-+/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
-+ff_simple_idct_add_neon:
-+ idct_start a3
-+
-+ bl idct_row4_neon
-+ bl idct_row4_neon
-+ add a3, a3, #-128
-+ bl idct_col4_neon
-+ bl idct_col4_add8
-+ sub a1, a1, a2, lsl #3
-+ add a1, a1, #4
-+ add a3, a3, #-120
-+ bl idct_col4_neon
-+ bl idct_col4_add8
-+
-+ idct_end
-+ .endfunc
-diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
-index 76522c4..43e2ef3 100644
---- a/libavcodec/avcodec.h
-+++ b/libavcodec/avcodec.h
-@@ -1352,6 +1352,7 @@ typedef struct AVCodecContext {
- #define FF_IDCT_SIMPLEVIS 18
- #define FF_IDCT_WMV2 19
- #define FF_IDCT_FAAN 20
-+#define FF_IDCT_SIMPLENEON 21
-
- /**
- * slice count
-diff --git a/libavcodec/utils.c b/libavcodec/utils.c
-index cf00d25..3d1afcf 100644
---- a/libavcodec/utils.c
-+++ b/libavcodec/utils.c
-@@ -549,6 +549,7 @@ static const AVOption options[]={
- {"simplearm", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARM, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"simplearmv5te", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARMV5TE, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"simplearmv6", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARMV6, INT_MIN, INT_MAX, V|E|D, "idct"},
-+{"simpleneon", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLENEON, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"h264", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_H264, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"vp3", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_VP3, INT_MIN, INT_MAX, V|E|D, "idct"},
- {"ipp", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_IPP, INT_MIN, INT_MAX, V|E|D, "idct"},
diff --git a/packages/mplayer/files/mru-neon-vector-fmul-window.diff b/packages/mplayer/files/mru-neon-vector-fmul-window.diff
deleted file mode 100644
index 03ac55bc56..0000000000
--- a/packages/mplayer/files/mru-neon-vector-fmul-window.diff
+++ /dev/null
@@ -1,86 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Sun, 3 Aug 2008 16:46:43 +0000 (+0100)
-Subject: ARM: NEON optimised vector_fmul_window
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=98feb31064dccfd16ce189ff4aec9ccedddf6b04
-
-ARM: NEON optimised vector_fmul_window
----
-
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-index f9d32c0..6c44940 100644
---- a/libavcodec/armv4l/dsputil_neon.c
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -91,6 +91,10 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
- void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
- void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
-
-+void ff_vector_fmul_window_neon(float *dst, const float *src0,
-+ const float *src1, const float *win,
-+ float add_bias, int len);
-+
- void ff_float_to_int16_neon(int16_t *, const float *, long);
- void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
-
-@@ -164,6 +168,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- c->h264_idct_add = ff_h264_idct_add_neon;
- c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
-
-+ c->vector_fmul_window = ff_vector_fmul_window_neon;
-+
- c->float_to_int16 = ff_float_to_int16_neon;
- c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
-
-diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
-index 6a54803..49a09b8 100644
---- a/libavcodec/armv4l/dsputil_neon_s.S
-+++ b/libavcodec/armv4l/dsputil_neon_s.S
-@@ -324,6 +324,49 @@ extern ff_float_to_int16_interleave_neon
- pop {r4,r5,pc}
- .endfunc
-
-+extern ff_vector_fmul_window_neon
-+ vld1.32 {d16[],d17[]}, [sp,:32]
-+ push {r4,r5,lr}
-+ ldr lr, [sp, #16]
-+ sub r2, r2, #8
-+ sub r5, lr, #2
-+ add r2, r2, r5, lsl #2
-+ add r4, r3, r5, lsl #3
-+ add ip, r0, r5, lsl #3
-+ mov r5, #-16
-+ dmb
-+ vld1.64 {d0,d1}, [r1,:128]!
-+ vld1.64 {d2,d3}, [r2,:128], r5
-+ vld1.64 {d4,d5}, [r3,:128]!
-+ vld1.64 {d6,d7}, [r4,:128], r5
-+1: vmov q10, q8
-+ vmov q11, q8
-+ vmla.f32 q11, q0, q2
-+ vrev64.32 q3, q3
-+ vswp d6, d7
-+ vmla.f32 q10, q0, q3
-+ vrev64.32 q1, q1
-+ vswp d2, d3
-+ subs lr, lr, #4
-+ vmla.f32 q11, q1, q3
-+ vmls.f32 q10, q1, q2
-+ beq 2f
-+ vld1.64 {d0,d1}, [r1,:128]!
-+ vld1.64 {d2,d3}, [r2,:128], r5
-+ vld1.64 {d4,d5}, [r3,:128]!
-+ vld1.64 {d6,d7}, [r4,:128], r5
-+ vrev64.32 q11, q11
-+ vswp d22, d23
-+ vst1.64 {d20,d21}, [r0,:128]!
-+ vst1.64 {d22,d23}, [ip,:128], r5
-+ b 1b
-+2: vrev64.32 q11, q11
-+ vswp d22, d23
-+ vst1.64 {d20,d21}, [r0,:128]!
-+ vst1.64 {d22,d23}, [ip,:128], r5
-+ pop {r4,r5,pc}
-+ .endfunc
-+
- #ifdef CONFIG_VORBIS_DECODER
- extern ff_vorbis_inverse_coupling_neon
- vmov.i32 q10, #(1<<31)
diff --git a/packages/mplayer/files/mru-neon-vector-fmul.diff b/packages/mplayer/files/mru-neon-vector-fmul.diff
deleted file mode 100644
index 2710f10443..0000000000
--- a/packages/mplayer/files/mru-neon-vector-fmul.diff
+++ /dev/null
@@ -1,56 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Sun, 3 Aug 2008 17:13:06 +0000 (+0100)
-Subject: ARM: NEON optimised vector_fmul
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=ba46eb14e3be96b627fd096aacaa4dbb2e186281
-
-ARM: NEON optimised vector_fmul
----
-
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-index 6c44940..c6fc173 100644
---- a/libavcodec/armv4l/dsputil_neon.c
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -91,6 +91,7 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
- void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
- void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
-
-+void ff_vector_fmul_neon(float *dst, const float *src, int len);
- void ff_vector_fmul_window_neon(float *dst, const float *src0,
- const float *src1, const float *win,
- float add_bias, int len);
-@@ -168,6 +169,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- c->h264_idct_add = ff_h264_idct_add_neon;
- c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
-
-+ c->vector_fmul = ff_vector_fmul_neon;
- c->vector_fmul_window = ff_vector_fmul_window_neon;
-
- c->float_to_int16 = ff_float_to_int16_neon;
-diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
-index 49a09b8..7310700 100644
---- a/libavcodec/armv4l/dsputil_neon_s.S
-+++ b/libavcodec/armv4l/dsputil_neon_s.S
-@@ -324,6 +324,23 @@ extern ff_float_to_int16_interleave_neon
- pop {r4,r5,pc}
- .endfunc
-
-+extern ff_vector_fmul_neon
-+ mov r3, r0
-+ vld1.64 {d0-d3}, [r0,:128]!
-+ vld1.64 {d4-d7}, [r1,:128]!
-+ dmb
-+1: subs r2, r2, #8
-+ vmul.f32 q8, q0, q2
-+ vmul.f32 q9, q1, q3
-+ beq 2f
-+ vld1.64 {d0-d3}, [r0,:128]!
-+ vld1.64 {d4-d7}, [r1,:128]!
-+ vst1.64 {d16-d19}, [r3,:128]!
-+ b 1b
-+2: vst1.64 {d16-d19}, [r3,:128]!
-+ bx lr
-+ .endfunc
-+
- extern ff_vector_fmul_window_neon
- vld1.32 {d16[],d17[]}, [sp,:32]
- push {r4,r5,lr}
diff --git a/packages/mplayer/files/mru-neon-vorbis-inverse.diff b/packages/mplayer/files/mru-neon-vorbis-inverse.diff
deleted file mode 100644
index 6cd5dc0134..0000000000
--- a/packages/mplayer/files/mru-neon-vorbis-inverse.diff
+++ /dev/null
@@ -1,68 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Fri, 1 Aug 2008 02:28:34 +0000 (+0100)
-Subject: ARM: NEON optimised vorbis_inverse_coupling
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=ac234c5ad52d8478be5aaa7c276e423873453d8b
-
-ARM: NEON optimised vorbis_inverse_coupling
----
-
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-index b584e5b..f9d32c0 100644
---- a/libavcodec/armv4l/dsputil_neon.c
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -94,6 +94,8 @@ void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
- void ff_float_to_int16_neon(int16_t *, const float *, long);
- void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
-
-+void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
-+
- void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
- {
- c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
-@@ -164,4 +166,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
-
- c->float_to_int16 = ff_float_to_int16_neon;
- c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
-+
-+#ifdef CONFIG_VORBIS_DECODER
-+ c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
-+#endif
- }
-diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
-index 44f75ba..6a54803 100644
---- a/libavcodec/armv4l/dsputil_neon_s.S
-+++ b/libavcodec/armv4l/dsputil_neon_s.S
-@@ -19,6 +19,8 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-+#include "config.h"
-+
- .fpu neon
- .text
-
-@@ -321,3 +323,24 @@ extern ff_float_to_int16_interleave_neon
- bne 3b
- pop {r4,r5,pc}
- .endfunc
-+
-+#ifdef CONFIG_VORBIS_DECODER
-+extern ff_vorbis_inverse_coupling_neon
-+ vmov.i32 q10, #(1<<31)
-+ dmb
-+1: vld1.64 {d2,d3}, [r1,:128]
-+ vld1.64 {d0,d1}, [r0,:128]
-+ vcle.f32 q8, q1, #0
-+ vand q9, q0, q10
-+ veor q1, q1, q9
-+ vand q2, q1, q8
-+ vbic q3, q1, q8
-+ vadd.f32 q1, q0, q2
-+ vsub.f32 q0, q0, q3
-+ subs r2, r2, #4
-+ vst1.64 {d0,d1}, [r1,:128]!
-+ vst1.64 {d2,d3}, [r0,:128]!
-+ bgt 1b
-+ bx lr
-+ .endfunc
-+#endif
diff --git a/packages/mplayer/files/omapfb.patch b/packages/mplayer/files/omapfb.patch
index 5c9bca7a6f..860cf070f4 100644
--- a/packages/mplayer/files/omapfb.patch
+++ b/packages/mplayer/files/omapfb.patch
@@ -1,5 +1,5 @@
---- a/libvo/video_out.c 2008-11-07 11:59:48.000000000 -0800
-+++ b/libvo/video_out.c 2008-11-07 12:01:52.000000000 -0800
+--- /tmp/video_out.c 2009-01-14 16:39:38.000000000 +0100
++++ trunk/libvo/video_out.c 2009-01-14 16:40:11.000000000 +0100
@@ -86,6 +86,7 @@
extern vo_functions_t video_out_bl;
extern vo_functions_t video_out_fbdev;
@@ -8,22 +8,3 @@
extern vo_functions_t video_out_svga;
extern vo_functions_t video_out_png;
extern vo_functions_t video_out_ggi;
-@@ -172,6 +173,7 @@
- #ifdef CONFIG_FBDEV
- &video_out_fbdev,
- &video_out_fbdev2,
-+ &video_out_omapfb,
- #endif
- #ifdef CONFIG_SVGALIB
- &video_out_svga,
---- a/configure 2008-11-07 12:00:32.000000000 -0800
-+++ b/configure 2008-11-07 12:13:31.000000000 -0800
-@@ -4558,7 +4558,7 @@
- fi
- if test "$_fbdev" = yes ; then
- _def_fbdev='#define CONFIG_FBDEV 1'
-- _vosrc="$_vosrc vo_fbdev.c vo_fbdev2.c"
-+ _vosrc="$_vosrc vo_fbdev.c vo_fbdev2.c vo_omapfb.c yuv.S"
- _vomodules="fbdev $_vomodules"
- else
- _def_fbdev='#undef CONFIG_FBDEV'
diff --git a/packages/mplayer/files/pld-onlyarm5-svn.patch b/packages/mplayer/files/pld-onlyarm5-svn.patch
new file mode 100644
index 0000000000..0924060c6c
--- /dev/null
+++ b/packages/mplayer/files/pld-onlyarm5-svn.patch
@@ -0,0 +1,405 @@
+--- MPlayer-1.0pre8/libavcodec/arm/dsputil_arm_s.S.orig 2006-07-03 09:53:33.000000000 +0100
++++ MPlayer-1.0pre8/libavcodec/arm/dsputil_arm_s.S 2006-07-03 10:06:58.000000000 +0100
+@@ -16,6 +16,13 @@
+ @ License along with this library; if not, write to the Free Software
+ @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ @
++#if defined(__ARM_ARCH_5__) || \
++ defined(__ARM_ARCH_5T__) || \
++ defined(__ARM_ARCH_5TE__)
++#define PLD(code...) code
++#else
++#define PLD(code...)
++#endif
+
+ .macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
+ mov \Rd0, \Rn0, lsr #(\shift * 8)
+@@ -74,7 +81,7 @@
+ put_pixels16_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+- pld [r1]
++ PLD ( pld [r1] )
+ stmfd sp!, {r4-r11, lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+@@ -85,7 +92,7 @@
+ ldmia r1, {r4-r7}
+ add r1, r1, r2
+ stmia r0, {r4-r7}
+- pld [r1]
++ PLD ( pld [r1] )
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+@@ -95,7 +102,7 @@
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
+- pld [r1]
++ PLD ( pld [r1] )
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+@@ -106,7 +113,7 @@
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
+- pld [r1]
++ PLD ( pld [r1] )
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+@@ -117,7 +124,7 @@
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
+- pld [r1]
++ PLD ( pld [r1] )
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+@@ -136,7 +143,7 @@
+ put_pixels8_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+- pld [r1]
++ PLD ( pld [r1] )
+ stmfd sp!, {r4-r5,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+@@ -147,7 +154,7 @@
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ subs r3, r3, #1
+- pld [r1]
++ PLD ( pld [r1] )
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 1b
+@@ -157,7 +164,7 @@
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
+- pld [r1]
++ PLD ( pld [r1] )
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+@@ -168,7 +175,7 @@
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
+- pld [r1]
++ PLD ( pld [r1] )
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+@@ -179,7 +186,7 @@
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
+- pld [r1]
++ PLD ( pld [r1] )
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+@@ -198,7 +205,7 @@
+ put_pixels8_x2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+- pld [r1]
++ PLD ( pld [r1] )
+ stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+@@ -210,7 +217,7 @@
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+- pld [r1]
++ PLD ( pld [r1] )
+ RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+@@ -223,7 +230,7 @@
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+- pld [r1]
++ PLD ( pld [r1] )
+ RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+@@ -236,7 +243,7 @@
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+- pld [r1]
++ PLD ( pld [r1] )
+ RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+@@ -248,7 +255,7 @@
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+- pld [r1]
++ PLD ( pld [r1] )
+ RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+@@ -267,7 +274,7 @@
+ put_no_rnd_pixels8_x2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+- pld [r1]
++ PLD ( pld [r1] )
+ stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+@@ -279,7 +286,7 @@
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+- pld [r1]
++ PLD ( pld [r1] )
+ NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+@@ -292,7 +299,7 @@
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+- pld [r1]
++ PLD ( pld [r1] )
+ NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+@@ -305,7 +312,7 @@
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+- pld [r1]
++ PLD ( pld [r1] )
+ NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+@@ -317,7 +324,7 @@
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+- pld [r1]
++ PLD ( pld [r1] )
+ NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+@@ -338,7 +345,7 @@
+ put_pixels8_y2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+- pld [r1]
++ PLD ( pld [r1] )
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+@@ -352,13 +359,13 @@
+ add r1, r1, r2
+ 6: ldmia r1, {r6-r7}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+@@ -369,18 +376,18 @@
+ 2:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ 6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+@@ -392,18 +399,18 @@
+ 3:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ 6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+@@ -415,18 +422,18 @@
+ 4:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ 6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+@@ -447,7 +454,7 @@
+ put_no_rnd_pixels8_y2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+- pld [r1]
++ PLD ( pld [r1] )
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+@@ -461,13 +468,13 @@
+ add r1, r1, r2
+ 6: ldmia r1, {r6-r7}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+@@ -478,18 +485,18 @@
+ 2:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ 6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+@@ -501,18 +508,18 @@
+ 3:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ 6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+@@ -524,18 +531,18 @@
+ 4:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ 6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+@@ -562,7 +569,7 @@
+ ldmia r1, {r8-r10}
+ .endif
+ add r1, r1, r2
+- pld [r1]
++ PLD ( pld [r1] )
+ .if \align == 0
+ ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
+ .elseif \align == 1
+@@ -624,7 +631,7 @@
+ put_pixels8_xy2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+- pld [r1]
++ PLD ( pld [r1] )
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adrl r12, 5f
+ ands r4, r1, #3
+@@ -661,7 +668,7 @@
+ put_no_rnd_pixels8_xy2_arm:
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+- pld [r1]
++ PLD ( pld [r1] )
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adrl r12, 5f
+ ands r4, r1, #3