From 90fe93c652fb493f3cfe6ef83123bee2b3bd8b3d Mon Sep 17 00:00:00 2001 From: Koen Kooi Date: Wed, 14 Jan 2009 17:54:40 +0100 Subject: mplayer: update to a more recent svn versions, enable theora and dvd support --- conf/distro/include/sane-srcrevs.inc | 2 +- .../mplayer/files/mru-neon-float-to-int16.diff | 107 -- packages/mplayer/files/mru-neon-h264-chrome.diff | 364 ------- .../mplayer/files/mru-neon-h264-loopfilter.diff | 346 ------- packages/mplayer/files/mru-neon-h264-qpel.diff | 1040 -------------------- packages/mplayer/files/mru-neon-h264idct-dc.diff | 55 -- packages/mplayer/files/mru-neon-h264idctadd.diff | 123 --- packages/mplayer/files/mru-neon-put-pixels.diff | 376 ------- packages/mplayer/files/mru-neon-simple-idct.diff | 501 ---------- .../mplayer/files/mru-neon-vector-fmul-window.diff | 86 -- packages/mplayer/files/mru-neon-vector-fmul.diff | 56 -- .../mplayer/files/mru-neon-vorbis-inverse.diff | 68 -- packages/mplayer/files/omapfb.patch | 23 +- packages/mplayer/files/pld-onlyarm5-svn.patch | 405 ++++++++ packages/mplayer/mplayer_svn.bb | 235 +++-- 15 files changed, 522 insertions(+), 3265 deletions(-) delete mode 100644 packages/mplayer/files/mru-neon-float-to-int16.diff delete mode 100644 packages/mplayer/files/mru-neon-h264-chrome.diff delete mode 100644 packages/mplayer/files/mru-neon-h264-loopfilter.diff delete mode 100644 packages/mplayer/files/mru-neon-h264-qpel.diff delete mode 100644 packages/mplayer/files/mru-neon-h264idct-dc.diff delete mode 100644 packages/mplayer/files/mru-neon-h264idctadd.diff delete mode 100644 packages/mplayer/files/mru-neon-put-pixels.diff delete mode 100644 packages/mplayer/files/mru-neon-simple-idct.diff delete mode 100644 packages/mplayer/files/mru-neon-vector-fmul-window.diff delete mode 100644 packages/mplayer/files/mru-neon-vector-fmul.diff delete mode 100644 packages/mplayer/files/mru-neon-vorbis-inverse.diff create mode 100644 packages/mplayer/files/pld-onlyarm5-svn.patch diff --git a/conf/distro/include/sane-srcrevs.inc b/conf/distro/include/sane-srcrevs.inc index cc367f8b26..67b0f9b972 100644 --- a/conf/distro/include/sane-srcrevs.inc +++ b/conf/distro/include/sane-srcrevs.inc @@ -114,7 +114,7 @@ SRCREV_pn-moblin-proto ?= "8f2cb524fe06555182c25b4ba3202d7b368ac0ce" SRCREV_pn-moko-gtk-engine ?= "4734" SRCREV_pn-mokoko ?= "127" SRCREV_pn-mpd-alsa ?= "6952" -SRCREV_pn-mplayer ?= "27659" +SRCREV_pn-mplayer ?= "28311" SRCREV_pn-mplayer-maemo ?= "342" SRCREV_pn-multicat ?= "eb39ce7fb81bfa64e1a9eb5f142ca3d1065be3fa" SRCREV_pn-multitap-pad ?= "373" diff --git a/packages/mplayer/files/mru-neon-float-to-int16.diff b/packages/mplayer/files/mru-neon-float-to-int16.diff deleted file mode 100644 index 7a874cab30..0000000000 --- a/packages/mplayer/files/mru-neon-float-to-int16.diff +++ /dev/null @@ -1,107 +0,0 @@ -From: Mans Rullgard -Date: Thu, 31 Jul 2008 02:35:42 +0000 (+0100) -Subject: ARM: NEON optimised float_to_int16 -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=f16a738cfc3307cbcba2f9c8aff4b5aa43144731 - -ARM: NEON optimised float_to_int16 ---- - -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index 6dbe835..b584e5b 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -91,6 +91,9 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); - void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); - -+void ff_float_to_int16_neon(int16_t *, const float *, long); -+void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); -+ - void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - { - c->put_pixels_tab[0][0] = ff_put_pixels16_neon; -@@ -158,4 +161,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - - c->h264_idct_add = ff_h264_idct_add_neon; - c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; -+ -+ c->float_to_int16 = ff_float_to_int16_neon; -+ c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; - } -diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S -index fc5e401..44f75ba 100644 ---- a/libavcodec/armv4l/dsputil_neon_s.S -+++ b/libavcodec/armv4l/dsputil_neon_s.S -@@ -252,3 +252,72 @@ - defun2 put_pixels8_x2, _no_rnd, vhadd.u8 - defun2 put_pixels8_y2, _no_rnd, vhadd.u8 - defun2 put_pixels8_xy2, _no_rnd, vshrn.u16, 1 -+ -+extern ff_float_to_int16_neon -+ dmb -+1: vld1.64 {d0-d3}, [r1,:128]! -+ vcvt.s32.f32 q2, q0 -+ vcvt.s32.f32 q3, q1 -+ subs r2, r2, #8 -+ vqmovn.s32 d4, q2 -+ vqmovn.s32 d5, q3 -+ vst1.64 {d4-d5}, [r0,:128]! -+ bgt 1b -+ bx lr -+ .endfunc -+ -+extern ff_float_to_int16_interleave_neon -+ cmp r3, #2 -+ ldrlt r1, [r1] -+ blt ff_float_to_int16_neon -+ bne 2f -+ -+ ldr ip, [r1] -+ ldr r1, [r1, #4] -+ vld1.64 {d0-d3}, [ip,:128]! -+ vld1.64 {d4-d7}, [r1,:128]! -+ dmb -+1: vcvt.s32.f32 q8, q0 -+ vcvt.s32.f32 q9, q1 -+ vcvt.s32.f32 q10, q2 -+ vcvt.s32.f32 q11, q3 -+ subs r2, r2, #8 -+ vqmovn.s32 d16, q8 -+ vqmovn.s32 d17, q9 -+ vqmovn.s32 d18, q10 -+ vqmovn.s32 d19, q11 -+ beq 1f -+ vld1.64 {d0-d3}, [ip,:128]! -+ vld1.64 {d4-d7}, [r1,:128]! -+ vst2.16 {d16-d19}, [r0,:64]! -+ b 1b -+1: vst2.16 {d16-d19}, [r0,:64]! -+ bx lr -+ -+2: push {r4,r5,lr} -+ lsls r4, r3, #1 -+ dmb -+ b 4f -+3: vld1.64 {d0-d3}, [ip,:128]! -+ vcvt.s32.f32 q2, q0 -+ vcvt.s32.f32 q3, q1 -+ subs lr, lr, #8 -+ vqmovn.s32 d4, q2 -+ vqmovn.s32 d5, q3 -+ vst1.16 {d4[0]}, [r5,:16], r4 -+ vst1.16 {d4[1]}, [r5,:16], r4 -+ vst1.16 {d4[2]}, [r5,:16], r4 -+ vst1.16 {d4[3]}, [r5,:16], r4 -+ vst1.16 {d5[0]}, [r5,:16], r4 -+ vst1.16 {d5[1]}, [r5,:16], r4 -+ vst1.16 {d5[2]}, [r5,:16], r4 -+ vst1.16 {d5[3]}, [r5,:16], r4 -+ bgt 3b -+ subs r3, r3, #1 -+4: ldr ip, [r1], #4 -+ mov lr, r2 -+ mov r5, r0 -+ add r0, r0, #2 -+ bne 3b -+ pop {r4,r5,pc} -+ .endfunc diff --git a/packages/mplayer/files/mru-neon-h264-chrome.diff b/packages/mplayer/files/mru-neon-h264-chrome.diff deleted file mode 100644 index cb6c4ff991..0000000000 --- a/packages/mplayer/files/mru-neon-h264-chrome.diff +++ /dev/null @@ -1,364 +0,0 @@ -From: Mans Rullgard -Date: Fri, 11 Jul 2008 01:20:07 +0000 (+0100) -Subject: ARM: NEON optimised {put,avg}_h264_chroma_mc[48] -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=d3aa8f93b8a0061e0c3ac12aeed055961abfc113 - -ARM: NEON optimised {put,avg}_h264_chroma_mc[48] ---- - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 7fa02fa..36ba158 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -437,6 +437,7 @@ OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \ - - ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \ - armv4l/simple_idct_neon.o \ -+ armv4l/h264dsp_neon.o \ - - OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \ - sparc/simple_idct_vis.o \ -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index 8a10dde..a6d86cd 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -42,6 +42,12 @@ void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); - void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int); - void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int); - -+void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); -+void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); -+ -+void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); -+void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); -+ - void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - { - c->put_pixels_tab[0][0] = ff_put_pixels16_neon; -@@ -62,6 +68,12 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon; - c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon; - -+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon; -+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon; -+ -+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon; -+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon; -+ - c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon; - c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon; - } -diff --git a/libavcodec/armv4l/h264dsp_neon.S b/libavcodec/armv4l/h264dsp_neon.S -new file mode 100644 -index 0000000..28d9aa7 ---- /dev/null -+++ b/libavcodec/armv4l/h264dsp_neon.S -@@ -0,0 +1,308 @@ -+/* -+ * Copyright (c) 2008 Mans Rullgard -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+ .fpu neon -+ -+/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ -+ .macro h264_chroma_mc8 avg=0 -+ push {r4-r7, lr} -+ ldrd r4, [sp, #20] -+.if \avg -+ mov lr, r0 -+.endif -+ pld [r1] -+ pld [r1, r2] -+ -+ muls r7, r4, r5 -+ rsb r6, r7, r5, lsl #3 -+ rsb ip, r7, r4, lsl #3 -+ sub r4, r7, r4, lsl #3 -+ sub r4, r4, r5, lsl #3 -+ add r4, r4, #64 -+ -+ dmb -+ -+ beq 2f -+ -+ add r5, r1, r2 -+ -+ vdup.8 d0, r4 -+ lsl r4, r2, #1 -+ vdup.8 d1, ip -+ vld1.64 {d4, d5}, [r1], r4 -+ vdup.8 d2, r6 -+ vld1.64 {d6, d7}, [r5], r4 -+ vdup.8 d3, r7 -+ -+ vext.8 d5, d4, d5, #1 -+ vext.8 d7, d6, d7, #1 -+ -+1: pld [r5] -+ vmull.u8 q8, d4, d0 -+ vmlal.u8 q8, d5, d1 -+ vld1.64 {d4, d5}, [r1], r4 -+ vmlal.u8 q8, d6, d2 -+ vext.8 d5, d4, d5, #1 -+ vmlal.u8 q8, d7, d3 -+ vmull.u8 q9, d6, d0 -+ subs r3, r3, #2 -+ vmlal.u8 q9, d7, d1 -+ vmlal.u8 q9, d4, d2 -+ vmlal.u8 q9, d5, d3 -+ vrshrn.u16 d16, q8, #6 -+ vld1.64 {d6, d7}, [r5], r4 -+ pld [r1] -+ vrshrn.u16 d17, q9, #6 -+.if \avg -+ vld1.64 {d20}, [lr,:64], r2 -+ vld1.64 {d21}, [lr,:64], r2 -+ vrhadd.u8 q8, q8, q10 -+.endif -+ vext.8 d7, d6, d7, #1 -+ vst1.64 {d16}, [r0,:64], r2 -+ vst1.64 {d17}, [r0,:64], r2 -+ bgt 1b -+ -+ pop {r4-r7, pc} -+ -+2: tst r6, r6 -+ add ip, ip, r6 -+ vdup.8 d0, r4 -+ vdup.8 d1, ip -+ -+ beq 4f -+ -+ add r5, r1, r2 -+ lsl r4, r2, #1 -+ vld1.64 {d4}, [r1], r4 -+ vld1.64 {d6}, [r5], r4 -+ -+3: pld [r5] -+ vmull.u8 q8, d4, d0 -+ vmlal.u8 q8, d6, d1 -+ vld1.64 {d4}, [r1], r4 -+ vmull.u8 q9, d6, d0 -+ vmlal.u8 q9, d4, d1 -+ vld1.64 {d6}, [r5], r4 -+ vrshrn.u16 d16, q8, #6 -+ vrshrn.u16 d17, q9, #6 -+.if \avg -+ vld1.64 {d20}, [lr,:64], r2 -+ vld1.64 {d21}, [lr,:64], r2 -+ vrhadd.u8 q8, q8, q10 -+.endif -+ subs r3, r3, #2 -+ pld [r1] -+ vst1.64 {d16}, [r0,:64], r2 -+ vst1.64 {d17}, [r0,:64], r2 -+ bgt 3b -+ -+ pop {r4-r7, pc} -+ -+4: vld1.64 {d4, d5}, [r1], r2 -+ vld1.64 {d6, d7}, [r1], r2 -+ vext.8 d5, d4, d5, #1 -+ vext.8 d7, d6, d7, #1 -+ -+5: pld [r1] -+ subs r3, r3, #2 -+ vmull.u8 q8, d4, d0 -+ vmlal.u8 q8, d5, d1 -+ vld1.64 {d4, d5}, [r1], r2 -+ vmull.u8 q9, d6, d0 -+ vmlal.u8 q9, d7, d1 -+ pld [r1] -+ vext.8 d5, d4, d5, #1 -+ vrshrn.u16 d16, q8, #6 -+ vrshrn.u16 d17, q9, #6 -+.if \avg -+ vld1.64 {d20}, [lr,:64], r2 -+ vld1.64 {d21}, [lr,:64], r2 -+ vrhadd.u8 q8, q8, q10 -+.endif -+ vld1.64 {d6, d7}, [r1], r2 -+ vext.8 d7, d6, d7, #1 -+ vst1.64 {d16}, [r0,:64], r2 -+ vst1.64 {d17}, [r0,:64], r2 -+ bgt 5b -+ -+ pop {r4-r7, pc} -+ .endm -+ -+/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ -+ .macro h264_chroma_mc4 avg=0 -+ push {r4-r7, lr} -+ ldrd r4, [sp, #20] -+.if \avg -+ mov lr, r0 -+.endif -+ pld [r1] -+ pld [r1, r2] -+ -+ muls r7, r4, r5 -+ rsb r6, r7, r5, lsl #3 -+ rsb ip, r7, r4, lsl #3 -+ sub r4, r7, r4, lsl #3 -+ sub r4, r4, r5, lsl #3 -+ add r4, r4, #64 -+ -+ dmb -+ -+ beq 2f -+ -+ add r5, r1, r2 -+ -+ vdup.8 d0, r4 -+ lsl r4, r2, #1 -+ vdup.8 d1, ip -+ vld1.64 {d4}, [r1], r4 -+ vdup.8 d2, r6 -+ vld1.64 {d6}, [r5], r4 -+ vdup.8 d3, r7 -+ -+ vext.8 d5, d4, d5, #1 -+ vext.8 d7, d6, d7, #1 -+ vtrn.32 d4, d5 -+ vtrn.32 d6, d7 -+ -+ vtrn.32 d0, d1 -+ vtrn.32 d2, d3 -+ -+1: pld [r5] -+ vmull.u8 q8, d4, d0 -+ vmlal.u8 q8, d6, d2 -+ vld1.64 {d4}, [r1], r4 -+ vext.8 d5, d4, d5, #1 -+ vtrn.32 d4, d5 -+ vmull.u8 q9, d6, d0 -+ vmlal.u8 q9, d4, d2 -+ vld1.64 {d6}, [r5], r4 -+ vadd.i16 d16, d16, d17 -+ vadd.i16 d17, d18, d19 -+ vrshrn.u16 d16, q8, #6 -+ subs r3, r3, #2 -+ pld [r1] -+.if \avg -+ vld1.32 {d20[0]}, [lr,:32], r2 -+ vld1.32 {d20[1]}, [lr,:32], r2 -+ vrhadd.u8 d16, d16, d20 -+.endif -+ vext.8 d7, d6, d7, #1 -+ vtrn.32 d6, d7 -+ vst1.32 {d16[0]}, [r0,:32], r2 -+ vst1.32 {d16[1]}, [r0,:32], r2 -+ bgt 1b -+ -+ pop {r4-r7, pc} -+ -+2: tst r6, r6 -+ add ip, ip, r6 -+ vdup.8 d0, r4 -+ vdup.8 d1, ip -+ vtrn.32 d0, d1 -+ -+ beq 4f -+ -+ vext.32 d1, d0, d1, #1 -+ add r5, r1, r2 -+ lsl r4, r2, #1 -+ vld1.32 {d4[0]}, [r1], r4 -+ vld1.32 {d4[1]}, [r5], r4 -+ -+3: pld [r5] -+ vmull.u8 q8, d4, d0 -+ vld1.32 {d4[0]}, [r1], r4 -+ vmull.u8 q9, d4, d1 -+ vld1.32 {d4[1]}, [r5], r4 -+ vadd.i16 d16, d16, d17 -+ vadd.i16 d17, d18, d19 -+ vrshrn.u16 d16, q8, #6 -+.if \avg -+ vld1.32 {d20[0]}, [lr,:32], r2 -+ vld1.32 {d20[1]}, [lr,:32], r2 -+ vrhadd.u8 d16, d16, d20 -+.endif -+ subs r3, r3, #2 -+ pld [r1] -+ vst1.32 {d16[0]}, [r0,:32], r2 -+ vst1.32 {d16[1]}, [r0,:32], r2 -+ bgt 3b -+ -+ pop {r4-r7, pc} -+ -+4: vld1.64 {d4}, [r1], r2 -+ vld1.64 {d6}, [r1], r2 -+ vext.8 d5, d4, d5, #1 -+ vext.8 d7, d6, d7, #1 -+ vtrn.32 d4, d5 -+ vtrn.32 d6, d7 -+ -+5: vmull.u8 q8, d4, d0 -+ vmull.u8 q9, d6, d0 -+ subs r3, r3, #2 -+ vld1.64 {d4}, [r1], r2 -+ vext.8 d5, d4, d5, #1 -+ vtrn.32 d4, d5 -+ vadd.i16 d16, d16, d17 -+ vadd.i16 d17, d18, d19 -+ pld [r1] -+ vrshrn.u16 d16, q8, #6 -+.if \avg -+ vld1.32 {d20[0]}, [lr,:32], r2 -+ vld1.32 {d20[1]}, [lr,:32], r2 -+ vrhadd.u8 d16, d16, d20 -+.endif -+ vld1.64 {d6}, [r1], r2 -+ vext.8 d7, d6, d7, #1 -+ vtrn.32 d6, d7 -+ pld [r1] -+ vst1.32 {d16[0]}, [r0,:32], r2 -+ vst1.32 {d16[1]}, [r0,:32], r2 -+ bgt 5b -+ -+ pop {r4-r7, pc} -+ .endm -+ -+ .text -+ .align -+ -+ .global ff_put_h264_chroma_mc8_neon -+ .func ff_put_h264_chroma_mc8_neon -+ff_put_h264_chroma_mc8_neon: -+ h264_chroma_mc8 -+ .endfunc -+ -+ .global ff_avg_h264_chroma_mc8_neon -+ .func ff_avg_h264_chroma_mc8_neon -+ff_avg_h264_chroma_mc8_neon: -+ h264_chroma_mc8 avg=1 -+ .endfunc -+ -+ .global ff_put_h264_chroma_mc4_neon -+ .func ff_put_h264_chroma_mc4_neon -+ff_put_h264_chroma_mc4_neon: -+ h264_chroma_mc4 -+ .endfunc -+ -+ .global ff_avg_h264_chroma_mc4_neon -+ .func ff_avg_h264_chroma_mc4_neon -+ff_avg_h264_chroma_mc4_neon: -+ h264_chroma_mc4 avg=1 -+ .endfunc diff --git a/packages/mplayer/files/mru-neon-h264-loopfilter.diff b/packages/mplayer/files/mru-neon-h264-loopfilter.diff deleted file mode 100644 index 056702517b..0000000000 --- a/packages/mplayer/files/mru-neon-h264-loopfilter.diff +++ /dev/null @@ -1,346 +0,0 @@ -From: Mans Rullgard -Date: Fri, 15 Aug 2008 00:02:55 +0000 (+0100) -Subject: ARM: NEON optimised H.264 loop filter -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=0c1b6bb0814587bd4c8a895c6d7dc2dd4cc2841a - -ARM: NEON optimised H.264 loop filter ---- - -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index a6d86cd..68ecbe8 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -48,6 +48,15 @@ void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); - void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); - void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); - -+void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, -+ int beta, int8_t *tc0); -+void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, -+ int beta, int8_t *tc0); -+void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, -+ int beta, int8_t *tc0); -+void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, -+ int beta, int8_t *tc0); -+ - void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - { - c->put_pixels_tab[0][0] = ff_put_pixels16_neon; -@@ -76,4 +85,9 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - - c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon; - c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon; -+ -+ c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; -+ c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; -+ c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; -+ c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; - } -diff --git a/libavcodec/armv4l/h264dsp_neon.S b/libavcodec/armv4l/h264dsp_neon.S -index 28d9aa7..ac793b2 100644 ---- a/libavcodec/armv4l/h264dsp_neon.S -+++ b/libavcodec/armv4l/h264dsp_neon.S -@@ -306,3 +306,303 @@ ff_put_h264_chroma_mc4_neon: - ff_avg_h264_chroma_mc4_neon: - h264_chroma_mc4 avg=1 - .endfunc -+ -+ /* H.264 loop filter */ -+ -+ .macro h264_loop_filter_start -+ ldr ip, [sp] -+ tst r2, r2 -+ ldr ip, [ip] -+ tstne r3, r3 -+ vmov.32 d24[0], ip -+ and ip, ip, ip, lsl #16 -+ bxeq lr -+ ands ip, ip, ip, lsl #8 -+ bxlt lr -+ .endm -+ -+ .macro align_push_regs -+ and ip, sp, #15 -+ add ip, ip, #32 -+ sub sp, sp, ip -+ dmb -+ vst1.64 {d12-d15}, [sp,:128] -+ sub sp, sp, #32 -+ vst1.64 {d8-d11}, [sp,:128] -+ .endm -+ -+ .macro align_pop_regs -+ vld1.64 {d8-d11}, [sp,:128]! -+ vld1.64 {d12-d15}, [sp,:128], ip -+ .endm -+ -+ .macro h264_loop_filter_luma -+ vdup.8 q11, r2 @ alpha -+ vmovl.u8 q12, d24 -+ vabd.u8 q6, q8, q0 @ abs(p0 - q0) -+ vmovl.u16 q12, d24 -+ vabd.u8 q14, q9, q8 @ abs(p1 - p0) -+ vsli.16 q12, q12, #8 -+ vabd.u8 q15, q1, q0 @ abs(q1 - q0) -+ vsli.32 q12, q12, #16 -+ vclt.u8 q6, q6, q11 @ < alpha -+ vdup.8 q11, r3 @ beta -+ vclt.s8 q7, q12, #0 -+ vclt.u8 q14, q14, q11 @ < beta -+ vclt.u8 q15, q15, q11 @ < beta -+ vbic q6, q6, q7 -+ vabd.u8 q4, q10, q8 @ abs(p2 - p0) -+ vand q6, q6, q14 -+ vabd.u8 q5, q2, q0 @ abs(q2 - q0) -+ vclt.u8 q4, q4, q11 @ < beta -+ vand q6, q6, q15 -+ vclt.u8 q5, q5, q11 @ < beta -+ vand q4, q4, q6 -+ vand q5, q5, q6 -+ vand q12, q12, q6 -+ vrhadd.u8 q14, q8, q0 -+ vsub.i8 q6, q12, q4 -+ vqadd.u8 q7, q9, q12 -+ vhadd.u8 q10, q10, q14 -+ vsub.i8 q6, q6, q5 -+ vhadd.u8 q14, q2, q14 -+ vmin.u8 q7, q7, q10 -+ vqsub.u8 q11, q9, q12 -+ vqadd.u8 q2, q1, q12 -+ vmax.u8 q7, q7, q11 -+ vqsub.u8 q11, q1, q12 -+ vmin.u8 q14, q2, q14 -+ vmovl.u8 q2, d0 -+ vmax.u8 q14, q14, q11 -+ vmovl.u8 q10, d1 -+ vsubw.u8 q2, q2, d16 -+ vsubw.u8 q10, q10, d17 -+ vshl.i16 q2, q2, #2 -+ vshl.i16 q10, q10, #2 -+ vaddw.u8 q2, q2, d18 -+ vaddw.u8 q10, q10, d19 -+ vsubw.u8 q2, q2, d2 -+ vsubw.u8 q10, q10, d3 -+ vrshrn.i16 d4, q2, #3 -+ vrshrn.i16 d5, q10, #3 -+ vbsl q4, q7, q9 -+ vbsl q5, q14, q1 -+ vneg.s8 q7, q6 -+ vmovl.u8 q14, d16 -+ vmin.s8 q2, q2, q6 -+ vmovl.u8 q6, d17 -+ vmax.s8 q2, q2, q7 -+ vmovl.u8 q11, d0 -+ vmovl.u8 q12, d1 -+ vaddw.s8 q14, q14, d4 -+ vaddw.s8 q6, q6, d5 -+ vsubw.s8 q11, q11, d4 -+ vsubw.s8 q12, q12, d5 -+ vqmovun.s16 d16, q14 -+ vqmovun.s16 d17, q6 -+ vqmovun.s16 d0, q11 -+ vqmovun.s16 d1, q12 -+ .endm -+ -+ .global ff_h264_v_loop_filter_luma_neon -+ .func ff_h264_v_loop_filter_luma_neon -+ff_h264_v_loop_filter_luma_neon: -+ h264_loop_filter_start -+ -+ vld1.64 {d0, d1}, [r0,:128], r1 -+ vld1.64 {d2, d3}, [r0,:128], r1 -+ vld1.64 {d4, d5}, [r0,:128], r1 -+ sub r0, r0, r1, lsl #2 -+ sub r0, r0, r1, lsl #1 -+ vld1.64 {d20,d21}, [r0,:128], r1 -+ vld1.64 {d18,d19}, [r0,:128], r1 -+ vld1.64 {d16,d17}, [r0,:128], r1 -+ -+ align_push_regs -+ -+ h264_loop_filter_luma -+ -+ sub r0, r0, r1, lsl #1 -+ vst1.64 {d8, d9}, [r0,:128], r1 -+ vst1.64 {d16,d17}, [r0,:128], r1 -+ vst1.64 {d0, d1}, [r0,:128], r1 -+ vst1.64 {d10,d11}, [r0,:128] -+ -+ align_pop_regs -+ bx lr -+ .endfunc -+ -+ .global ff_h264_h_loop_filter_luma_neon -+ .func ff_h264_h_loop_filter_luma_neon -+ff_h264_h_loop_filter_luma_neon: -+ h264_loop_filter_start -+ -+ sub r0, r0, #4 -+ vld1.64 {d6}, [r0], r1 -+ vld1.64 {d20}, [r0], r1 -+ vld1.64 {d18}, [r0], r1 -+ vld1.64 {d16}, [r0], r1 -+ vld1.64 {d0}, [r0], r1 -+ vld1.64 {d2}, [r0], r1 -+ vld1.64 {d4}, [r0], r1 -+ vld1.64 {d26}, [r0], r1 -+ vld1.64 {d7}, [r0], r1 -+ vld1.64 {d21}, [r0], r1 -+ vld1.64 {d19}, [r0], r1 -+ vld1.64 {d17}, [r0], r1 -+ vld1.64 {d1}, [r0], r1 -+ vld1.64 {d3}, [r0], r1 -+ vld1.64 {d5}, [r0], r1 -+ vld1.64 {d27}, [r0], r1 -+ -+ vtrn.32 q3, q0 -+ vtrn.32 q10, q1 -+ vtrn.32 q9, q2 -+ vtrn.32 q8, q13 -+ vtrn.16 q3, q9 -+ vtrn.16 q10, q8 -+ vtrn.16 q0, q2 -+ vtrn.16 q1, q13 -+ vtrn.8 q3, q10 -+ vtrn.8 q9, q8 -+ vtrn.8 q0, q1 -+ vtrn.8 q2, q13 -+ -+ align_push_regs -+ sub sp, sp, #16 -+ vst1.64 {d4, d5}, [sp,:128] -+ sub sp, sp, #16 -+ vst1.64 {d20,d21}, [sp,:128] -+ -+ h264_loop_filter_luma -+ -+ vld1.64 {d20,d21}, [sp,:128]! -+ vld1.64 {d4, d5}, [sp,:128]! -+ -+ vtrn.32 q3, q0 -+ vtrn.32 q10, q5 -+ vtrn.32 q4, q2 -+ vtrn.32 q8, q13 -+ vtrn.16 q3, q4 -+ vtrn.16 q10, q8 -+ vtrn.16 q0, q2 -+ vtrn.16 q5, q13 -+ vtrn.8 q3, q10 -+ vtrn.8 q4, q8 -+ vtrn.8 q0, q5 -+ vtrn.8 q2, q13 -+ -+ sub r0, r0, r1, lsl #4 -+ vst1.64 {d6}, [r0], r1 -+ vst1.64 {d20}, [r0], r1 -+ vst1.64 {d8}, [r0], r1 -+ vst1.64 {d16}, [r0], r1 -+ vst1.64 {d0}, [r0], r1 -+ vst1.64 {d10}, [r0], r1 -+ vst1.64 {d4}, [r0], r1 -+ vst1.64 {d26}, [r0], r1 -+ vst1.64 {d7}, [r0], r1 -+ vst1.64 {d21}, [r0], r1 -+ vst1.64 {d9}, [r0], r1 -+ vst1.64 {d17}, [r0], r1 -+ vst1.64 {d1}, [r0], r1 -+ vst1.64 {d11}, [r0], r1 -+ vst1.64 {d5}, [r0], r1 -+ vst1.64 {d27}, [r0], r1 -+ -+ align_pop_regs -+ bx lr -+ .endfunc -+ -+ .macro h264_loop_filter_chroma -+ vdup.8 d22, r2 @ alpha -+ vmovl.u8 q12, d24 -+ vabd.u8 d26, d16, d0 @ abs(p0 - q0) -+ vmovl.u8 q2, d0 -+ vabd.u8 d28, d18, d16 @ abs(p1 - p0) -+ vsubw.u8 q2, q2, d16 -+ vsli.16 d24, d24, #8 -+ vshl.i16 q2, q2, #2 -+ vabd.u8 d30, d2, d0 @ abs(q1 - q0) -+ vaddw.u8 q2, q2, d18 -+ vclt.u8 d26, d26, d22 @ < alpha -+ vsubw.u8 q2, q2, d2 -+ vdup.8 d22, r3 @ beta -+ vclt.s8 d25, d24, #0 -+ vrshrn.i16 d4, q2, #3 -+ vclt.u8 d28, d28, d22 @ < beta -+ vbic d26, d26, d25 -+ vclt.u8 d30, d30, d22 @ < beta -+ vand d26, d26, d28 -+ vneg.s8 d25, d24 -+ vand d26, d26, d30 -+ vmin.s8 d4, d4, d24 -+ vmovl.u8 q14, d16 -+ vand d4, d4, d26 -+ vmax.s8 d4, d4, d25 -+ vmovl.u8 q11, d0 -+ vaddw.s8 q14, q14, d4 -+ vsubw.s8 q11, q11, d4 -+ vqmovun.s16 d16, q14 -+ vqmovun.s16 d0, q11 -+ .endm -+ -+ .global ff_h264_v_loop_filter_chroma_neon -+ .func ff_h264_v_loop_filter_chroma_neon -+ff_h264_v_loop_filter_chroma_neon: -+ h264_loop_filter_start -+ -+ sub r0, r0, r1, lsl #1 -+ vld1.64 {d18}, [r0,:64], r1 -+ vld1.64 {d16}, [r0,:64], r1 -+ vld1.64 {d0}, [r0,:64], r1 -+ vld1.64 {d2}, [r0,:64] -+ -+ h264_loop_filter_chroma -+ -+ sub r0, r0, r1, lsl #1 -+ vst1.64 {d16}, [r0,:64], r1 -+ vst1.64 {d0}, [r0,:64], r1 -+ -+ bx lr -+ .endfunc -+ -+ .global ff_h264_h_loop_filter_chroma_neon -+ .func ff_h264_h_loop_filter_chroma_neon -+ff_h264_h_loop_filter_chroma_neon: -+ h264_loop_filter_start -+ -+ sub r0, r0, #2 -+ vld1.32 {d18[0]}, [r0], r1 -+ vld1.32 {d16[0]}, [r0], r1 -+ vld1.32 {d0[0]}, [r0], r1 -+ vld1.32 {d2[0]}, [r0], r1 -+ vld1.32 {d18[1]}, [r0], r1 -+ vld1.32 {d16[1]}, [r0], r1 -+ vld1.32 {d0[1]}, [r0], r1 -+ vld1.32 {d2[1]}, [r0], r1 -+ -+ vtrn.16 d18, d0 -+ vtrn.16 d16, d2 -+ vtrn.8 d18, d16 -+ vtrn.8 d0, d2 -+ -+ h264_loop_filter_chroma -+ -+ vtrn.16 d18, d0 -+ vtrn.16 d16, d2 -+ vtrn.8 d18, d16 -+ vtrn.8 d0, d2 -+ -+ sub r0, r0, r1, lsl #3 -+ vst1.32 {d18[0]}, [r0], r1 -+ vst1.32 {d16[0]}, [r0], r1 -+ vst1.32 {d0[0]}, [r0], r1 -+ vst1.32 {d2[0]}, [r0], r1 -+ vst1.32 {d18[1]}, [r0], r1 -+ vst1.32 {d16[1]}, [r0], r1 -+ vst1.32 {d0[1]}, [r0], r1 -+ vst1.32 {d2[1]}, [r0], r1 -+ -+ bx lr -+ .endfunc diff --git a/packages/mplayer/files/mru-neon-h264-qpel.diff b/packages/mplayer/files/mru-neon-h264-qpel.diff deleted file mode 100644 index 6ed479b19b..0000000000 --- a/packages/mplayer/files/mru-neon-h264-qpel.diff +++ /dev/null @@ -1,1040 +0,0 @@ -From: Mans Rullgard -Date: Sat, 23 Aug 2008 00:24:04 +0000 (+0100) -Subject: ARM: NEON optimised H.264 8x8 and 16x16 qpel MC -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=55661fd933572f67248c0730f6c75a6db0f0eb6a - -ARM: NEON optimised H.264 8x8 and 16x16 qpel MC ---- - -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index 68ecbe8..a932aa9 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -40,7 +40,38 @@ void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); - void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); - - void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, int); -+ - void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int); - - void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); - void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); -@@ -83,8 +114,39 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon; - c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon; - -- c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon; -- c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon; -+ c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon; -+ c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon; -+ c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon; -+ c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon; -+ c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon; -+ c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon; -+ c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon; -+ c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon; -+ c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon; -+ c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon; -+ c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon; -+ c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon; -+ c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon; -+ c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon; -+ c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon; -+ c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon; -+ -+ c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon; -+ c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon; -+ c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon; -+ c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon; -+ c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon; -+ c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon; -+ c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon; -+ c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon; -+ c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon; -+ c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon; -+ c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon; -+ c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon; -+ c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon; -+ c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon; -+ c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon; -+ c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon; - - c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; - c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; -diff --git a/libavcodec/armv4l/h264dsp_neon.S b/libavcodec/armv4l/h264dsp_neon.S -index ac793b2..398e9c8 100644 ---- a/libavcodec/armv4l/h264dsp_neon.S -+++ b/libavcodec/armv4l/h264dsp_neon.S -@@ -20,6 +20,39 @@ - - .fpu neon - -+ .macro transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7 -+ vtrn.32 \r0, \r4 -+ vtrn.32 \r1, \r5 -+ vtrn.32 \r2, \r6 -+ vtrn.32 \r3, \r7 -+ vtrn.16 \r0, \r2 -+ vtrn.16 \r1, \r3 -+ vtrn.16 \r4, \r6 -+ vtrn.16 \r5, \r7 -+ vtrn.8 \r0, \r1 -+ vtrn.8 \r2, \r3 -+ vtrn.8 \r4, \r5 -+ vtrn.8 \r6, \r7 -+ .endm -+ -+ .macro swap4 r0 r1 r2 r3 r4 r5 r6 r7 -+ vswp \r0, \r4 -+ vswp \r1, \r5 -+ vswp \r2, \r6 -+ vswp \r3, \r7 -+ .endm -+ -+ .macro transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7 -+ vtrn.32 \r0, \r2 -+ vtrn.32 \r1, \r3 -+ vtrn.32 \r4, \r6 -+ vtrn.32 \r5, \r7 -+ vtrn.16 \r0, \r1 -+ vtrn.16 \r2, \r3 -+ vtrn.16 \r4, \r5 -+ vtrn.16 \r6, \r7 -+ .endm -+ - /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ - .macro h264_chroma_mc8 avg=0 - push {r4-r7, lr} -@@ -455,18 +488,7 @@ ff_h264_h_loop_filter_luma_neon: - vld1.64 {d5}, [r0], r1 - vld1.64 {d27}, [r0], r1 - -- vtrn.32 q3, q0 -- vtrn.32 q10, q1 -- vtrn.32 q9, q2 -- vtrn.32 q8, q13 -- vtrn.16 q3, q9 -- vtrn.16 q10, q8 -- vtrn.16 q0, q2 -- vtrn.16 q1, q13 -- vtrn.8 q3, q10 -- vtrn.8 q9, q8 -- vtrn.8 q0, q1 -- vtrn.8 q2, q13 -+ transpose_8x8 q3, q10, q9, q8, q0, q1, q2, q13 - - align_push_regs - sub sp, sp, #16 -@@ -479,18 +501,7 @@ ff_h264_h_loop_filter_luma_neon: - vld1.64 {d20,d21}, [sp,:128]! - vld1.64 {d4, d5}, [sp,:128]! - -- vtrn.32 q3, q0 -- vtrn.32 q10, q5 -- vtrn.32 q4, q2 -- vtrn.32 q8, q13 -- vtrn.16 q3, q4 -- vtrn.16 q10, q8 -- vtrn.16 q0, q2 -- vtrn.16 q5, q13 -- vtrn.8 q3, q10 -- vtrn.8 q4, q8 -- vtrn.8 q0, q5 -- vtrn.8 q2, q13 -+ transpose_8x8 q3, q10, q4, q8, q0, q5, q2, q13 - - sub r0, r0, r1, lsl #4 - vst1.64 {d6}, [r0], r1 -@@ -606,3 +617,862 @@ ff_h264_h_loop_filter_chroma_neon: - - bx lr - .endfunc -+ -+ /* H.264 qpel MC */ -+ -+ .macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1 -+ vext.8 d4, \r0, \r1, #1 -+ vext.8 d2, \r0, \r1, #2 -+ vext.8 d3, \r0, \r1, #3 -+ vext.8 d5, \r0, \r1, #4 -+ vext.8 d6, \r0, \r1, #5 -+ -+ vext.8 d20, \r2, \r3, #1 -+ vext.8 d18, \r2, \r3, #2 -+ vext.8 d19, \r2, \r3, #3 -+ vext.8 d21, \r2, \r3, #4 -+ vext.8 d7, \r2, \r3, #5 -+ -+ vaddl.u8 q1, d2, d3 -+ vaddl.u8 q2, d4, d5 -+ vaddl.u8 q0, \r0, d6 -+ vaddl.u8 q9, d18, d19 -+ vaddl.u8 q10, d20, d21 -+ vaddl.u8 q8, \r2, d7 -+ -+ vshl.i16 q3, q1, #4 -+ vshl.i16 q1, q1, #2 -+ vshl.i16 q15, q2, #2 -+ vadd.i16 q1, q1, q3 -+ vadd.i16 q2, q2, q15 -+ -+ vshl.i16 q3, q9, #4 -+ vshl.i16 q9, q9, #2 -+ vshl.i16 q15, q10, #2 -+ vadd.i16 q9, q9, q3 -+ vadd.i16 q10, q10, q15 -+ -+ vsub.i16 q1, q1, q2 -+ vsub.i16 q9, q9, q10 -+.if \narrow -+ vadd.i16 q1, q1, q0 -+ vadd.i16 q9, q9, q8 -+ vqrshrun.s16 \d0, q1, #5 -+ vqrshrun.s16 \d1, q9, #5 -+.else -+ vadd.i16 \d0, q1, q0 -+ vadd.i16 \d1, q9, q8 -+.endif -+ .endm -+ -+ .macro lowpass_8_1 r0, r1, d0, narrow=1 -+ vext.8 d4, \r0, \r1, #1 -+ vext.8 d2, \r0, \r1, #2 -+ vext.8 d3, \r0, \r1, #3 -+ vext.8 d5, \r0, \r1, #4 -+ vext.8 d6, \r0, \r1, #5 -+ -+ vaddl.u8 q1, d2, d3 -+ vaddl.u8 q2, d4, d5 -+ vaddl.u8 q0, \r0, d6 -+ -+ vshl.i16 q3, q1, #4 -+ vshl.i16 q1, q1, #2 -+ vshl.i16 q15, q2, #2 -+ vadd.i16 q1, q1, q3 -+ vadd.i16 q2, q2, q15 -+ -+ vadd.i16 q1, q1, q0 -+.if \narrow -+ vsub.i16 q1, q1, q2 -+ vqrshrun.s16 \d0, q1, #5 -+.else -+ vsub.i16 \d0, q1, q2 -+.endif -+ .endm -+ -+ .macro lowpass_8.16 r0, r1, l0, h0, l1, h1, d -+ vext.16 q2, \r0, \r1, #1 -+ vext.16 q1, \r0, \r1, #2 -+ vext.16 q0, \r0, \r1, #3 -+ vext.16 q3, \r0, \r1, #4 -+ vext.16 \r1, \r0, \r1, #5 -+ -+ vaddl.s16 q9, d2, d0 -+ vaddl.s16 q1, d3, d1 -+ vaddl.s16 q10, d4, d6 -+ vaddl.s16 q2, d5, d7 -+ vaddl.s16 q0, \h0, \h1 -+ vaddl.s16 q8, \l0, \l1 -+ -+ vshl.i32 q3, q9, #4 -+ vshl.i32 q9, q9, #2 -+ vshl.i32 q15, q10, #2 -+ vadd.i32 q9, q9, q3 -+ vadd.i32 q10, q10, q15 -+ -+ vshl.i32 q3, q1, #4 -+ vshl.i32 q1, q1, #2 -+ vshl.i32 q15, q2, #2 -+ vadd.i32 q1, q1, q3 -+ vadd.i32 q2, q2, q15 -+ -+ vadd.i32 q9, q9, q8 -+ vsub.i32 q9, q9, q10 -+ -+ vadd.i32 q1, q1, q0 -+ vsub.i32 q1, q1, q2 -+ -+ vrshrn.s32 d18, q9, #10 -+ vrshrn.s32 d19, q1, #10 -+ -+ vqmovun.s16 \d, q9 -+ .endm -+ -+ .func put_h264_qpel16_h_lowpass_neon_packed -+put_h264_qpel16_h_lowpass_neon_packed: -+ mov r4, lr -+ mov ip, #16 -+ mov r3, #8 -+ bl put_h264_qpel8_h_lowpass_neon -+ sub r1, r1, r2, lsl #4 -+ add r1, r1, #8 -+ mov ip, #16 -+ mov lr, r4 -+ b put_h264_qpel8_h_lowpass_neon -+ .endfunc -+ -+ .func put_h264_qpel16_h_lowpass_neon -+put_h264_qpel16_h_lowpass_neon: -+ push {lr} -+ mov ip, #16 -+ dmb -+ bl put_h264_qpel8_h_lowpass_neon -+ sub r0, r0, r3, lsl #4 -+ sub r1, r1, r2, lsl #4 -+ add r0, r0, #8 -+ add r1, r1, #8 -+ mov ip, #16 -+ pop {lr} -+ .endfunc -+ -+ .func put_h264_qpel8_h_lowpass_neon -+put_h264_qpel8_h_lowpass_neon: -+1: vld1.64 {d0, d1}, [r1], r2 -+ vld1.64 {d16,d17}, [r1], r2 -+ subs ip, ip, #2 -+ lowpass_8 d0, d1, d16, d17, d0, d16 -+ vst1.64 {d0}, [r0,:64], r3 -+ vst1.64 {d16}, [r0,:64], r3 -+ bne 1b -+ bx lr -+ .endfunc -+ -+ .func put_h264_qpel16_h_lowpass_l2_neon -+put_h264_qpel16_h_lowpass_l2_neon: -+ push {lr} -+ mov ip, #16 -+ dmb -+ bl put_h264_qpel8_h_lowpass_l2_neon -+ sub r0, r0, r2, lsl #4 -+ sub r1, r1, r2, lsl #4 -+ sub r3, r3, r2, lsl #4 -+ add r0, r0, #8 -+ add r1, r1, #8 -+ add r3, r3, #8 -+ mov ip, #16 -+ pop {lr} -+ .endfunc -+ -+ .func put_h264_qpel8_h_lowpass_l2_neon -+put_h264_qpel8_h_lowpass_l2_neon: -+1: vld1.64 {d0, d1}, [r1], r2 -+ vld1.64 {d16,d17}, [r1], r2 -+ vld1.64 {d28}, [r3], r2 -+ vld1.64 {d29}, [r3], r2 -+ subs ip, ip, #2 -+ lowpass_8 d0, d1, d16, d17, d0, d1 -+ vrhadd.u8 q0, q0, q14 -+ vst1.64 {d0}, [r0,:64], r2 -+ vst1.64 {d1}, [r0,:64], r2 -+ bne 1b -+ bx lr -+ .endfunc -+ -+ .func put_h264_qpel16_v_lowpass_neon_packed -+put_h264_qpel16_v_lowpass_neon_packed: -+ mov r4, lr -+ mov r2, #8 -+ bl put_h264_qpel8_v_lowpass_neon -+ sub r1, r1, r3, lsl #2 -+ bl put_h264_qpel8_v_lowpass_neon -+ sub r1, r1, r3, lsl #4 -+ sub r1, r1, r3, lsl #2 -+ add r1, r1, #8 -+ bl put_h264_qpel8_v_lowpass_neon -+ sub r1, r1, r3, lsl #2 -+ mov lr, r4 -+ b put_h264_qpel8_v_lowpass_neon -+ .endfunc -+ -+ .func put_h264_qpel16_v_lowpass_neon -+put_h264_qpel16_v_lowpass_neon: -+ mov r4, lr -+ bl put_h264_qpel8_v_lowpass_neon -+ sub r1, r1, r3, lsl #2 -+ bl put_h264_qpel8_v_lowpass_neon -+ sub r0, r0, r2, lsl #4 -+ add r0, r0, #8 -+ sub r1, r1, r3, lsl #4 -+ sub r1, r1, r3, lsl #2 -+ add r1, r1, #8 -+ bl put_h264_qpel8_v_lowpass_neon -+ sub r1, r1, r3, lsl #2 -+ mov lr, r4 -+ .endfunc -+ -+ .func put_h264_qpel8_v_lowpass_neon -+put_h264_qpel8_v_lowpass_neon: -+ vld1.64 {d8}, [r1], r3 -+ vld1.64 {d10}, [r1], r3 -+ vld1.64 {d12}, [r1], r3 -+ vld1.64 {d14}, [r1], r3 -+ vld1.64 {d22}, [r1], r3 -+ vld1.64 {d24}, [r1], r3 -+ vld1.64 {d26}, [r1], r3 -+ vld1.64 {d28}, [r1], r3 -+ vld1.64 {d9}, [r1], r3 -+ vld1.64 {d11}, [r1], r3 -+ vld1.64 {d13}, [r1], r3 -+ vld1.64 {d15}, [r1], r3 -+ vld1.64 {d23}, [r1] -+ -+ transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14 -+ lowpass_8 d8, d9, d10, d11, d8, d10 -+ lowpass_8 d12, d13, d14, d15, d12, d14 -+ lowpass_8 d22, d23, d24, d25, d22, d24 -+ lowpass_8 d26, d27, d28, d29, d26, d28 -+ transpose_8x8 d8, d10, d12, d14, d22, d24, d26, d28 -+ -+ vst1.64 {d8}, [r0,:64], r2 -+ vst1.64 {d10}, [r0,:64], r2 -+ vst1.64 {d12}, [r0,:64], r2 -+ vst1.64 {d14}, [r0,:64], r2 -+ vst1.64 {d22}, [r0,:64], r2 -+ vst1.64 {d24}, [r0,:64], r2 -+ vst1.64 {d26}, [r0,:64], r2 -+ vst1.64 {d28}, [r0,:64], r2 -+ -+ bx lr -+ .endfunc -+ -+ .func put_h264_qpel16_v_lowpass_l2_neon -+put_h264_qpel16_v_lowpass_l2_neon: -+ mov r4, lr -+ bl put_h264_qpel8_v_lowpass_l2_neon -+ sub r1, r1, r3, lsl #2 -+ bl put_h264_qpel8_v_lowpass_l2_neon -+ sub r0, r0, r3, lsl #4 -+ sub ip, ip, r2, lsl #4 -+ add r0, r0, #8 -+ add ip, ip, #8 -+ sub r1, r1, r3, lsl #4 -+ sub r1, r1, r3, lsl #2 -+ add r1, r1, #8 -+ bl put_h264_qpel8_v_lowpass_l2_neon -+ sub r1, r1, r3, lsl #2 -+ mov lr, r4 -+ .endfunc -+ -+ .func put_h264_qpel8_v_lowpass_l2_neon -+put_h264_qpel8_v_lowpass_l2_neon: -+ vld1.64 {d8}, [r1], r3 -+ vld1.64 {d10}, [r1], r3 -+ vld1.64 {d12}, [r1], r3 -+ vld1.64 {d14}, [r1], r3 -+ vld1.64 {d22}, [r1], r3 -+ vld1.64 {d24}, [r1], r3 -+ vld1.64 {d26}, [r1], r3 -+ vld1.64 {d28}, [r1], r3 -+ vld1.64 {d9}, [r1], r3 -+ vld1.64 {d11}, [r1], r3 -+ vld1.64 {d13}, [r1], r3 -+ vld1.64 {d15}, [r1], r3 -+ vld1.64 {d23}, [r1] -+ -+ transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14 -+ lowpass_8 d8, d9, d10, d11, d8, d9 -+ lowpass_8 d12, d13, d14, d15, d12, d13 -+ lowpass_8 d22, d23, d24, d25, d22, d23 -+ lowpass_8 d26, d27, d28, d29, d26, d27 -+ transpose_8x8 d8, d9, d12, d13, d22, d23, d26, d27 -+ -+ vld1.64 {d0}, [ip], r2 -+ vld1.64 {d1}, [ip], r2 -+ vld1.64 {d2}, [ip], r2 -+ vld1.64 {d3}, [ip], r2 -+ vld1.64 {d4}, [ip], r2 -+ vrhadd.u8 q0, q0, q4 -+ vld1.64 {d5}, [ip], r2 -+ vrhadd.u8 q1, q1, q6 -+ vld1.64 {d6}, [ip], r2 -+ vrhadd.u8 q2, q2, q11 -+ vld1.64 {d7}, [ip], r2 -+ -+ vst1.64 {d0}, [r0,:64], r3 -+ vst1.64 {d1}, [r0,:64], r3 -+ vrhadd.u8 q3, q3, q13 -+ vst1.64 {d2}, [r0,:64], r3 -+ vst1.64 {d3}, [r0,:64], r3 -+ vst1.64 {d4}, [r0,:64], r3 -+ vst1.64 {d5}, [r0,:64], r3 -+ vst1.64 {d6}, [r0,:64], r3 -+ vst1.64 {d7}, [r0,:64], r3 -+ -+ bx lr -+ .endfunc -+ -+ .func put_h264_qpel8_hv_lowpass_neon_top -+put_h264_qpel8_hv_lowpass_neon_top: -+ mov ip, #12 -+1: vld1.64 {d0, d1}, [r1], r3 -+ vld1.64 {d16,d17}, [r1], r3 -+ subs ip, ip, #2 -+ lowpass_8 d0, d1, d16, d17, q0, q1, narrow=0 -+ vst1.64 {d0-d3}, [r4,:128]! -+ bne 1b -+ -+ vld1.64 {d0, d1}, [r1] -+ lowpass_8_1 d0, d1, q12, narrow=0 -+ -+ mov ip, #-16 -+ add r4, r4, ip -+ vld1.64 {d30,d31}, [r4,:128], ip -+ vld1.64 {d20,d21}, [r4,:128], ip -+ vld1.64 {d18,d19}, [r4,:128], ip -+ vld1.64 {d16,d17}, [r4,:128], ip -+ vld1.64 {d14,d15}, [r4,:128], ip -+ vld1.64 {d12,d13}, [r4,:128], ip -+ vld1.64 {d10,d11}, [r4,:128], ip -+ vld1.64 {d8, d9}, [r4,:128], ip -+ vld1.64 {d6, d7}, [r4,:128], ip -+ vld1.64 {d4, d5}, [r4,:128], ip -+ vld1.64 {d2, d3}, [r4,:128], ip -+ vld1.64 {d0, d1}, [r4,:128] -+ -+ swap4 d1, d3, d5, d7, d8, d10, d12, d14 -+ transpose16_4x4 q0, q1, q2, q3, q4, q5, q6, q7 -+ -+ swap4 d17, d19, d21, d31, d24, d26, d28, d22 -+ transpose16_4x4 q8, q9, q10, q15, q12, q13, q14, q11 -+ -+ vst1.64 {d30,d31}, [r4,:128]! -+ vst1.64 {d6, d7}, [r4,:128]! -+ vst1.64 {d20,d21}, [r4,:128]! -+ vst1.64 {d4, d5}, [r4,:128]! -+ vst1.64 {d18,d19}, [r4,:128]! -+ vst1.64 {d2, d3}, [r4,:128]! -+ vst1.64 {d16,d17}, [r4,:128]! -+ vst1.64 {d0, d1}, [r4,:128] -+ -+ lowpass_8.16 q4, q12, d8, d9, d24, d25, d8 -+ lowpass_8.16 q5, q13, d10, d11, d26, d27, d9 -+ lowpass_8.16 q6, q14, d12, d13, d28, d29, d10 -+ lowpass_8.16 q7, q11, d14, d15, d22, d23, d11 -+ -+ vld1.64 {d16,d17}, [r4,:128], ip -+ vld1.64 {d30,d31}, [r4,:128], ip -+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d12 -+ vld1.64 {d16,d17}, [r4,:128], ip -+ vld1.64 {d30,d31}, [r4,:128], ip -+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d13 -+ vld1.64 {d16,d17}, [r4,:128], ip -+ vld1.64 {d30,d31}, [r4,:128], ip -+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d14 -+ vld1.64 {d16,d17}, [r4,:128], ip -+ vld1.64 {d30,d31}, [r4,:128] -+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d15 -+ -+ transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11 -+ -+ bx lr -+ .endfunc -+ -+ .func put_h264_qpel8_hv_lowpass_neon -+put_h264_qpel8_hv_lowpass_neon: -+ mov r10, lr -+ bl put_h264_qpel8_hv_lowpass_neon_top -+ vst1.64 {d12}, [r0,:64], r2 -+ vst1.64 {d13}, [r0,:64], r2 -+ vst1.64 {d14}, [r0,:64], r2 -+ vst1.64 {d15}, [r0,:64], r2 -+ vst1.64 {d8}, [r0,:64], r2 -+ vst1.64 {d9}, [r0,:64], r2 -+ vst1.64 {d10}, [r0,:64], r2 -+ vst1.64 {d11}, [r0,:64], r2 -+ -+ mov lr, r10 -+ bx lr -+ .endfunc -+ -+ .func put_h264_qpel8_hv_lowpass_l2_neon -+put_h264_qpel8_hv_lowpass_l2_neon: -+ mov r10, lr -+ bl put_h264_qpel8_hv_lowpass_neon_top -+ -+ vld1.64 {d0, d1}, [r2,:128]! -+ vld1.64 {d2, d3}, [r2,:128]! -+ vrhadd.u8 q0, q0, q6 -+ vld1.64 {d4, d5}, [r2,:128]! -+ vrhadd.u8 q1, q1, q7 -+ vld1.64 {d6, d7}, [r2,:128]! -+ vrhadd.u8 q2, q2, q4 -+ -+ vst1.64 {d0}, [r0,:64], r3 -+ vrhadd.u8 q3, q3, q5 -+ vst1.64 {d1}, [r0,:64], r3 -+ vst1.64 {d2}, [r0,:64], r3 -+ vst1.64 {d3}, [r0,:64], r3 -+ vst1.64 {d4}, [r0,:64], r3 -+ vst1.64 {d5}, [r0,:64], r3 -+ vst1.64 {d6}, [r0,:64], r3 -+ vst1.64 {d7}, [r0,:64], r3 -+ -+ mov lr, r10 -+ bx lr -+ .endfunc -+ -+ .func put_h264_qpel16_hv_lowpass_neon -+put_h264_qpel16_hv_lowpass_neon: -+ mov r9, lr -+ bl put_h264_qpel8_hv_lowpass_neon -+ sub r1, r1, r3, lsl #2 -+ bl put_h264_qpel8_hv_lowpass_neon -+ sub r1, r1, r3, lsl #4 -+ sub r1, r1, r3, lsl #2 -+ add r1, r1, #8 -+ sub r0, r0, r2, lsl #4 -+ add r0, r0, #8 -+ bl put_h264_qpel8_hv_lowpass_neon -+ sub r1, r1, r3, lsl #2 -+ mov lr, r9 -+ b put_h264_qpel8_hv_lowpass_neon -+ .endfunc -+ -+ .func put_h264_qpel16_hv_lowpass_l2_neon -+put_h264_qpel16_hv_lowpass_l2_neon: -+ mov r9, lr -+ sub r2, r4, #256 -+ bl put_h264_qpel8_hv_lowpass_l2_neon -+ sub r1, r1, r3, lsl #2 -+ bl put_h264_qpel8_hv_lowpass_l2_neon -+ sub r1, r1, r3, lsl #4 -+ sub r1, r1, r3, lsl #2 -+ add r1, r1, #8 -+ sub r0, r0, r3, lsl #4 -+ add r0, r0, #8 -+ bl put_h264_qpel8_hv_lowpass_l2_neon -+ sub r1, r1, r3, lsl #2 -+ mov lr, r9 -+ b put_h264_qpel8_hv_lowpass_l2_neon -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc10_neon -+ .func ff_put_h264_qpel8_mc10_neon -+ff_put_h264_qpel8_mc10_neon: -+ mov r3, r1 -+ sub r1, r1, #2 -+ mov ip, #8 -+ dmb -+ b put_h264_qpel8_h_lowpass_l2_neon -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc20_neon -+ .func ff_put_h264_qpel8_mc20_neon -+ff_put_h264_qpel8_mc20_neon: -+ sub r1, r1, #2 -+ mov r3, r2 -+ mov ip, #8 -+ dmb -+ b put_h264_qpel8_h_lowpass_neon -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc30_neon -+ .func ff_put_h264_qpel8_mc30_neon -+ff_put_h264_qpel8_mc30_neon: -+ add r3, r1, #1 -+ sub r1, r1, #2 -+ mov ip, #8 -+ dmb -+ b put_h264_qpel8_h_lowpass_l2_neon -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc01_neon -+ .func ff_put_h264_qpel8_mc01_neon -+ff_put_h264_qpel8_mc01_neon: -+ push {lr} -+ mov ip, r1 -+put_h264_qpel8_mc01: -+ mov r3, r2 -+ sub r1, r1, r2, lsl #1 -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel8_v_lowpass_l2_neon -+ vpop {d8-d15} -+ pop {pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc11_neon -+ .func ff_put_h264_qpel8_mc11_neon -+ff_put_h264_qpel8_mc11_neon: -+ push {r0, r1, r2, lr} -+put_h264_qpel8_mc11: -+ sub sp, sp, #64 -+ mov r0, sp -+ sub r1, r1, #2 -+ mov r3, #8 -+ mov ip, #8 -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel8_h_lowpass_neon -+ ldrd r0, [sp, #128] -+ mov r3, r2 -+ add ip, sp, #64 -+ sub r1, r1, r2, lsl #1 -+ mov r2, #8 -+ bl put_h264_qpel8_v_lowpass_l2_neon -+ vpop {d8-d15} -+ add sp, sp, #76 -+ pop {pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc21_neon -+ .func ff_put_h264_qpel8_mc21_neon -+ff_put_h264_qpel8_mc21_neon: -+ push {r0, r1, r4, r10, r11, lr} -+put_h264_qpel8_mc21: -+ mov r11, sp -+ bic sp, sp, #15 -+ sub sp, sp, #(8*8+16*12) -+ sub r1, r1, #2 -+ mov r3, #8 -+ mov r0, sp -+ mov ip, #8 -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel8_h_lowpass_neon -+ mov r4, r0 -+ ldrd r0, [r11] -+ sub r1, r1, r2, lsl #1 -+ sub r1, r1, #2 -+ mov r3, r2 -+ sub r2, r4, #64 -+ bl put_h264_qpel8_hv_lowpass_l2_neon -+ vpop {d8-d15} -+ add sp, r11, #8 -+ pop {r4, r10, r11, pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc31_neon -+ .func ff_put_h264_qpel8_mc31_neon -+ff_put_h264_qpel8_mc31_neon: -+ add r1, r1, #1 -+ push {r0, r1, r2, lr} -+ sub r1, r1, #1 -+ b put_h264_qpel8_mc11 -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc02_neon -+ .func ff_put_h264_qpel8_mc02_neon -+ff_put_h264_qpel8_mc02_neon: -+ push {lr} -+ sub r1, r1, r2, lsl #1 -+ mov r3, r2 -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel8_v_lowpass_neon -+ vpop {d8-d15} -+ pop {pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc12_neon -+ .func ff_put_h264_qpel8_mc12_neon -+ff_put_h264_qpel8_mc12_neon: -+ push {r0, r1, r4, r10, r11, lr} -+put_h264_qpel8_mc12: -+ mov r11, sp -+ bic sp, sp, #15 -+ sub sp, sp, #(8*8+16*12) -+ sub r1, r1, r2, lsl #1 -+ mov r3, r2 -+ mov r2, #8 -+ mov r0, sp -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel8_v_lowpass_neon -+ mov r4, r0 -+ ldrd r0, [r11] -+ sub r1, r1, r3, lsl #1 -+ sub r1, r1, #2 -+ sub r2, r4, #64 -+ bl put_h264_qpel8_hv_lowpass_l2_neon -+ vpop {d8-d15} -+ add sp, r11, #8 -+ pop {r4, r10, r11, pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc22_neon -+ .func ff_put_h264_qpel8_mc22_neon -+ff_put_h264_qpel8_mc22_neon: -+ push {r4, r10, r11, lr} -+ mov r11, sp -+ bic sp, sp, #15 -+ sub r1, r1, r2, lsl #1 -+ sub r1, r1, #2 -+ mov r3, r2 -+ sub sp, sp, #(16*12) -+ mov r4, sp -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel8_hv_lowpass_neon -+ vpop {d8-d15} -+ mov sp, r11 -+ pop {r4, r10, r11, pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc32_neon -+ .func ff_put_h264_qpel8_mc32_neon -+ff_put_h264_qpel8_mc32_neon: -+ push {r0, r1, r4, r10, r11, lr} -+ add r1, r1, #1 -+ b put_h264_qpel8_mc12 -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc03_neon -+ .func ff_put_h264_qpel8_mc03_neon -+ff_put_h264_qpel8_mc03_neon: -+ push {lr} -+ add ip, r1, r2 -+ b put_h264_qpel8_mc01 -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc13_neon -+ .func ff_put_h264_qpel8_mc13_neon -+ff_put_h264_qpel8_mc13_neon: -+ push {r0, r1, r2, lr} -+ add r1, r1, r2 -+ b put_h264_qpel8_mc11 -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc23_neon -+ .func ff_put_h264_qpel8_mc23_neon -+ff_put_h264_qpel8_mc23_neon: -+ push {r0, r1, r4, r10, r11, lr} -+ add r1, r1, r2 -+ b put_h264_qpel8_mc21 -+ .endfunc -+ -+ .global ff_put_h264_qpel8_mc33_neon -+ .func ff_put_h264_qpel8_mc33_neon -+ff_put_h264_qpel8_mc33_neon: -+ add r1, r1, #1 -+ push {r0, r1, r2, lr} -+ add r1, r1, r2 -+ sub r1, r1, #1 -+ b put_h264_qpel8_mc11 -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc10_neon -+ .func ff_put_h264_qpel16_mc10_neon -+ff_put_h264_qpel16_mc10_neon: -+ mov r3, r1 -+ sub r1, r1, #2 -+ b put_h264_qpel16_h_lowpass_l2_neon -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc20_neon -+ .func ff_put_h264_qpel16_mc20_neon -+ff_put_h264_qpel16_mc20_neon: -+ sub r1, r1, #2 -+ mov r3, r2 -+ b put_h264_qpel16_h_lowpass_neon -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc30_neon -+ .func ff_put_h264_qpel16_mc30_neon -+ff_put_h264_qpel16_mc30_neon: -+ add r3, r1, #1 -+ sub r1, r1, #2 -+ b put_h264_qpel16_h_lowpass_l2_neon -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc01_neon -+ .func ff_put_h264_qpel16_mc01_neon -+ff_put_h264_qpel16_mc01_neon: -+ push {r4, lr} -+ mov ip, r1 -+put_h264_qpel16_mc01: -+ mov r3, r2 -+ sub r1, r1, r2, lsl #1 -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel16_v_lowpass_l2_neon -+ vpop {d8-d15} -+ pop {r4, pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc11_neon -+ .func ff_put_h264_qpel16_mc11_neon -+ff_put_h264_qpel16_mc11_neon: -+ push {r0, r1, r4, lr} -+put_h264_qpel16_mc11: -+ sub sp, sp, #256 -+ mov r0, sp -+ sub r1, r1, #2 -+ mov r3, #16 -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel16_h_lowpass_neon -+ add r0, sp, #256 -+ ldrd r0, [r0, #64] -+ mov r3, r2 -+ add ip, sp, #64 -+ sub r1, r1, r2, lsl #1 -+ mov r2, #16 -+ bl put_h264_qpel16_v_lowpass_l2_neon -+ vpop {d8-d15} -+ add sp, sp, #(256+8) -+ pop {r4, pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc21_neon -+ .func ff_put_h264_qpel16_mc21_neon -+ff_put_h264_qpel16_mc21_neon: -+ push {r0, r1, r4-r5, r9-r11, lr} -+put_h264_qpel16_mc21: -+ mov r11, sp -+ bic sp, sp, #15 -+ sub sp, sp, #(16*16+16*12) -+ sub r1, r1, #2 -+ mov r0, sp -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel16_h_lowpass_neon_packed -+ mov r4, r0 -+ ldrd r0, [r11] -+ sub r1, r1, r2, lsl #1 -+ sub r1, r1, #2 -+ mov r3, r2 -+ bl put_h264_qpel16_hv_lowpass_l2_neon -+ vpop {d8-d15} -+ add sp, r11, #8 -+ pop {r4-r5, r9-r11, pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc31_neon -+ .func ff_put_h264_qpel16_mc31_neon -+ff_put_h264_qpel16_mc31_neon: -+ add r1, r1, #1 -+ push {r0, r1, r4, lr} -+ sub r1, r1, #1 -+ b put_h264_qpel16_mc11 -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc02_neon -+ .func ff_put_h264_qpel16_mc02_neon -+ff_put_h264_qpel16_mc02_neon: -+ push {r4, lr} -+ sub r1, r1, r2, lsl #1 -+ mov r3, r2 -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel16_v_lowpass_neon -+ vpop {d8-d15} -+ pop {r4, pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc12_neon -+ .func ff_put_h264_qpel16_mc12_neon -+ff_put_h264_qpel16_mc12_neon: -+ push {r0, r1, r4-r5, r9-r11, lr} -+put_h264_qpel16_mc12: -+ mov r11, sp -+ bic sp, sp, #15 -+ sub sp, sp, #(16*16+16*12) -+ sub r1, r1, r2, lsl #1 -+ mov r0, sp -+ mov r3, r2 -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel16_v_lowpass_neon_packed -+ mov r4, r0 -+ ldrd r0, [r11] -+ sub r1, r1, r3, lsl #1 -+ sub r1, r1, #2 -+ mov r2, r3 -+ bl put_h264_qpel16_hv_lowpass_l2_neon -+ vpop {d8-d15} -+ add sp, r11, #8 -+ pop {r4-r5, r9-r11, pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc22_neon -+ .func ff_put_h264_qpel16_mc22_neon -+ff_put_h264_qpel16_mc22_neon: -+ push {r4, r9-r11, lr} -+ mov r11, sp -+ bic sp, sp, #15 -+ sub r1, r1, r2, lsl #1 -+ sub r1, r1, #2 -+ mov r3, r2 -+ sub sp, sp, #(16*12) -+ mov r4, sp -+ dmb -+ vpush {d8-d15} -+ bl put_h264_qpel16_hv_lowpass_neon -+ vpop {d8-d15} -+ mov sp, r11 -+ pop {r4, r9-r11, pc} -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc32_neon -+ .func ff_put_h264_qpel16_mc32_neon -+ff_put_h264_qpel16_mc32_neon: -+ push {r0, r1, r4-r5, r9-r11, lr} -+ add r1, r1, #1 -+ b put_h264_qpel16_mc12 -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc03_neon -+ .func ff_put_h264_qpel16_mc03_neon -+ff_put_h264_qpel16_mc03_neon: -+ push {r4, lr} -+ add ip, r1, r2 -+ b put_h264_qpel16_mc01 -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc13_neon -+ .func ff_put_h264_qpel16_mc13_neon -+ff_put_h264_qpel16_mc13_neon: -+ push {r0, r1, r4, lr} -+ add r1, r1, r2 -+ b put_h264_qpel16_mc11 -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc23_neon -+ .func ff_put_h264_qpel16_mc23_neon -+ff_put_h264_qpel16_mc23_neon: -+ push {r0, r1, r4-r5, r9-r11, lr} -+ add r1, r1, r2 -+ b put_h264_qpel16_mc21 -+ .endfunc -+ -+ .global ff_put_h264_qpel16_mc33_neon -+ .func ff_put_h264_qpel16_mc33_neon -+ff_put_h264_qpel16_mc33_neon: -+ add r1, r1, #1 -+ push {r0, r1, r4, lr} -+ add r1, r1, r2 -+ sub r1, r1, #1 -+ b put_h264_qpel16_mc11 -+ .endfunc diff --git a/packages/mplayer/files/mru-neon-h264idct-dc.diff b/packages/mplayer/files/mru-neon-h264idct-dc.diff deleted file mode 100644 index 9f316b1b5b..0000000000 --- a/packages/mplayer/files/mru-neon-h264idct-dc.diff +++ /dev/null @@ -1,55 +0,0 @@ -From: Mans Rullgard -Date: Mon, 25 Aug 2008 00:05:54 +0000 (+0100) -Subject: ARM: NEON optimised h264_idct_dc_add -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=1097c36b47b5019b2a8668f82796ffe76f482408 - -ARM: NEON optimised h264_idct_dc_add ---- - -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index 74f9b4d..6dbe835 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -89,6 +89,7 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - int beta, int8_t *tc0); - - void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); -+void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); - - void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - { -@@ -156,4 +157,5 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; - - c->h264_idct_add = ff_h264_idct_add_neon; -+ c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; - } -diff --git a/libavcodec/armv4l/h264idct_neon.S b/libavcodec/armv4l/h264idct_neon.S -index 8f456f3..34e217f 100644 ---- a/libavcodec/armv4l/h264idct_neon.S -+++ b/libavcodec/armv4l/h264idct_neon.S -@@ -75,3 +75,24 @@ ff_h264_idct_add_neon: - - bx lr - .endfunc -+ -+ .global ff_h264_idct_dc_add_neon -+ .func ff_h264_idct_dc_add_neon -+ff_h264_idct_dc_add_neon: -+ vld1.16 {d2[],d3[]}, [r1,:16] -+ vrshr.s16 q1, q1, #6 -+ vld1.32 {d0[0]}, [r0,:32], r2 -+ vld1.32 {d0[1]}, [r0,:32], r2 -+ vaddw.u8 q2, q1, d0 -+ vld1.32 {d1[0]}, [r0,:32], r2 -+ vld1.32 {d1[1]}, [r0,:32], r2 -+ vaddw.u8 q1, q1, d1 -+ vqmovun.s16 d0, q2 -+ vqmovun.s16 d1, q1 -+ sub r0, r0, r2, lsl #2 -+ vst1.32 {d0[0]}, [r0,:32], r2 -+ vst1.32 {d0[1]}, [r0,:32], r2 -+ vst1.32 {d1[0]}, [r0,:32], r2 -+ vst1.32 {d1[1]}, [r0,:32], r2 -+ bx lr -+ .endfunc diff --git a/packages/mplayer/files/mru-neon-h264idctadd.diff b/packages/mplayer/files/mru-neon-h264idctadd.diff deleted file mode 100644 index 0f0931fbff..0000000000 --- a/packages/mplayer/files/mru-neon-h264idctadd.diff +++ /dev/null @@ -1,123 +0,0 @@ -From: Mans Rullgard -Date: Sun, 24 Aug 2008 21:27:49 +0000 (+0100) -Subject: ARM: NEON optimised h264_idct_add -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=ebfab90234268bb35600a06e9982ca1358ea43f3 - -ARM: NEON optimised h264_idct_add ---- - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 36ba158..053e752 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -438,6 +438,7 @@ OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \ - ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \ - armv4l/simple_idct_neon.o \ - armv4l/h264dsp_neon.o \ -+ armv4l/h264idct_neon.o \ - - OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \ - sparc/simple_idct_vis.o \ -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index a932aa9..74f9b4d 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -88,6 +88,8 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - int beta, int8_t *tc0); - -+void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); -+ - void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - { - c->put_pixels_tab[0][0] = ff_put_pixels16_neon; -@@ -152,4 +154,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; - c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; - c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; -+ -+ c->h264_idct_add = ff_h264_idct_add_neon; - } -diff --git a/libavcodec/armv4l/h264idct_neon.S b/libavcodec/armv4l/h264idct_neon.S -new file mode 100644 -index 0000000..8f456f3 ---- /dev/null -+++ b/libavcodec/armv4l/h264idct_neon.S -@@ -0,0 +1,77 @@ -+/* -+ * Copyright (c) 2008 Mans Rullgard -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+ .fpu neon -+ -+ .text -+ -+ .global ff_h264_idct_add_neon -+ .func ff_h264_idct_add_neon -+ff_h264_idct_add_neon: -+ mov r3, #(1<<5) -+ vmov.i16 d16, #0 -+ vmov.16 d16[0], r3 -+ vld1.64 {d0-d3}, [r1,:128] -+ vadd.i16 d0, d0, d16 -+ -+ vswp d1, d2 -+ vadd.i16 d4, d0, d1 -+ vshr.s16 q8, q1, #1 -+ vsub.i16 d5, d0, d1 -+ vadd.i16 d6, d2, d17 -+ vsub.i16 d7, d16, d3 -+ vadd.i16 q0, q2, q3 -+ vsub.i16 q1, q2, q3 -+ -+ vtrn.16 d0, d1 -+ vtrn.16 d3, d2 -+ vtrn.32 d0, d3 -+ vtrn.32 d1, d2 -+ -+ vadd.i16 d4, d0, d3 -+ vld1.32 {d18[0]}, [r0,:32], r2 -+ vswp d1, d3 -+ vshr.s16 q8, q1, #1 -+ vld1.32 {d19[1]}, [r0,:32], r2 -+ vsub.i16 d5, d0, d1 -+ vld1.32 {d18[1]}, [r0,:32], r2 -+ vadd.i16 d6, d16, d3 -+ vld1.32 {d19[0]}, [r0,:32], r2 -+ vsub.i16 d7, d2, d17 -+ sub r0, r0, r2, lsl #2 -+ vadd.i16 q0, q2, q3 -+ vsub.i16 q1, q2, q3 -+ -+ vshr.s16 q0, q0, #6 -+ vshr.s16 q1, q1, #6 -+ -+ vaddw.u8 q0, q0, d18 -+ vaddw.u8 q1, q1, d19 -+ -+ vqmovun.s16 d0, q0 -+ vqmovun.s16 d1, q1 -+ -+ vst1.32 {d0[0]}, [r0,:32], r2 -+ vst1.32 {d1[1]}, [r0,:32], r2 -+ vst1.32 {d0[1]}, [r0,:32], r2 -+ vst1.32 {d1[0]}, [r0,:32], r2 -+ -+ bx lr -+ .endfunc diff --git a/packages/mplayer/files/mru-neon-put-pixels.diff b/packages/mplayer/files/mru-neon-put-pixels.diff deleted file mode 100644 index 85650d913b..0000000000 --- a/packages/mplayer/files/mru-neon-put-pixels.diff +++ /dev/null @@ -1,376 +0,0 @@ -From: Mans Rullgard -Date: Fri, 13 Jun 2008 01:21:58 +0000 (+0100) -Subject: ARM: NEON optimised put_pixels functions -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=86410ed1948118a29c70946d5294df9feb04dfef - -ARM: NEON optimised put_pixels functions ---- - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index d91185e..27746df 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -433,6 +433,10 @@ ASM_OBJS-$(HAVE_ARMV5TE) += armv4l/simple_idct_armv5te.o \ - - ASM_OBJS-$(HAVE_ARMV6) += armv4l/simple_idct_armv6.o \ - -+OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \ -+ -+ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \ -+ - OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \ - sparc/simple_idct_vis.o \ - -diff --git a/libavcodec/armv4l/dsputil_arm.c b/libavcodec/armv4l/dsputil_arm.c -index 100b89e..89b51e7 100644 ---- a/libavcodec/armv4l/dsputil_arm.c -+++ b/libavcodec/armv4l/dsputil_arm.c -@@ -26,6 +26,7 @@ - - extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx); - extern void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx); -+extern void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); - - extern void j_rev_dct_ARM(DCTELEM *data); - extern void simple_idct_ARM(DCTELEM *data); -@@ -302,4 +303,7 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) - #ifdef HAVE_ARMVFP - ff_float_init_arm_vfp(c, avctx); - #endif -+#ifdef HAVE_NEON -+ ff_dsputil_init_neon(c, avctx); -+#endif - } -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -new file mode 100644 -index 0000000..8a10dde ---- /dev/null -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -0,0 +1,67 @@ -+/* -+ * ARM NEON optimised DSP functions -+ * Copyright (c) 2008 Mans Rullgard -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include -+ -+#include "libavcodec/avcodec.h" -+#include "libavcodec/dsputil.h" -+ -+void ff_put_pixels16_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels8_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); -+void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); -+ -+void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int); -+void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int); -+ -+void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) -+{ -+ c->put_pixels_tab[0][0] = ff_put_pixels16_neon; -+ c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon; -+ c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon; -+ c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon; -+ c->put_pixels_tab[1][0] = ff_put_pixels8_neon; -+ c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon; -+ c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon; -+ c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon; -+ -+ c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon; -+ c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon; -+ c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon; -+ c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon; -+ c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon; -+ c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon; -+ c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon; -+ c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon; -+ -+ c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon; -+ c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon; -+} -diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S -new file mode 100644 -index 0000000..fc5e401 ---- /dev/null -+++ b/libavcodec/armv4l/dsputil_neon_s.S -@@ -0,0 +1,254 @@ -+/* -+ * ARM NEON optimised DSP functions -+ * Copyright (c) 2008 Mans Rullgard -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+ .fpu neon -+ .text -+ -+ .macro put_pixels16 -+ dmb -+1: vld1.64 {d0, d1}, [r1], r2 -+ vld1.64 {d2, d3}, [r1], r2 -+ vld1.64 {d4, d5}, [r1], r2 -+ vld1.64 {d6, d7}, [r1], r2 -+ pld [r1] -+ subs r3, r3, #4 -+ vst1.64 {d0, d1}, [r0,:128], r2 -+ vst1.64 {d2, d3}, [r0,:128], r2 -+ vst1.64 {d4, d5}, [r0,:128], r2 -+ vst1.64 {d6, d7}, [r0,:128], r2 -+ bne 1b -+ bx lr -+ .endm -+ -+ .macro put_pixels16_x2 vhadd=vrhadd.u8 -+ dmb -+1: vld1.64 {d0-d2}, [r1], r2 -+ vld1.64 {d4-d6}, [r1], r2 -+ pld [r1] -+ subs r3, r3, #2 -+ vext.8 q1, q0, q1, #1 -+ vext.8 q3, q2, q3, #1 -+ \vhadd q0, q0, q1 -+ \vhadd q2, q2, q3 -+ vst1.64 {d0, d1}, [r0,:128], r2 -+ vst1.64 {d4, d5}, [r0,:128], r2 -+ bne 1b -+ bx lr -+ .endm -+ -+ .macro put_pixels16_y2 vhadd=vrhadd.u8 -+ push {lr} -+ add ip, r1, r2 -+ lsl lr, r2, #1 -+ vld1.64 {d0, d1}, [r1], lr -+ vld1.64 {d2, d3}, [ip], lr -+ dmb -+1: subs r3, r3, #2 -+ \vhadd q2, q0, q1 -+ vld1.64 {d0, d1}, [r1], lr -+ vst1.64 {d4, d5}, [r0,:128], r2 -+ \vhadd q2, q0, q1 -+ vld1.64 {d2, d3}, [ip], lr -+ vst1.64 {d4, d5}, [r0,:128], r2 -+ bne 1b -+ pop {pc} -+ .endm -+ -+ .macro put_pixels16_xy2 vshrn=vrshrn.u16 no_rnd=0 -+ push {lr} -+ lsl lr, r2, #1 -+ add ip, r1, r2 -+ vld1.64 {d0-d2}, [r1], lr -+ vld1.64 {d4-d6}, [ip], lr -+ .if \no_rnd -+ vmov.i16 q13, #1 -+ .endif -+ pld [r1] -+ pld [ip] -+ vext.8 q1, q0, q1, #1 -+ vext.8 q3, q2, q3, #1 -+ vaddl.u8 q8, d0, d2 -+ vaddl.u8 q10, d1, d3 -+ vaddl.u8 q9, d4, d6 -+ vaddl.u8 q11, d5, d7 -+ dmb -+1: subs r3, r3, #2 -+ vld1.64 {d0-d2}, [r1], lr -+ vadd.u16 q12, q8, q9 -+ pld [r1] -+ .if \no_rnd -+ vadd.u16 q12, q12, q13 -+ .endif -+ vext.8 q15, q0, q1, #1 -+ vadd.u16 q1 , q10, q11 -+ \vshrn d28, q12, #2 -+ .if \no_rnd -+ vadd.u16 q1, q1, q13 -+ .endif -+ \vshrn d29, q1, #2 -+ vaddl.u8 q8, d0, d30 -+ vld1.64 {d2-d4}, [ip], lr -+ vaddl.u8 q10, d1, d31 -+ vst1.64 {d28,d29}, [r0,:128], r2 -+ vadd.u16 q12, q8, q9 -+ pld [ip] -+ .if \no_rnd -+ vadd.u16 q12, q12, q13 -+ .endif -+ vext.8 q2, q1, q2, #1 -+ vadd.u16 q0, q10, q11 -+ \vshrn d30, q12, #2 -+ .if \no_rnd -+ vadd.u16 q0, q0, q13 -+ .endif -+ \vshrn d31, q0, #2 -+ vaddl.u8 q9, d2, d4 -+ vaddl.u8 q11, d3, d5 -+ vst1.64 {d30,d31}, [r0,:128], r2 -+ bgt 1b -+ pop {pc} -+ .endm -+ -+ .macro put_pixels8 -+ dmb -+1: vld1.64 {d0}, [r1], r2 -+ vld1.64 {d1}, [r1], r2 -+ vld1.64 {d2}, [r1], r2 -+ vld1.64 {d3}, [r1], r2 -+ subs r3, r3, #4 -+ vst1.64 {d0}, [r0,:64], r2 -+ vst1.64 {d1}, [r0,:64], r2 -+ vst1.64 {d2}, [r0,:64], r2 -+ vst1.64 {d3}, [r0,:64], r2 -+ bne 1b -+ bx lr -+ .endm -+ -+ .macro put_pixels8_x2 vhadd=vrhadd.u8 -+ dmb -+1: vld1.64 {d0, d1}, [r1], r2 -+ vld1.64 {d2, d3}, [r1], r2 -+ pld [r1] -+ subs r3, r3, #2 -+ vext.8 d1, d0, d1, #1 -+ vext.8 d3, d2, d3, #1 -+ vswp d1, d2 -+ \vhadd q0, q0, q1 -+ vst1.64 {d0}, [r0,:64], r2 -+ vst1.64 {d1}, [r0,:64], r2 -+ bne 1b -+ bx lr -+ .endm -+ -+ .macro put_pixels8_y2 vhadd=vrhadd.u8 -+ push {lr} -+ add ip, r1, r2 -+ lsl lr, r2, #1 -+ vld1.64 {d0}, [r1], lr -+ vld1.64 {d1}, [ip], lr -+ dmb -+1: subs r3, r3, #2 -+ \vhadd d4, d0, d1 -+ vld1.64 {d0}, [r1], lr -+ vst1.64 {d4}, [r0,:64], r2 -+ \vhadd d4, d0, d1 -+ vld1.64 {d1}, [ip], lr -+ vst1.64 {d4}, [r0,:64], r2 -+ bne 1b -+ pop {pc} -+ .endm -+ -+ .macro put_pixels8_xy2 vshrn=vrshrn.u16 no_rnd=0 -+ push {lr} -+ lsl lr, r2, #1 -+ add ip, r1, r2 -+ vld1.64 {d0, d1}, [r1], lr -+ vld1.64 {d2, d3}, [ip], lr -+ .if \no_rnd -+ vmov.i16 q11, #1 -+ .endif -+ pld [r1] -+ pld [ip] -+ vext.8 d4, d0, d1, #1 -+ vext.8 d6, d2, d3, #1 -+ vaddl.u8 q8, d0, d4 -+ vaddl.u8 q9, d2, d6 -+ dmb -+1: subs r3, r3, #2 -+ vld1.64 {d0, d1}, [r1], lr -+ pld [r1] -+ vadd.u16 q10, q8, q9 -+ vext.8 d4, d0, d1, #1 -+ .if \no_rnd -+ vadd.u16 q10, q10, q11 -+ .endif -+ vaddl.u8 q8, d0, d4 -+ \vshrn d5, q10, #2 -+ vld1.64 {d2, d3}, [ip], lr -+ vadd.u16 q10, q8, q9 -+ pld [ip] -+ .if \no_rnd -+ vadd.u16 q10, q10, q11 -+ .endif -+ vst1.64 {d5}, [r0,:64], r2 -+ \vshrn d7, q10, #2 -+ vext.8 d6, d2, d3, #1 -+ vaddl.u8 q9, d2, d6 -+ vst1.64 {d7}, [r0,:64], r2 -+ bgt 1b -+ pop {pc} -+ .endm -+ -+ .macro extern name -+ .global \name -+ .type \name, %function -+ .func \name -+\name: -+ .endm -+ -+ .macro defun name suf rnd_op args:vararg -+ extern ff_\name\suf\()_neon -+ \name \rnd_op \args -+ .endfunc -+ .endm -+ -+ .macro defun2 name args:vararg -+ defun \name -+ defun \name \args -+ .endm -+ -+ extern ff_put_h264_qpel16_mc00_neon -+ mov r3, #16 -+ .endfunc -+ -+ defun put_pixels16 -+ defun2 put_pixels16_x2, _no_rnd, vhadd.u8 -+ defun2 put_pixels16_y2, _no_rnd, vhadd.u8 -+ defun2 put_pixels16_xy2, _no_rnd, vshrn.u16, 1 -+ -+ extern ff_put_h264_qpel8_mc00_neon -+ mov r3, #8 -+ .endfunc -+ -+ defun put_pixels8 -+ defun2 put_pixels8_x2, _no_rnd, vhadd.u8 -+ defun2 put_pixels8_y2, _no_rnd, vhadd.u8 -+ defun2 put_pixels8_xy2, _no_rnd, vshrn.u16, 1 diff --git a/packages/mplayer/files/mru-neon-simple-idct.diff b/packages/mplayer/files/mru-neon-simple-idct.diff deleted file mode 100644 index 772a1fd972..0000000000 --- a/packages/mplayer/files/mru-neon-simple-idct.diff +++ /dev/null @@ -1,501 +0,0 @@ -From: Mans Rullgard -Date: Thu, 26 Jun 2008 18:37:40 +0000 (+0100) -Subject: ARM: NEON optimised simple_idct -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=215b9eaa8cf0195908c92f373c018320736ec106 - -ARM: NEON optimised simple_idct ---- - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 27746df..7fa02fa 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -436,6 +436,7 @@ ASM_OBJS-$(HAVE_ARMV6) += armv4l/simple_idct_armv6.o \ - OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \ - - ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \ -+ armv4l/simple_idct_neon.o \ - - OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \ - sparc/simple_idct_vis.o \ -diff --git a/libavcodec/armv4l/dsputil_arm.c b/libavcodec/armv4l/dsputil_arm.c -index 89b51e7..942c0de 100644 ---- a/libavcodec/armv4l/dsputil_arm.c -+++ b/libavcodec/armv4l/dsputil_arm.c -@@ -43,6 +43,12 @@ extern void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, - extern void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, - DCTELEM *data); - -+extern void ff_simple_idct_neon(DCTELEM *data); -+extern void ff_simple_idct_put_neon(uint8_t *dest, int line_size, -+ DCTELEM *data); -+extern void ff_simple_idct_add_neon(uint8_t *dest, int line_size, -+ DCTELEM *data); -+ - /* XXX: local hack */ - static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); - static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); -@@ -233,6 +239,8 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) - if(idct_algo == FF_IDCT_AUTO){ - #if defined(HAVE_IPP) - idct_algo = FF_IDCT_IPP; -+#elif defined(HAVE_NEON) -+ idct_algo = FF_IDCT_SIMPLENEON; - #elif defined(HAVE_ARMV6) - idct_algo = FF_IDCT_SIMPLEARMV6; - #elif defined(HAVE_ARMV5TE) -@@ -273,6 +281,13 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) - c->idct = simple_idct_ipp; - c->idct_permutation_type= FF_NO_IDCT_PERM; - #endif -+#ifdef HAVE_NEON -+ } else if (idct_algo==FF_IDCT_SIMPLENEON){ -+ c->idct_put= ff_simple_idct_put_neon; -+ c->idct_add= ff_simple_idct_add_neon; -+ c->idct = ff_simple_idct_neon; -+ c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; -+#endif - } - } - -diff --git a/libavcodec/armv4l/simple_idct_neon.S b/libavcodec/armv4l/simple_idct_neon.S -new file mode 100644 -index 0000000..44701f8 ---- /dev/null -+++ b/libavcodec/armv4l/simple_idct_neon.S -@@ -0,0 +1,411 @@ -+/* -+ * ARM NEON IDCT -+ * -+ * Copyright (c) 2008 Mans Rullgard -+ * -+ * Based on Simple IDCT -+ * Copyright (c) 2001 Michael Niedermayer -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -+#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -+#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -+#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -+#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -+#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -+#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -+#define W4c ((1<<(COL_SHIFT-1))/W4) -+#define ROW_SHIFT 11 -+#define COL_SHIFT 20 -+ -+#define w1 d0[0] -+#define w2 d0[1] -+#define w3 d0[2] -+#define w4 d0[3] -+#define w5 d1[0] -+#define w6 d1[1] -+#define w7 d1[2] -+#define w4c d1[3] -+ -+ .fpu neon -+ -+ .macro idct_col4_top -+ vmull.s16 q7, d6, w2 /* q9 = W2 * col[2] */ -+ vmull.s16 q8, d6, w6 /* q10 = W6 * col[2] */ -+ vmull.s16 q9, d4, w1 /* q9 = W1 * col[1] */ -+ vadd.i32 q11, q15, q7 -+ vmull.s16 q10, d4, w3 /* q10 = W3 * col[1] */ -+ vadd.i32 q12, q15, q8 -+ vmull.s16 q5, d4, w5 /* q5 = W5 * col[1] */ -+ vsub.i32 q13, q15, q8 -+ vmull.s16 q6, d4, w7 /* q6 = W7 * col[1] */ -+ vsub.i32 q14, q15, q7 -+ -+ vmlal.s16 q9, d8, w3 /* q9 += W3 * col[3] */ -+ vmlsl.s16 q10, d8, w7 /* q10 -= W7 * col[3] */ -+ vmlsl.s16 q5, d8, w1 /* q5 -= W1 * col[3] */ -+ vmlsl.s16 q6, d8, w5 /* q6 -= W5 * col[3] */ -+ .endm -+ -+ .text -+ .align -+ .type idct_row4_neon, %function -+ .func idct_row4_neon -+idct_row4_neon: -+ vmov.i32 q15, #(1<<(ROW_SHIFT-1)) -+ vld1.64 {d2-d5}, [a3,:128]! -+ vmlal.s16 q15, d2, w4 /* q15 += W4 * col[0] */ -+ vld1.64 {d6,d7}, [a3,:128]! -+ vorr d10, d3, d5 -+ vld1.64 {d8,d9}, [a3,:128]! -+ add a3, a3, #-64 -+ -+ vorr d11, d7, d9 -+ vorr d10, d10, d11 -+ vmov a4, v1, d10 -+ -+ idct_col4_top -+ -+ orrs a4, a4, v1 -+ beq 1f -+ -+ vmull.s16 q7, d3, w4 /* q7 = W4 * col[4] */ -+ vmlal.s16 q9, d5, w5 /* q9 += W5 * col[5] */ -+ vmlsl.s16 q10, d5, w1 /* q10 -= W1 * col[5] */ -+ vmull.s16 q8, d7, w2 /* q8 = W2 * col[6] */ -+ vmlal.s16 q5, d5, w7 /* q5 += W7 * col[5] */ -+ vadd.i32 q11, q11, q7 -+ vsub.i32 q12, q12, q7 -+ vsub.i32 q13, q13, q7 -+ vadd.i32 q14, q14, q7 -+ vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */ -+ vmull.s16 q7, d7, w6 /* q7 = W6 * col[6] */ -+ vmlal.s16 q9, d9, w7 -+ vmlsl.s16 q10, d9, w5 -+ vmlal.s16 q5, d9, w3 -+ vmlsl.s16 q6, d9, w1 -+ vadd.i32 q11, q11, q7 -+ vsub.i32 q12, q12, q8 -+ vadd.i32 q13, q13, q8 -+ vsub.i32 q14, q14, q7 -+ -+1: vadd.i32 q3, q11, q9 -+ vadd.i32 q4, q12, q10 -+ vshrn.i32 d2, q3, #ROW_SHIFT -+ vshrn.i32 d4, q4, #ROW_SHIFT -+ vadd.i32 q7, q13, q5 -+ vadd.i32 q8, q14, q6 -+ vtrn.16 d2, d4 -+ vshrn.i32 d6, q7, #ROW_SHIFT -+ vshrn.i32 d8, q8, #ROW_SHIFT -+ vsub.i32 q14, q14, q6 -+ vsub.i32 q11, q11, q9 -+ vtrn.16 d6, d8 -+ vsub.i32 q13, q13, q5 -+ vshrn.i32 d3, q14, #ROW_SHIFT -+ vtrn.32 d2, d6 -+ vsub.i32 q12, q12, q10 -+ vtrn.32 d4, d8 -+ vshrn.i32 d5, q13, #ROW_SHIFT -+ vshrn.i32 d7, q12, #ROW_SHIFT -+ vshrn.i32 d9, q11, #ROW_SHIFT -+ -+ vtrn.16 d3, d5 -+ vtrn.16 d7, d9 -+ vtrn.32 d3, d7 -+ vtrn.32 d5, d9 -+ -+ vst1.64 {d2-d5}, [a3,:128]! -+ vst1.64 {d6-d9}, [a3,:128]! -+ -+ bx lr -+ .endfunc -+ -+ .align -+ .type idct_col4_neon, %function -+ .func idct_col4_neon -+idct_col4_neon: -+ mov ip, #16 -+ vld1.64 {d2}, [a3,:64], ip /* d2 = col[0] */ -+ vdup.16 d30, w4c -+ vld1.64 {d4}, [a3,:64], ip /* d3 = col[1] */ -+ vadd.i16 d30, d30, d2 -+ vld1.64 {d6}, [a3,:64], ip /* d4 = col[2] */ -+ vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1< -Date: Sun, 3 Aug 2008 16:46:43 +0000 (+0100) -Subject: ARM: NEON optimised vector_fmul_window -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=98feb31064dccfd16ce189ff4aec9ccedddf6b04 - -ARM: NEON optimised vector_fmul_window ---- - -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index f9d32c0..6c44940 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -91,6 +91,10 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); - void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); - -+void ff_vector_fmul_window_neon(float *dst, const float *src0, -+ const float *src1, const float *win, -+ float add_bias, int len); -+ - void ff_float_to_int16_neon(int16_t *, const float *, long); - void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); - -@@ -164,6 +168,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - c->h264_idct_add = ff_h264_idct_add_neon; - c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; - -+ c->vector_fmul_window = ff_vector_fmul_window_neon; -+ - c->float_to_int16 = ff_float_to_int16_neon; - c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; - -diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S -index 6a54803..49a09b8 100644 ---- a/libavcodec/armv4l/dsputil_neon_s.S -+++ b/libavcodec/armv4l/dsputil_neon_s.S -@@ -324,6 +324,49 @@ extern ff_float_to_int16_interleave_neon - pop {r4,r5,pc} - .endfunc - -+extern ff_vector_fmul_window_neon -+ vld1.32 {d16[],d17[]}, [sp,:32] -+ push {r4,r5,lr} -+ ldr lr, [sp, #16] -+ sub r2, r2, #8 -+ sub r5, lr, #2 -+ add r2, r2, r5, lsl #2 -+ add r4, r3, r5, lsl #3 -+ add ip, r0, r5, lsl #3 -+ mov r5, #-16 -+ dmb -+ vld1.64 {d0,d1}, [r1,:128]! -+ vld1.64 {d2,d3}, [r2,:128], r5 -+ vld1.64 {d4,d5}, [r3,:128]! -+ vld1.64 {d6,d7}, [r4,:128], r5 -+1: vmov q10, q8 -+ vmov q11, q8 -+ vmla.f32 q11, q0, q2 -+ vrev64.32 q3, q3 -+ vswp d6, d7 -+ vmla.f32 q10, q0, q3 -+ vrev64.32 q1, q1 -+ vswp d2, d3 -+ subs lr, lr, #4 -+ vmla.f32 q11, q1, q3 -+ vmls.f32 q10, q1, q2 -+ beq 2f -+ vld1.64 {d0,d1}, [r1,:128]! -+ vld1.64 {d2,d3}, [r2,:128], r5 -+ vld1.64 {d4,d5}, [r3,:128]! -+ vld1.64 {d6,d7}, [r4,:128], r5 -+ vrev64.32 q11, q11 -+ vswp d22, d23 -+ vst1.64 {d20,d21}, [r0,:128]! -+ vst1.64 {d22,d23}, [ip,:128], r5 -+ b 1b -+2: vrev64.32 q11, q11 -+ vswp d22, d23 -+ vst1.64 {d20,d21}, [r0,:128]! -+ vst1.64 {d22,d23}, [ip,:128], r5 -+ pop {r4,r5,pc} -+ .endfunc -+ - #ifdef CONFIG_VORBIS_DECODER - extern ff_vorbis_inverse_coupling_neon - vmov.i32 q10, #(1<<31) diff --git a/packages/mplayer/files/mru-neon-vector-fmul.diff b/packages/mplayer/files/mru-neon-vector-fmul.diff deleted file mode 100644 index 2710f10443..0000000000 --- a/packages/mplayer/files/mru-neon-vector-fmul.diff +++ /dev/null @@ -1,56 +0,0 @@ -From: Mans Rullgard -Date: Sun, 3 Aug 2008 17:13:06 +0000 (+0100) -Subject: ARM: NEON optimised vector_fmul -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=ba46eb14e3be96b627fd096aacaa4dbb2e186281 - -ARM: NEON optimised vector_fmul ---- - -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index 6c44940..c6fc173 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -91,6 +91,7 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); - void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); - -+void ff_vector_fmul_neon(float *dst, const float *src, int len); - void ff_vector_fmul_window_neon(float *dst, const float *src0, - const float *src1, const float *win, - float add_bias, int len); -@@ -168,6 +169,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - c->h264_idct_add = ff_h264_idct_add_neon; - c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; - -+ c->vector_fmul = ff_vector_fmul_neon; - c->vector_fmul_window = ff_vector_fmul_window_neon; - - c->float_to_int16 = ff_float_to_int16_neon; -diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S -index 49a09b8..7310700 100644 ---- a/libavcodec/armv4l/dsputil_neon_s.S -+++ b/libavcodec/armv4l/dsputil_neon_s.S -@@ -324,6 +324,23 @@ extern ff_float_to_int16_interleave_neon - pop {r4,r5,pc} - .endfunc - -+extern ff_vector_fmul_neon -+ mov r3, r0 -+ vld1.64 {d0-d3}, [r0,:128]! -+ vld1.64 {d4-d7}, [r1,:128]! -+ dmb -+1: subs r2, r2, #8 -+ vmul.f32 q8, q0, q2 -+ vmul.f32 q9, q1, q3 -+ beq 2f -+ vld1.64 {d0-d3}, [r0,:128]! -+ vld1.64 {d4-d7}, [r1,:128]! -+ vst1.64 {d16-d19}, [r3,:128]! -+ b 1b -+2: vst1.64 {d16-d19}, [r3,:128]! -+ bx lr -+ .endfunc -+ - extern ff_vector_fmul_window_neon - vld1.32 {d16[],d17[]}, [sp,:32] - push {r4,r5,lr} diff --git a/packages/mplayer/files/mru-neon-vorbis-inverse.diff b/packages/mplayer/files/mru-neon-vorbis-inverse.diff deleted file mode 100644 index 6cd5dc0134..0000000000 --- a/packages/mplayer/files/mru-neon-vorbis-inverse.diff +++ /dev/null @@ -1,68 +0,0 @@ -From: Mans Rullgard -Date: Fri, 1 Aug 2008 02:28:34 +0000 (+0100) -Subject: ARM: NEON optimised vorbis_inverse_coupling -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=ac234c5ad52d8478be5aaa7c276e423873453d8b - -ARM: NEON optimised vorbis_inverse_coupling ---- - -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index b584e5b..f9d32c0 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -94,6 +94,8 @@ void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); - void ff_float_to_int16_neon(int16_t *, const float *, long); - void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); - -+void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize); -+ - void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - { - c->put_pixels_tab[0][0] = ff_put_pixels16_neon; -@@ -164,4 +166,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - - c->float_to_int16 = ff_float_to_int16_neon; - c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; -+ -+#ifdef CONFIG_VORBIS_DECODER -+ c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon; -+#endif - } -diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S -index 44f75ba..6a54803 100644 ---- a/libavcodec/armv4l/dsputil_neon_s.S -+++ b/libavcodec/armv4l/dsputil_neon_s.S -@@ -19,6 +19,8 @@ - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -+#include "config.h" -+ - .fpu neon - .text - -@@ -321,3 +323,24 @@ extern ff_float_to_int16_interleave_neon - bne 3b - pop {r4,r5,pc} - .endfunc -+ -+#ifdef CONFIG_VORBIS_DECODER -+extern ff_vorbis_inverse_coupling_neon -+ vmov.i32 q10, #(1<<31) -+ dmb -+1: vld1.64 {d2,d3}, [r1,:128] -+ vld1.64 {d0,d1}, [r0,:128] -+ vcle.f32 q8, q1, #0 -+ vand q9, q0, q10 -+ veor q1, q1, q9 -+ vand q2, q1, q8 -+ vbic q3, q1, q8 -+ vadd.f32 q1, q0, q2 -+ vsub.f32 q0, q0, q3 -+ subs r2, r2, #4 -+ vst1.64 {d0,d1}, [r1,:128]! -+ vst1.64 {d2,d3}, [r0,:128]! -+ bgt 1b -+ bx lr -+ .endfunc -+#endif diff --git a/packages/mplayer/files/omapfb.patch b/packages/mplayer/files/omapfb.patch index 5c9bca7a6f..860cf070f4 100644 --- a/packages/mplayer/files/omapfb.patch +++ b/packages/mplayer/files/omapfb.patch @@ -1,5 +1,5 @@ ---- a/libvo/video_out.c 2008-11-07 11:59:48.000000000 -0800 -+++ b/libvo/video_out.c 2008-11-07 12:01:52.000000000 -0800 +--- /tmp/video_out.c 2009-01-14 16:39:38.000000000 +0100 ++++ trunk/libvo/video_out.c 2009-01-14 16:40:11.000000000 +0100 @@ -86,6 +86,7 @@ extern vo_functions_t video_out_bl; extern vo_functions_t video_out_fbdev; @@ -8,22 +8,3 @@ extern vo_functions_t video_out_svga; extern vo_functions_t video_out_png; extern vo_functions_t video_out_ggi; -@@ -172,6 +173,7 @@ - #ifdef CONFIG_FBDEV - &video_out_fbdev, - &video_out_fbdev2, -+ &video_out_omapfb, - #endif - #ifdef CONFIG_SVGALIB - &video_out_svga, ---- a/configure 2008-11-07 12:00:32.000000000 -0800 -+++ b/configure 2008-11-07 12:13:31.000000000 -0800 -@@ -4558,7 +4558,7 @@ - fi - if test "$_fbdev" = yes ; then - _def_fbdev='#define CONFIG_FBDEV 1' -- _vosrc="$_vosrc vo_fbdev.c vo_fbdev2.c" -+ _vosrc="$_vosrc vo_fbdev.c vo_fbdev2.c vo_omapfb.c yuv.S" - _vomodules="fbdev $_vomodules" - else - _def_fbdev='#undef CONFIG_FBDEV' diff --git a/packages/mplayer/files/pld-onlyarm5-svn.patch b/packages/mplayer/files/pld-onlyarm5-svn.patch new file mode 100644 index 0000000000..0924060c6c --- /dev/null +++ b/packages/mplayer/files/pld-onlyarm5-svn.patch @@ -0,0 +1,405 @@ +--- MPlayer-1.0pre8/libavcodec/arm/dsputil_arm_s.S.orig 2006-07-03 09:53:33.000000000 +0100 ++++ MPlayer-1.0pre8/libavcodec/arm/dsputil_arm_s.S 2006-07-03 10:06:58.000000000 +0100 +@@ -16,6 +16,13 @@ + @ License along with this library; if not, write to the Free Software + @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + @ ++#if defined(__ARM_ARCH_5__) || \ ++ defined(__ARM_ARCH_5T__) || \ ++ defined(__ARM_ARCH_5TE__) ++#define PLD(code...) code ++#else ++#define PLD(code...) ++#endif + + .macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 + mov \Rd0, \Rn0, lsr #(\shift * 8) +@@ -74,7 +81,7 @@ + put_pixels16_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -85,7 +92,7 @@ + ldmia r1, {r4-r7} + add r1, r1, r2 + stmia r0, {r4-r7} +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + add r0, r0, r2 + bne 1b +@@ -95,7 +102,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 +@@ -106,7 +113,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 +@@ -117,7 +124,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 +@@ -136,7 +143,7 @@ + put_pixels8_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r5,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -147,7 +154,7 @@ + ldmia r1, {r4-r5} + add r1, r1, r2 + subs r3, r3, #1 +- pld [r1] ++ PLD ( pld [r1] ) + stmia r0, {r4-r5} + add r0, r0, r2 + bne 1b +@@ -157,7 +164,7 @@ + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 +@@ -168,7 +175,7 @@ + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 +@@ -179,7 +186,7 @@ + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 +@@ -198,7 +205,7 @@ + put_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -210,7 +217,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -223,7 +230,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -236,7 +243,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -248,7 +255,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -267,7 +274,7 @@ + put_no_rnd_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -279,7 +286,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -292,7 +299,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -305,7 +312,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -317,7 +324,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -338,7 +345,7 @@ + put_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -352,13 +359,13 @@ + add r1, r1, r2 + 6: ldmia r1, {r6-r7} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -369,18 +376,18 @@ + 2: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -392,18 +399,18 @@ + 3: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -415,18 +422,18 @@ + 4: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -447,7 +454,7 @@ + put_no_rnd_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -461,13 +468,13 @@ + add r1, r1, r2 + 6: ldmia r1, {r6-r7} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -478,18 +485,18 @@ + 2: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -501,18 +508,18 @@ + 3: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -524,18 +531,18 @@ + 4: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -562,7 +569,7 @@ + ldmia r1, {r8-r10} + .endif + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + .if \align == 0 + ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 + .elseif \align == 1 +@@ -624,7 +631,7 @@ + put_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 +@@ -661,7 +668,7 @@ + put_no_rnd_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 diff --git a/packages/mplayer/mplayer_svn.bb b/packages/mplayer/mplayer_svn.bb index d532a9f018..fa39d796fd 100644 --- a/packages/mplayer/mplayer_svn.bb +++ b/packages/mplayer/mplayer_svn.bb @@ -2,34 +2,24 @@ DESCRIPTION = "Open Source multimedia player." SECTION = "multimedia" PRIORITY = "optional" HOMEPAGE = "http://www.mplayerhq.hu/" -DEPENDS = "virtual/libsdl ffmpeg xsp zlib libpng jpeg liba52 freetype fontconfig alsa-lib lzo ncurses lame libxv virtual/libx11 virtual/kernel \ - ${@base_conditional('ENTERPRISE_DISTRO', '1', '', 'libmad liba52 lame', d)}" +DEPENDS = "libdvdread libtheora virtual/libsdl ffmpeg xsp zlib libpng jpeg liba52 freetype fontconfig alsa-lib lzo ncurses lame libxv virtual/libx11 virtual/kernel \ + ${@base_conditional('ENTERPRISE_DISTRO', '1', '', 'libmad liba52 lame', d)}" RDEPENDS = "mplayer-common" LICENSE = "GPL" SRC_URI = "svn://svn.mplayerhq.hu/mplayer;module=trunk \ - file://Makefile-codec-cfg.patch;patch=1 \ - file://pld-onlyarm5.patch;patch=1 \ - file://makefile-nostrip-svn.patch;patch=1 \ - file://mru-neon-put-pixels.diff;patch=1 \ - file://mru-neon-simple-idct.diff;patch=1 \ - file://mru-neon-h264-chrome.diff;patch=1 \ - file://mru-neon-h264-loopfilter.diff;patch=1 \ - file://mru-neon-h264-qpel.diff;patch=1 \ - file://mru-neon-h264idctadd.diff;patch=1 \ - file://mru-neon-h264idct-dc.diff;patch=1 \ - file://mru-neon-float-to-int16.diff;patch=1 \ - file://mru-neon-vorbis-inverse.diff;patch=1 \ - file://mru-neon-vector-fmul-window.diff;patch=1 \ - file://mru-neon-vector-fmul.diff;patch=1 \ + file://Makefile-codec-cfg.patch;patch=1 \ + file://pld-onlyarm5-svn.patch;patch=1 \ + file://makefile-nostrip-svn.patch;patch=1 \ file://configh \ - file://configmak \ - " + file://configmak \ + " -SRC_URI_append_armv7a = " file://omapfb.patch;patch=1 \ - file://vo_omapfb.c \ - file://yuv.S \ - " +SRC_URI_append_armv7a = " \ +# file://omapfb.patch;patch=1 \ + file://vo_omapfb.c \ + file://yuv.S \ + " # This is required for the collie machine only as all stacks in that # machine seem to be set to executable by the toolchain. If someone @@ -47,7 +37,7 @@ RCONFLICTS_${PN} = "mplayer-atty" RREPLACES_${PN} = "mplayer-atty" PV = "0.0+1.0rc2+svnr${SRCREV}" -PR = "r8" +PR = "r9" DEFAULT_PREFERENCE = "-1" DEFAULT_PREFERENCE_armv7a = "1" @@ -66,9 +56,9 @@ inherit autotools pkgconfig STAGING_KERNEL_DIR = "${STAGING_DIR}/${MACHINE_ARCH}${TARGET_VENDOR}-${TARGET_OS}/kernel" EXTRA_OECONF = " \ - --prefix=/usr \ + --prefix=/usr \ --mandir=${mandir} \ - --target=${TARGET_SYS} \ + --target=${SIMPLE_TARGET_SYS} \ \ --enable-mencoder \ --disable-gui \ @@ -76,113 +66,113 @@ EXTRA_OECONF = " \ --disable-linux-devfs \ --disable-lirc \ --disable-lircc \ - --disable-joystick \ - --disable-vm \ - --disable-xf86keysym \ + --disable-joystick \ + --disable-vm \ + --disable-xf86keysym \ --enable-tv \ - --enable-tv-v4l1 \ + --enable-tv-v4l1 \ --enable-tv-v4l2 \ - --disable-tv-bsdbt848 \ + --disable-tv-bsdbt848 \ --enable-rtc \ - --enable-network \ + --enable-network \ --disable-smb \ - --disable-live \ + --disable-live \ --disable-dvdnav \ - --disable-dvdread \ - --disable-libdvdcss-internal \ + --enable-dvdread \ --disable-dvdread-internal \ - --disable-cdparanoia \ - --enable-freetype \ - --disable-menu \ - --enable-sortsub \ - --disable-fribidi \ - --disable-enca \ - --disable-macosx \ - --disable-macosx-bundle \ - --disable-ftp \ - --disable-vstream \ - \ - --disable-gif \ - --enable-png \ - --enable-jpeg \ - --disable-libcdio \ - --disable-liblzo \ - --disable-qtx \ - --disable-xanim \ - --disable-real \ - --disable-xvid \ - --disable-x264 \ - \ - --disable-libavutil_so \ - --disable-libavcodec_so \ - --disable-libavformat_so \ - --disable-libpostproc_so \ - \ + --enable-libdvdcss-internal \ + --disable-cdparanoia \ + --enable-freetype \ + --enable-menu \ + --enable-sortsub \ + --disable-fribidi \ + --disable-enca \ + --disable-macosx \ + --disable-macosx-bundle \ + --disable-ftp \ + --disable-vstream \ + \ + --disable-gif \ + --enable-png \ + --enable-jpeg \ + --disable-libcdio \ + --disable-liblzo \ + --disable-qtx \ + --disable-xanim \ + --disable-real \ + --disable-xvid \ + --disable-x264 \ + \ + --disable-libavutil_so \ + --disable-libavcodec_so \ + --disable-libavformat_so \ + --disable-libpostproc_so \ + \ --enable-tremor-low \ - \ - --disable-speex \ - --disable-theora \ - --disable-faac \ - --disable-ladspa \ - --disable-libdv \ - --enable-mad \ - --disable-toolame \ - --disable-twolame \ - --disable-xmms \ + \ + --disable-speex \ + --enable-theora \ + --disable-faac \ + --disable-ladspa \ + --disable-libdv \ + --enable-mad \ + --disable-toolame \ + --disable-twolame \ + --disable-xmms \ --disable-mp3lib \ - --enable-libmpeg2 \ - --disable-musepack \ + --enable-libmpeg2 \ + --disable-musepack \ \ - --disable-gl \ - --disable-vesa \ - --disable-svga \ + --disable-gl \ + --disable-vesa \ + --disable-svga \ --enable-sdl \ - --disable-aa \ - --disable-caca \ - --disable-ggi \ - --disable-ggiwmh \ - --disable-directx \ - --disable-dxr2 \ - --disable-dxr3 \ - --disable-dvb \ - --disable-dvbhead \ - --disable-mga \ - --disable-xmga \ - --enable-xv \ - --disable-xvmc \ - --disable-vm \ - --disable-xinerama \ - --enable-x11 \ + --disable-aa \ + --disable-caca \ + --disable-ggi \ + --disable-ggiwmh \ + --disable-directx \ + --disable-dxr2 \ + --disable-dxr3 \ + --disable-dvb \ + --disable-dvbhead \ + --disable-mga \ + --disable-xmga \ + --enable-xv \ + --disable-xvmc \ + --disable-vm \ + --disable-xinerama \ + --enable-x11 \ --enable-fbdev \ - --disable-mlib \ - --disable-3dfx \ - --disable-tdfxfb \ - --disable-s3fb \ - --disable-directfb \ - --disable-zr \ - --disable-bl \ - --disable-tdfxvid \ - --disable-tga \ - --disable-pnm \ - --disable-md5sum \ - \ - --enable-alsa \ - --enable-ossaudio \ - --disable-arts \ - --disable-esd \ - --disable-pulse \ - --disable-jack \ - --disable-openal \ - --disable-nas \ - --disable-sgiaudio \ - --disable-sunaudio \ - --disable-win32waveout \ - --enable-select \ - \ - " + --disable-mlib \ + --disable-3dfx \ + --disable-tdfxfb \ + --disable-s3fb \ + --disable-directfb \ + --disable-zr \ + --disable-bl \ + --disable-tdfxvid \ + --disable-tga \ + --disable-pnm \ + --disable-md5sum \ + \ + --enable-alsa \ + --enable-ossaudio \ + --disable-arts \ + --disable-esd \ + --disable-pulse \ + --disable-jack \ + --disable-openal \ + --disable-nas \ + --disable-sgiaudio \ + --disable-sunaudio \ + --disable-win32waveout \ + --enable-select \ + \ + " EXTRA_OECONF_append_arm = " --disable-decoder=vorbis_decoder \ - --disable-encoder=vorbis_encoder" + --disable-encoder=vorbis_encoder" EXTRA_OECONF_append_armv6 = " --enable-armv6" EXTRA_OECONF_append_armv7a = " --enable-armv6" @@ -210,14 +200,17 @@ do_configure_prepend_armv7a() { cp ${STAGING_KERNEL_DIR}/include/asm-arm/arch-omap/omapfb.h ${S}/libvo/omapfb.h || true } +CFLAGS_append = " -I${S}/libdvdread4 " + do_configure() { sed -i 's|/usr/include|${STAGING_INCDIR}|g' ${S}/configure sed -i 's|/usr/lib|${STAGING_LIBDIR}|g' ${S}/configure sed -i 's|/usr/\S*include[\w/]*||g' ${S}/configure sed -i 's|/usr/\S*lib[\w/]*||g' ${S}/configure - ./configure ${EXTRA_OECONF} - + export SIMPLE_TARGET_SYS="$(echo ${TARGET_SYS} | sed s:${TARGET_VENDOR}::g)" + ./configure ${EXTRA_OECONF} + cat ${WORKDIR}/configh >> ${S}/config.h cat ${WORKDIR}/configmak ${OPTSMAK} >> ${S}/config.mak -- cgit v1.2.3