diff options
| author | Koen Kooi <koen@openembedded.org> | 2008-09-24 18:14:11 +0000 |
|---|---|---|
| committer | Koen Kooi <koen@openembedded.org> | 2008-09-24 18:14:11 +0000 |
| commit | 05bc62adb05c2821ec7608f0513c149edc30952b (patch) | |
| tree | e1197178f004928b0d11ed6e2e43fad4b7ee8f98 /packages | |
| parent | f4da94042bddb70a89c709aa43589256130ef0b2 (diff) | |
mplayer svn: update SRCREV, remove w100 and pxafb patches that don't apply anymore, add patches for NEON
Diffstat (limited to 'packages')
24 files changed, 3194 insertions, 37 deletions
diff --git a/packages/mplayer/files/Makefile-codec-cfg.patch b/packages/mplayer/files/Makefile-codec-cfg.patch index 84c17a9017..9ce22a8839 100644 --- a/packages/mplayer/files/Makefile-codec-cfg.patch +++ b/packages/mplayer/files/Makefile-codec-cfg.patch @@ -1,11 +1,11 @@ ---- /tmp/Makefile 2008-06-10 20:55:43.100403024 +0200 -+++ trunk/Makefile 2008-06-10 20:56:10.881647093 +0200 -@@ -731,7 +731,7 @@ - $(CC) -o $@ $^ $(LDFLAGS_MENCODER) +--- /tmp/Makefile 2008-09-24 19:24:26.000000000 +0200 ++++ trunk/Makefile 2008-09-24 19:25:01.683198000 +0200 +@@ -752,7 +752,7 @@ + $(CC) -o $@ $^ $(LDFLAGS_MPLAYER) codec-cfg$(EXESUF): codec-cfg.c codec-cfg.h help_mp.h -- $(HOST_CC) -O -I. -DCODECS2HTML -o $@ $< -+ $(BUILD_CC) -O -I. -DCODECS2HTML -o $@ $< +- $(HOST_CC) -O -DCODECS2HTML $(EXTRA_INC) -o $@ $< ++ $(BUILD_CC) -O -DCODECS2HTML $(EXTRA_INC) -o $@ $< codecs.conf.h: codec-cfg$(EXESUF) etc/codecs.conf - ./codec-cfg$(EXESUF) ./etc/codecs.conf > $@ + ./$^ > $@ diff --git a/packages/mplayer/files/armv5te/.mtn2git_empty b/packages/mplayer/files/armv5te/.mtn2git_empty new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/packages/mplayer/files/armv5te/.mtn2git_empty diff --git a/packages/mplayer/files/armv5te/configh b/packages/mplayer/files/armv5te/configh new file mode 100644 index 0000000000..46c647e2d5 --- /dev/null +++ b/packages/mplayer/files/armv5te/configh @@ -0,0 +1,6 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 +#define ARCH_ARMV4L 1 +#define ENABLE_ARMV4L 1 +#define HAVE_ARMV5TE 1 +#define ENABLE_ARMV5TE 1 diff --git a/packages/mplayer/files/armv5te/configmak b/packages/mplayer/files/armv5te/configmak new file mode 100644 index 0000000000..aa9978515d --- /dev/null +++ b/packages/mplayer/files/armv5te/configmak @@ -0,0 +1,3 @@ +ARCH_ARMV4L=yes +HAVE_ARMV5TE=yes + diff --git a/packages/mplayer/files/armv6/.mtn2git_empty b/packages/mplayer/files/armv6/.mtn2git_empty new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/packages/mplayer/files/armv6/.mtn2git_empty diff --git a/packages/mplayer/files/armv6/configh b/packages/mplayer/files/armv6/configh new file mode 100644 index 0000000000..2301e723d6 --- /dev/null +++ b/packages/mplayer/files/armv6/configh @@ -0,0 +1,8 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 +#define ARCH_ARMV4L 1 +#define ENABLE_ARMV4L 1 +#define HAVE_ARMV5TE 1 +#define ENABLE_ARMV5TE 1 +#define HAVE_ARMV6 1 +#define ENABLE_ARMV6 1 diff --git a/packages/mplayer/files/armv6/configmak b/packages/mplayer/files/armv6/configmak new file mode 100644 index 0000000000..4db5dc0dfd --- /dev/null +++ b/packages/mplayer/files/armv6/configmak @@ -0,0 +1,3 @@ +ARCH_ARMV4L=yes +HAVE_ARMV5TE=yes +HAVE_ARMV6=yes diff --git a/packages/mplayer/files/armv7a/.mtn2git_empty b/packages/mplayer/files/armv7a/.mtn2git_empty new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/packages/mplayer/files/armv7a/.mtn2git_empty diff --git a/packages/mplayer/files/armv7a/configh b/packages/mplayer/files/armv7a/configh new file mode 100644 index 0000000000..245e40f56a --- /dev/null +++ b/packages/mplayer/files/armv7a/configh @@ -0,0 +1,14 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 +#define ARCH_ARMV4L 1 +#define ENABLE_ARMV4L 1 +#define HAVE_ARMV5TE 1 +#define ENABLE_ARMV5TE 1 +#define HAVE_ARMV6 1 +#define ENABLE_ARMV6 1 +#define HAVE_ARMV6T2 1 +#define ENABLE_ARMV6T2 1 +#define HAVE_ARMVFP 1 +#define ENABLE_ARMVFP 1 +#define HAVE_NEON 1 +#define ENABLE_NEON 1 diff --git a/packages/mplayer/files/armv7a/configmak b/packages/mplayer/files/armv7a/configmak new file mode 100644 index 0000000000..50d549f794 --- /dev/null +++ b/packages/mplayer/files/armv7a/configmak @@ -0,0 +1,6 @@ +ARCH_ARMV4L=yes +HAVE_ARMV5TE=yes +HAVE_ARMV6=yes +HAVE_ARMV6T2=yes +HAVE_ARMVFP=yes +HAVE_NEON=yes diff --git a/packages/mplayer/files/configh b/packages/mplayer/files/configh new file mode 100644 index 0000000000..2fe7658383 --- /dev/null +++ b/packages/mplayer/files/configh @@ -0,0 +1,2 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 diff --git a/packages/mplayer/files/configmak b/packages/mplayer/files/configmak new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/packages/mplayer/files/configmak diff --git a/packages/mplayer/files/mru-neon-float-to-int16.diff b/packages/mplayer/files/mru-neon-float-to-int16.diff new file mode 100644 index 0000000000..7a874cab30 --- /dev/null +++ b/packages/mplayer/files/mru-neon-float-to-int16.diff @@ -0,0 +1,107 @@ +From: Mans Rullgard <mans@mansr.com> +Date: Thu, 31 Jul 2008 02:35:42 +0000 (+0100) +Subject: ARM: NEON optimised float_to_int16 +X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=f16a738cfc3307cbcba2f9c8aff4b5aa43144731 + +ARM: NEON optimised float_to_int16 +--- + +diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c +index 6dbe835..b584e5b 100644 +--- a/libavcodec/armv4l/dsputil_neon.c ++++ b/libavcodec/armv4l/dsputil_neon.c +@@ -91,6 +91,9 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, + void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); + void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); + ++void ff_float_to_int16_neon(int16_t *, const float *, long); ++void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); ++ + void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + { + c->put_pixels_tab[0][0] = ff_put_pixels16_neon; +@@ -158,4 +161,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + + c->h264_idct_add = ff_h264_idct_add_neon; + c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; ++ ++ c->float_to_int16 = ff_float_to_int16_neon; ++ c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; + } +diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S +index fc5e401..44f75ba 100644 +--- a/libavcodec/armv4l/dsputil_neon_s.S ++++ b/libavcodec/armv4l/dsputil_neon_s.S +@@ -252,3 +252,72 @@ + defun2 put_pixels8_x2, _no_rnd, vhadd.u8 + defun2 put_pixels8_y2, _no_rnd, vhadd.u8 + defun2 put_pixels8_xy2, _no_rnd, vshrn.u16, 1 ++ ++extern ff_float_to_int16_neon ++ dmb ++1: vld1.64 {d0-d3}, [r1,:128]! ++ vcvt.s32.f32 q2, q0 ++ vcvt.s32.f32 q3, q1 ++ subs r2, r2, #8 ++ vqmovn.s32 d4, q2 ++ vqmovn.s32 d5, q3 ++ vst1.64 {d4-d5}, [r0,:128]! ++ bgt 1b ++ bx lr ++ .endfunc ++ ++extern ff_float_to_int16_interleave_neon ++ cmp r3, #2 ++ ldrlt r1, [r1] ++ blt ff_float_to_int16_neon ++ bne 2f ++ ++ ldr ip, [r1] ++ ldr r1, [r1, #4] ++ vld1.64 {d0-d3}, [ip,:128]! ++ vld1.64 {d4-d7}, [r1,:128]! ++ dmb ++1: vcvt.s32.f32 q8, q0 ++ vcvt.s32.f32 q9, q1 ++ vcvt.s32.f32 q10, q2 ++ vcvt.s32.f32 q11, q3 ++ subs r2, r2, #8 ++ vqmovn.s32 d16, q8 ++ vqmovn.s32 d17, q9 ++ vqmovn.s32 d18, q10 ++ vqmovn.s32 d19, q11 ++ beq 1f ++ vld1.64 {d0-d3}, [ip,:128]! ++ vld1.64 {d4-d7}, [r1,:128]! ++ vst2.16 {d16-d19}, [r0,:64]! ++ b 1b ++1: vst2.16 {d16-d19}, [r0,:64]! ++ bx lr ++ ++2: push {r4,r5,lr} ++ lsls r4, r3, #1 ++ dmb ++ b 4f ++3: vld1.64 {d0-d3}, [ip,:128]! ++ vcvt.s32.f32 q2, q0 ++ vcvt.s32.f32 q3, q1 ++ subs lr, lr, #8 ++ vqmovn.s32 d4, q2 ++ vqmovn.s32 d5, q3 ++ vst1.16 {d4[0]}, [r5,:16], r4 ++ vst1.16 {d4[1]}, [r5,:16], r4 ++ vst1.16 {d4[2]}, [r5,:16], r4 ++ vst1.16 {d4[3]}, [r5,:16], r4 ++ vst1.16 {d5[0]}, [r5,:16], r4 ++ vst1.16 {d5[1]}, [r5,:16], r4 ++ vst1.16 {d5[2]}, [r5,:16], r4 ++ vst1.16 {d5[3]}, [r5,:16], r4 ++ bgt 3b ++ subs r3, r3, #1 ++4: ldr ip, [r1], #4 ++ mov lr, r2 ++ mov r5, r0 ++ add r0, r0, #2 ++ bne 3b ++ pop {r4,r5,pc} ++ .endfunc diff --git a/packages/mplayer/files/mru-neon-h264-chrome.diff b/packages/mplayer/files/mru-neon-h264-chrome.diff new file mode 100644 index 0000000000..cb6c4ff991 --- /dev/null +++ b/packages/mplayer/files/mru-neon-h264-chrome.diff @@ -0,0 +1,364 @@ +From: Mans Rullgard <mans@mansr.com> +Date: Fri, 11 Jul 2008 01:20:07 +0000 (+0100) +Subject: ARM: NEON optimised {put,avg}_h264_chroma_mc[48] +X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=d3aa8f93b8a0061e0c3ac12aeed055961abfc113 + +ARM: NEON optimised {put,avg}_h264_chroma_mc[48] +--- + +diff --git a/libavcodec/Makefile b/libavcodec/Makefile +index 7fa02fa..36ba158 100644 +--- a/libavcodec/Makefile ++++ b/libavcodec/Makefile +@@ -437,6 +437,7 @@ OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \ + + ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \ + armv4l/simple_idct_neon.o \ ++ armv4l/h264dsp_neon.o \ + + OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \ + sparc/simple_idct_vis.o \ +diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c +index 8a10dde..a6d86cd 100644 +--- a/libavcodec/armv4l/dsputil_neon.c ++++ b/libavcodec/armv4l/dsputil_neon.c +@@ -42,6 +42,12 @@ void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); + void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int); + void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int); + ++void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); ++void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); ++ ++void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); ++void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); ++ + void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + { + c->put_pixels_tab[0][0] = ff_put_pixels16_neon; +@@ -62,6 +68,12 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon; + c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon; + ++ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon; ++ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon; ++ ++ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon; ++ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon; ++ + c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon; + c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon; + } +diff --git a/libavcodec/armv4l/h264dsp_neon.S b/libavcodec/armv4l/h264dsp_neon.S +new file mode 100644 +index 0000000..28d9aa7 +--- /dev/null ++++ b/libavcodec/armv4l/h264dsp_neon.S +@@ -0,0 +1,308 @@ ++/* ++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++ .fpu neon ++ ++/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ ++ .macro h264_chroma_mc8 avg=0 ++ push {r4-r7, lr} ++ ldrd r4, [sp, #20] ++.if \avg ++ mov lr, r0 ++.endif ++ pld [r1] ++ pld [r1, r2] ++ ++ muls r7, r4, r5 ++ rsb r6, r7, r5, lsl #3 ++ rsb ip, r7, r4, lsl #3 ++ sub r4, r7, r4, lsl #3 ++ sub r4, r4, r5, lsl #3 ++ add r4, r4, #64 ++ ++ dmb ++ ++ beq 2f ++ ++ add r5, r1, r2 ++ ++ vdup.8 d0, r4 ++ lsl r4, r2, #1 ++ vdup.8 d1, ip ++ vld1.64 {d4, d5}, [r1], r4 ++ vdup.8 d2, r6 ++ vld1.64 {d6, d7}, [r5], r4 ++ vdup.8 d3, r7 ++ ++ vext.8 d5, d4, d5, #1 ++ vext.8 d7, d6, d7, #1 ++ ++1: pld [r5] ++ vmull.u8 q8, d4, d0 ++ vmlal.u8 q8, d5, d1 ++ vld1.64 {d4, d5}, [r1], r4 ++ vmlal.u8 q8, d6, d2 ++ vext.8 d5, d4, d5, #1 ++ vmlal.u8 q8, d7, d3 ++ vmull.u8 q9, d6, d0 ++ subs r3, r3, #2 ++ vmlal.u8 q9, d7, d1 ++ vmlal.u8 q9, d4, d2 ++ vmlal.u8 q9, d5, d3 ++ vrshrn.u16 d16, q8, #6 ++ vld1.64 {d6, d7}, [r5], r4 ++ pld [r1] ++ vrshrn.u16 d17, q9, #6 ++.if \avg ++ vld1.64 {d20}, [lr,:64], r2 ++ vld1.64 {d21}, [lr,:64], r2 ++ vrhadd.u8 q8, q8, q10 ++.endif ++ vext.8 d7, d6, d7, #1 ++ vst1.64 {d16}, [r0,:64], r2 ++ vst1.64 {d17}, [r0,:64], r2 ++ bgt 1b ++ ++ pop {r4-r7, pc} ++ ++2: tst r6, r6 ++ add ip, ip, r6 ++ vdup.8 d0, r4 ++ vdup.8 d1, ip ++ ++ beq 4f ++ ++ add r5, r1, r2 ++ lsl r4, r2, #1 ++ vld1.64 {d4}, [r1], r4 ++ vld1.64 {d6}, [r5], r4 ++ ++3: pld [r5] ++ vmull.u8 q8, d4, d0 ++ vmlal.u8 q8, d6, d1 ++ vld1.64 {d4}, [r1], r4 ++ vmull.u8 q9, d6, d0 ++ vmlal.u8 q9, d4, d1 ++ vld1.64 {d6}, [r5], r4 ++ vrshrn.u16 d16, q8, #6 ++ vrshrn.u16 d17, q9, #6 ++.if \avg ++ vld1.64 {d20}, [lr,:64], r2 ++ vld1.64 {d21}, [lr,:64], r2 ++ vrhadd.u8 q8, q8, q10 ++.endif ++ subs r3, r3, #2 ++ pld [r1] ++ vst1.64 {d16}, [r0,:64], r2 ++ vst1.64 {d17}, [r0,:64], r2 ++ bgt 3b ++ ++ pop {r4-r7, pc} ++ ++4: vld1.64 {d4, d5}, [r1], r2 ++ vld1.64 {d6, d7}, [r1], r2 ++ vext.8 d5, d4, d5, #1 ++ vext.8 d7, d6, d7, #1 ++ ++5: pld [r1] ++ subs r3, r3, #2 ++ vmull.u8 q8, d4, d0 ++ vmlal.u8 q8, d5, d1 ++ vld1.64 {d4, d5}, [r1], r2 ++ vmull.u8 q9, d6, d0 ++ vmlal.u8 q9, d7, d1 ++ pld [r1] ++ vext.8 d5, d4, d5, #1 ++ vrshrn.u16 d16, q8, #6 ++ vrshrn.u16 d17, q9, #6 ++.if \avg ++ vld1.64 {d20}, [lr,:64], r2 ++ vld1.64 {d21}, [lr,:64], r2 ++ vrhadd.u8 q8, q8, q10 ++.endif ++ vld1.64 {d6, d7}, [r1], r2 ++ vext.8 d7, d6, d7, #1 ++ vst1.64 {d16}, [r0,:64], r2 ++ vst1.64 {d17}, [r0,:64], r2 ++ bgt 5b ++ ++ pop {r4-r7, pc} ++ .endm ++ ++/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ ++ .macro h264_chroma_mc4 avg=0 ++ push {r4-r7, lr} ++ ldrd r4, [sp, #20] ++.if \avg ++ mov lr, r0 ++.endif ++ pld [r1] ++ pld [r1, r2] ++ ++ muls r7, r4, r5 ++ rsb r6, r7, r5, lsl #3 ++ rsb ip, r7, r4, lsl #3 ++ sub r4, r7, r4, lsl #3 ++ sub r4, r4, r5, lsl #3 ++ add r4, r4, #64 ++ ++ dmb ++ ++ beq 2f ++ ++ add r5, r1, r2 ++ ++ vdup.8 d0, r4 ++ lsl r4, r2, #1 ++ vdup.8 d1, ip ++ vld1.64 {d4}, [r1], r4 ++ vdup.8 d2, r6 ++ vld1.64 {d6}, [r5], r4 ++ vdup.8 d3, r7 ++ ++ vext.8 d5, d4, d5, #1 ++ vext.8 d7, d6, d7, #1 ++ vtrn.32 d4, d5 ++ vtrn.32 d6, d7 ++ ++ vtrn.32 d0, d1 ++ vtrn.32 d2, d3 ++ ++1: pld [r5] ++ vmull.u8 q8, d4, d0 ++ vmlal.u8 q8, d6, d2 ++ vld1.64 {d4}, [r1], r4 ++ vext.8 d5, d4, d5, #1 ++ vtrn.32 d4, d5 ++ vmull.u8 q9, d6, d0 ++ vmlal.u8 q9, d4, d2 ++ vld1.64 {d6}, [r5], r4 ++ vadd.i16 d16, d16, d17 ++ vadd.i16 d17, d18, d19 ++ vrshrn.u16 d16, q8, #6 ++ subs r3, r3, #2 ++ pld [r1] ++.if \avg ++ vld1.32 {d20[0]}, [lr,:32], r2 ++ vld1.32 {d20[1]}, [lr,:32], r2 ++ vrhadd.u8 d16, d16, d20 ++.endif ++ vext.8 d7, d6, d7, #1 ++ vtrn.32 d6, d7 ++ vst1.32 {d16[0]}, [r0,:32], r2 ++ vst1.32 {d16[1]}, [r0,:32], r2 ++ bgt 1b ++ ++ pop {r4-r7, pc} ++ ++2: tst r6, r6 ++ add ip, ip, r6 ++ vdup.8 d0, r4 ++ vdup.8 d1, ip ++ vtrn.32 d0, d1 ++ ++ beq 4f ++ ++ vext.32 d1, d0, d1, #1 ++ add r5, r1, r2 ++ lsl r4, r2, #1 ++ vld1.32 {d4[0]}, [r1], r4 ++ vld1.32 {d4[1]}, [r5], r4 ++ ++3: pld [r5] ++ vmull.u8 q8, d4, d0 ++ vld1.32 {d4[0]}, [r1], r4 ++ vmull.u8 q9, d4, d1 ++ vld1.32 {d4[1]}, [r5], r4 ++ vadd.i16 d16, d16, d17 ++ vadd.i16 d17, d18, d19 ++ vrshrn.u16 d16, q8, #6 ++.if \avg ++ vld1.32 {d20[0]}, [lr,:32], r2 ++ vld1.32 {d20[1]}, [lr,:32], r2 ++ vrhadd.u8 d16, d16, d20 ++.endif ++ subs r3, r3, #2 ++ pld [r1] ++ vst1.32 {d16[0]}, [r0,:32], r2 ++ vst1.32 {d16[1]}, [r0,:32], r2 ++ bgt 3b ++ ++ pop {r4-r7, pc} ++ ++4: vld1.64 {d4}, [r1], r2 ++ vld1.64 {d6}, [r1], r2 ++ vext.8 d5, d4, d5, #1 ++ vext.8 d7, d6, d7, #1 ++ vtrn.32 d4, d5 ++ vtrn.32 d6, d7 ++ ++5: vmull.u8 q8, d4, d0 ++ vmull.u8 q9, d6, d0 ++ subs r3, r3, #2 ++ vld1.64 {d4}, [r1], r2 ++ vext.8 d5, d4, d5, #1 ++ vtrn.32 d4, d5 ++ vadd.i16 d16, d16, d17 ++ vadd.i16 d17, d18, d19 ++ pld [r1] ++ vrshrn.u16 d16, q8, #6 ++.if \avg ++ vld1.32 {d20[0]}, [lr,:32], r2 ++ vld1.32 {d20[1]}, [lr,:32], r2 ++ vrhadd.u8 d16, d16, d20 ++.endif ++ vld1.64 {d6}, [r1], r2 ++ vext.8 d7, d6, d7, #1 ++ vtrn.32 d6, d7 ++ pld [r1] ++ vst1.32 {d16[0]}, [r0,:32], r2 ++ vst1.32 {d16[1]}, [r0,:32], r2 ++ bgt 5b ++ ++ pop {r4-r7, pc} ++ .endm ++ ++ .text ++ .align ++ ++ .global ff_put_h264_chroma_mc8_neon ++ .func ff_put_h264_chroma_mc8_neon ++ff_put_h264_chroma_mc8_neon: ++ h264_chroma_mc8 ++ .endfunc ++ ++ .global ff_avg_h264_chroma_mc8_neon ++ .func ff_avg_h264_chroma_mc8_neon ++ff_avg_h264_chroma_mc8_neon: ++ h264_chroma_mc8 avg=1 ++ .endfunc ++ ++ .global ff_put_h264_chroma_mc4_neon ++ .func ff_put_h264_chroma_mc4_neon ++ff_put_h264_chroma_mc4_neon: ++ h264_chroma_mc4 ++ .endfunc ++ ++ .global ff_avg_h264_chroma_mc4_neon ++ .func ff_avg_h264_chroma_mc4_neon ++ff_avg_h264_chroma_mc4_neon: ++ h264_chroma_mc4 avg=1 ++ .endfunc diff --git a/packages/mplayer/files/mru-neon-h264-loopfilter.diff b/packages/mplayer/files/mru-neon-h264-loopfilter.diff new file mode 100644 index 0000000000..056702517b --- /dev/null +++ b/packages/mplayer/files/mru-neon-h264-loopfilter.diff @@ -0,0 +1,346 @@ +From: Mans Rullgard <mans@mansr.com> +Date: Fri, 15 Aug 2008 00:02:55 +0000 (+0100) +Subject: ARM: NEON optimised H.264 loop filter +X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=0c1b6bb0814587bd4c8a895c6d7dc2dd4cc2841a + +ARM: NEON optimised H.264 loop filter +--- + +diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c +index a6d86cd..68ecbe8 100644 +--- a/libavcodec/armv4l/dsputil_neon.c ++++ b/libavcodec/armv4l/dsputil_neon.c +@@ -48,6 +48,15 @@ void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); + void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); + void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); + ++void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, ++ int beta, int8_t *tc0); ++void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, ++ int beta, int8_t *tc0); ++void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, ++ int beta, int8_t *tc0); ++void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, ++ int beta, int8_t *tc0); ++ + void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + { + c->put_pixels_tab[0][0] = ff_put_pixels16_neon; +@@ -76,4 +85,9 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + + c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon; + c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon; ++ ++ c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; ++ c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; ++ c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; ++ c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; + } +diff --git a/libavcodec/armv4l/h264dsp_neon.S b/libavcodec/armv4l/h264dsp_neon.S +index 28d9aa7..ac793b2 100644 +--- a/libavcodec/armv4l/h264dsp_neon.S ++++ b/libavcodec/armv4l/h264dsp_neon.S +@@ -306,3 +306,303 @@ ff_put_h264_chroma_mc4_neon: + ff_avg_h264_chroma_mc4_neon: + h264_chroma_mc4 avg=1 + .endfunc ++ ++ /* H.264 loop filter */ ++ ++ .macro h264_loop_filter_start ++ ldr ip, [sp] ++ tst r2, r2 ++ ldr ip, [ip] ++ tstne r3, r3 ++ vmov.32 d24[0], ip ++ and ip, ip, ip, lsl #16 ++ bxeq lr ++ ands ip, ip, ip, lsl #8 ++ bxlt lr ++ .endm ++ ++ .macro align_push_regs ++ and ip, sp, #15 ++ add ip, ip, #32 ++ sub sp, sp, ip ++ dmb ++ vst1.64 {d12-d15}, [sp,:128] ++ sub sp, sp, #32 ++ vst1.64 {d8-d11}, [sp,:128] ++ .endm ++ ++ .macro align_pop_regs ++ vld1.64 {d8-d11}, [sp,:128]! ++ vld1.64 {d12-d15}, [sp,:128], ip ++ .endm ++ ++ .macro h264_loop_filter_luma ++ vdup.8 q11, r2 @ alpha ++ vmovl.u8 q12, d24 ++ vabd.u8 q6, q8, q0 @ abs(p0 - q0) ++ vmovl.u16 q12, d24 ++ vabd.u8 q14, q9, q8 @ abs(p1 - p0) ++ vsli.16 q12, q12, #8 ++ vabd.u8 q15, q1, q0 @ abs(q1 - q0) ++ vsli.32 q12, q12, #16 ++ vclt.u8 q6, q6, q11 @ < alpha ++ vdup.8 q11, r3 @ beta ++ vclt.s8 q7, q12, #0 ++ vclt.u8 q14, q14, q11 @ < beta ++ vclt.u8 q15, q15, q11 @ < beta ++ vbic q6, q6, q7 ++ vabd.u8 q4, q10, q8 @ abs(p2 - p0) ++ vand q6, q6, q14 ++ vabd.u8 q5, q2, q0 @ abs(q2 - q0) ++ vclt.u8 q4, q4, q11 @ < beta ++ vand q6, q6, q15 ++ vclt.u8 q5, q5, q11 @ < beta ++ vand q4, q4, q6 ++ vand q5, q5, q6 ++ vand q12, q12, q6 ++ vrhadd.u8 q14, q8, q0 ++ vsub.i8 q6, q12, q4 ++ vqadd.u8 q7, q9, q12 ++ vhadd.u8 q10, q10, q14 ++ vsub.i8 q6, q6, q5 ++ vhadd.u8 q14, q2, q14 ++ vmin.u8 q7, q7, q10 ++ vqsub.u8 q11, q9, q12 ++ vqadd.u8 q2, q1, q12 ++ vmax.u8 q7, q7, q11 ++ vqsub.u8 q11, q1, q12 ++ vmin.u8 q14, q2, q14 ++ vmovl.u8 q2, d0 ++ vmax.u8 q14, q14, q11 ++ vmovl.u8 q10, d1 ++ vsubw.u8 q2, q2, d16 ++ vsubw.u8 q10, q10, d17 ++ vshl.i16 q2, q2, #2 ++ vshl.i16 q10, q10, #2 ++ vaddw.u8 q2, q2, d18 ++ vaddw.u8 q10, q10, d19 ++ vsubw.u8 q2, q2, d2 ++ vsubw.u8 q10, q10, d3 ++ vrshrn.i16 d4, q2, #3 ++ vrshrn.i16 d5, q10, #3 ++ vbsl q4, q7, q9 ++ vbsl q5, q14, q1 ++ vneg.s8 q7, q6 ++ vmovl.u8 q14, d16 ++ vmin.s8 q2, q2, q6 ++ vmovl.u8 q6, d17 ++ vmax.s8 q2, q2, q7 ++ vmovl.u8 q11, d0 ++ vmovl.u8 q12, d1 ++ vaddw.s8 q14, q14, d4 ++ vaddw.s8 q6, q6, d5 ++ vsubw.s8 q11, q11, d4 ++ vsubw.s8 q12, q12, d5 ++ vqmovun.s16 d16, q14 ++ vqmovun.s16 d17, q6 ++ vqmovun.s16 d0, q11 ++ vqmovun.s16 d1, q12 ++ .endm ++ ++ .global ff_h264_v_loop_filter_luma_neon ++ .func ff_h264_v_loop_filter_luma_neon ++ff_h264_v_loop_filter_luma_neon: ++ h264_loop_filter_start ++ ++ vld1.64 {d0, d1}, [r0,:128], r1 ++ vld1.64 {d2, d3}, [r0,:128], r1 ++ vld1.64 {d4, d5}, [r0,:128], r1 ++ sub r0, r0, r1, lsl #2 ++ sub r0, r0, r1, lsl #1 ++ vld1.64 {d20,d21}, [r0,:128], r1 ++ vld1.64 {d18,d19}, [r0,:128], r1 ++ vld1.64 {d16,d17}, [r0,:128], r1 ++ ++ align_push_regs ++ ++ h264_loop_filter_luma ++ ++ sub r0, r0, r1, lsl #1 ++ vst1.64 {d8, d9}, [r0,:128], r1 ++ vst1.64 {d16,d17}, [r0,:128], r1 ++ vst1.64 {d0, d1}, [r0,:128], r1 ++ vst1.64 {d10,d11}, [r0,:128] ++ ++ align_pop_regs ++ bx lr ++ .endfunc ++ ++ .global ff_h264_h_loop_filter_luma_neon ++ .func ff_h264_h_loop_filter_luma_neon ++ff_h264_h_loop_filter_luma_neon: ++ h264_loop_filter_start ++ ++ sub r0, r0, #4 ++ vld1.64 {d6}, [r0], r1 ++ vld1.64 {d20}, [r0], r1 ++ vld1.64 {d18}, [r0], r1 ++ vld1.64 {d16}, [r0], r1 ++ vld1.64 {d0}, [r0], r1 ++ vld1.64 {d2}, [r0], r1 ++ vld1.64 {d4}, [r0], r1 ++ vld1.64 {d26}, [r0], r1 ++ vld1.64 {d7}, [r0], r1 ++ vld1.64 {d21}, [r0], r1 ++ vld1.64 {d19}, [r0], r1 ++ vld1.64 {d17}, [r0], r1 ++ vld1.64 {d1}, [r0], r1 ++ vld1.64 {d3}, [r0], r1 ++ vld1.64 {d5}, |
