summary refs log tree commit diff
path: root/packages/mplayer/files/mru-neon-h264idct-dc.diff
diff options
context:
space:
mode:
Diffstat (limited to 'packages/mplayer/files/mru-neon-h264idct-dc.diff')
-rw-r--r-- packages/mplayer/files/mru-neon-h264idct-dc.diff 55
1 files changed, 55 insertions, 0 deletions
diff --git a/packages/mplayer/files/mru-neon-h264idct-dc.diff b/packages/mplayer/files/mru-neon-h264idct-dc.diff
new file mode 100644
index 0000000000..9f316b1b5b
--- /dev/null
+++ b/packages/mplayer/files/mru-neon-h264idct-dc.diff
@@ -0,0 +1,55 @@
+From: Mans Rullgard <mans@mansr.com>
+Date: Mon, 25 Aug 2008 00:05:54 +0000 (+0100)
+Subject: ARM: NEON optimised h264_idct_dc_add
+X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=1097c36b47b5019b2a8668f82796ffe76f482408
+
+ARM: NEON optimised h264_idct_dc_add
+---
+
+diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
+index 74f9b4d..6dbe835 100644
+--- a/libavcodec/armv4l/dsputil_neon.c
++++ b/libavcodec/armv4l/dsputil_neon.c
+@@ -89,6 +89,7 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
+ int beta, int8_t *tc0);
+
+ void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
++void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
+
+ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
+ {
+@@ -156,4 +157,5 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
+ c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+
+ c->h264_idct_add = ff_h264_idct_add_neon;
++ c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
+ }
+diff --git a/libavcodec/armv4l/h264idct_neon.S b/libavcodec/armv4l/h264idct_neon.S
+index 8f456f3..34e217f 100644
+--- a/libavcodec/armv4l/h264idct_neon.S
++++ b/libavcodec/armv4l/h264idct_neon.S
+@@ -75,3 +75,24 @@ ff_h264_idct_add_neon:
+
+ bx lr
+ .endfunc
++
++ .global ff_h264_idct_dc_add_neon /* exported symbol; C prototype: void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride) */
++ .func ff_h264_idct_dc_add_neon /* debug-info function marker, closed by .endfunc below */
++ff_h264_idct_dc_add_neon: /* Adds the rounded value (x + 32) >> 6 of the 16-bit element at [r1] to 4 rows of 4 bytes starting at [r0], saturating each result to 0..255. Assuming the standard ARM calling convention: r0 = dst, r1 = block, r2 = stride (byte step between rows). Memory at r1 is only read. Uses q0-q2 as scratch. */
++ vld1.16 {d2[],d3[]}, [r1,:16] /* q1 = the 16-bit element at [r1] replicated into all 8 lanes; :16 asserts a 16-bit aligned address */
++ vrshr.s16 q1, q1, #6 /* signed rounding shift right: each lane becomes (lane + 32) >> 6 */
++ vld1.32 {d0[0]}, [r0,:32], r2 /* d0 low word = 4 bytes of row 0, then r0 += r2; :32 asserts a 32-bit aligned address */
++ vld1.32 {d0[1]}, [r0,:32], r2 /* d0 high word = 4 bytes of row 1, then r0 += r2 */
++ vaddw.u8 q2, q1, d0 /* q2 = q1 + rows 0-1, with the bytes zero-extended to 16 bits */
++ vld1.32 {d1[0]}, [r0,:32], r2 /* d1 low word = 4 bytes of row 2, then r0 += r2 */
++ vld1.32 {d1[1]}, [r0,:32], r2 /* d1 high word = 4 bytes of row 3, then r0 += r2 */
++ vaddw.u8 q1, q1, d1 /* q1 = q1 + rows 2-3; overwrites the replicated value, which is not used again */
++ vqmovun.s16 d0, q2 /* narrow rows 0-1 to bytes, saturating signed 16-bit to unsigned 8-bit */
++ vqmovun.s16 d1, q1 /* narrow rows 2-3 to bytes with the same saturation */
++ sub r0, r0, r2, lsl #2 /* r0 -= 4 * r2: rewind to row 0 after the four post-incremented loads */
++ vst1.32 {d0[0]}, [r0,:32], r2 /* store row 0, then r0 += r2 */
++ vst1.32 {d0[1]}, [r0,:32], r2 /* store row 1, then r0 += r2 */
++ vst1.32 {d1[0]}, [r0,:32], r2 /* store row 2, then r0 += r2 */
++ vst1.32 {d1[1]}, [r0,:32], r2 /* store row 3, then r0 += r2 */
++ bx lr /* return to the caller */
++ .endfunc /* end of the region opened by .func */