diff options
Diffstat (limited to 'packages/mplayer/files/mru-neon-h264idct-dc.diff')
-rw-r--r-- | packages/mplayer/files/mru-neon-h264idct-dc.diff | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/packages/mplayer/files/mru-neon-h264idct-dc.diff b/packages/mplayer/files/mru-neon-h264idct-dc.diff new file mode 100644 index 0000000000..9f316b1b5b --- /dev/null +++ b/packages/mplayer/files/mru-neon-h264idct-dc.diff @@ -0,0 +1,55 @@ +From: Mans Rullgard <mans@mansr.com> +Date: Mon, 25 Aug 2008 00:05:54 +0000 (+0100) +Subject: ARM: NEON optimised h264_idct_dc_add +X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=1097c36b47b5019b2a8668f82796ffe76f482408 + +ARM: NEON optimised h264_idct_dc_add +--- + +diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c +index 74f9b4d..6dbe835 100644 +--- a/libavcodec/armv4l/dsputil_neon.c ++++ b/libavcodec/armv4l/dsputil_neon.c +@@ -89,6 +89,7 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0); + + void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); ++void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); + + void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + { +@@ -156,4 +157,5 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; + + c->h264_idct_add = ff_h264_idct_add_neon; ++ c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; + } +diff --git a/libavcodec/armv4l/h264idct_neon.S b/libavcodec/armv4l/h264idct_neon.S +index 8f456f3..34e217f 100644 +--- a/libavcodec/armv4l/h264idct_neon.S ++++ b/libavcodec/armv4l/h264idct_neon.S +@@ -75,3 +75,24 @@ ff_h264_idct_add_neon: + + bx lr + .endfunc ++ ++ .global ff_h264_idct_dc_add_neon ++ .func ff_h264_idct_dc_add_neon ++ff_h264_idct_dc_add_neon: ++ vld1.16 {d2[],d3[]}, [r1,:16] ++ vrshr.s16 q1, q1, #6 ++ vld1.32 {d0[0]}, [r0,:32], r2 ++ vld1.32 {d0[1]}, [r0,:32], r2 ++ vaddw.u8 q2, q1, d0 ++ vld1.32 {d1[0]}, [r0,:32], r2 ++ vld1.32 {d1[1]}, [r0,:32], r2 ++ vaddw.u8 q1, q1, d1 ++ vqmovun.s16 d0, q2 ++ vqmovun.s16 d1, q1 ++ sub r0, r0, r2, lsl #2 ++ vst1.32 {d0[0]}, [r0,:32], r2 ++ vst1.32 {d0[1]}, [r0,:32], r2 ++ vst1.32 {d1[0]}, [r0,:32], r2 ++ vst1.32 {d1[1]}, [r0,:32], r2 ++ bx lr ++ .endfunc |