diff options
Diffstat (limited to 'packages/mplayer/files/mru-neon-vector-fmul.diff')
-rw-r--r-- | packages/mplayer/files/mru-neon-vector-fmul.diff | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/packages/mplayer/files/mru-neon-vector-fmul.diff b/packages/mplayer/files/mru-neon-vector-fmul.diff new file mode 100644 index 0000000000..2710f10443 --- /dev/null +++ b/packages/mplayer/files/mru-neon-vector-fmul.diff @@ -0,0 +1,56 @@ +From: Mans Rullgard <mans@mansr.com> +Date: Sun, 3 Aug 2008 17:13:06 +0000 (+0100) +Subject: ARM: NEON optimised vector_fmul +X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=ba46eb14e3be96b627fd096aacaa4dbb2e186281 + +ARM: NEON optimised vector_fmul +--- + +diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c +index 6c44940..c6fc173 100644 +--- a/libavcodec/armv4l/dsputil_neon.c ++++ b/libavcodec/armv4l/dsputil_neon.c +@@ -91,6 +91,7 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, + void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); + void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); + ++void ff_vector_fmul_neon(float *dst, const float *src, int len); + void ff_vector_fmul_window_neon(float *dst, const float *src0, + const float *src1, const float *win, + float add_bias, int len); +@@ -168,6 +169,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + c->h264_idct_add = ff_h264_idct_add_neon; + c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; + ++ c->vector_fmul = ff_vector_fmul_neon; + c->vector_fmul_window = ff_vector_fmul_window_neon; + + c->float_to_int16 = ff_float_to_int16_neon; +diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S +index 49a09b8..7310700 100644 +--- a/libavcodec/armv4l/dsputil_neon_s.S ++++ b/libavcodec/armv4l/dsputil_neon_s.S +@@ -324,6 +324,23 @@ extern ff_float_to_int16_interleave_neon + pop {r4,r5,pc} + .endfunc + ++extern ff_vector_fmul_neon ++ mov r3, r0 ++ vld1.64 {d0-d3}, [r0,:128]! ++ vld1.64 {d4-d7}, [r1,:128]! ++ dmb ++1: subs r2, r2, #8 ++ vmul.f32 q8, q0, q2 ++ vmul.f32 q9, q1, q3 ++ beq 2f ++ vld1.64 {d0-d3}, [r0,:128]! ++ vld1.64 {d4-d7}, [r1,:128]! ++ vst1.64 {d16-d19}, [r3,:128]! ++ b 1b ++2: vst1.64 {d16-d19}, [r3,:128]! ++ bx lr ++ .endfunc ++ + extern ff_vector_fmul_window_neon + vld1.32 {d16[],d17[]}, [sp,:32] + push {r4,r5,lr} |