diff options
Diffstat (limited to 'packages/mplayer/files/mru-neon-vector-fmul-window.diff')
-rw-r--r-- | packages/mplayer/files/mru-neon-vector-fmul-window.diff | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/packages/mplayer/files/mru-neon-vector-fmul-window.diff b/packages/mplayer/files/mru-neon-vector-fmul-window.diff new file mode 100644 index 0000000000..03ac55bc56 --- /dev/null +++ b/packages/mplayer/files/mru-neon-vector-fmul-window.diff @@ -0,0 +1,86 @@ +From: Mans Rullgard <mans@mansr.com> +Date: Sun, 3 Aug 2008 16:46:43 +0000 (+0100) +Subject: ARM: NEON optimised vector_fmul_window +X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=98feb31064dccfd16ce189ff4aec9ccedddf6b04 + +ARM: NEON optimised vector_fmul_window +--- + +diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c +index f9d32c0..6c44940 100644 +--- a/libavcodec/armv4l/dsputil_neon.c ++++ b/libavcodec/armv4l/dsputil_neon.c +@@ -91,6 +91,10 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, + void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); + void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); + ++void ff_vector_fmul_window_neon(float *dst, const float *src0, ++ const float *src1, const float *win, ++ float add_bias, int len); ++ + void ff_float_to_int16_neon(int16_t *, const float *, long); + void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); + +@@ -164,6 +168,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) + c->h264_idct_add = ff_h264_idct_add_neon; + c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; + ++ c->vector_fmul_window = ff_vector_fmul_window_neon; ++ + c->float_to_int16 = ff_float_to_int16_neon; + c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; + +diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S +index 6a54803..49a09b8 100644 +--- a/libavcodec/armv4l/dsputil_neon_s.S ++++ b/libavcodec/armv4l/dsputil_neon_s.S +@@ -324,6 +324,49 @@ extern ff_float_to_int16_interleave_neon + pop {r4,r5,pc} + .endfunc + ++extern ff_vector_fmul_window_neon ++ vld1.32 {d16[],d17[]}, [sp,:32] ++ push {r4,r5,lr} ++ ldr lr, [sp, #16] ++ sub r2, r2, #8 ++ sub r5, lr, #2 ++ add r2, r2, r5, lsl #2 ++ add r4, r3, r5, lsl #3 ++ add ip, r0, r5, lsl #3 ++ mov r5, #-16 ++ dmb ++ vld1.64 {d0,d1}, [r1,:128]! ++ vld1.64 {d2,d3}, [r2,:128], r5 ++ vld1.64 {d4,d5}, [r3,:128]! ++ vld1.64 {d6,d7}, [r4,:128], r5 ++1: vmov q10, q8 ++ vmov q11, q8 ++ vmla.f32 q11, q0, q2 ++ vrev64.32 q3, q3 ++ vswp d6, d7 ++ vmla.f32 q10, q0, q3 ++ vrev64.32 q1, q1 ++ vswp d2, d3 ++ subs lr, lr, #4 ++ vmla.f32 q11, q1, q3 ++ vmls.f32 q10, q1, q2 ++ beq 2f ++ vld1.64 {d0,d1}, [r1,:128]! ++ vld1.64 {d2,d3}, [r2,:128], r5 ++ vld1.64 {d4,d5}, [r3,:128]! ++ vld1.64 {d6,d7}, [r4,:128], r5 ++ vrev64.32 q11, q11 ++ vswp d22, d23 ++ vst1.64 {d20,d21}, [r0,:128]! ++ vst1.64 {d22,d23}, [ip,:128], r5 ++ b 1b ++2: vrev64.32 q11, q11 ++ vswp d22, d23 ++ vst1.64 {d20,d21}, [r0,:128]! ++ vst1.64 {d22,d23}, [ip,:128], r5 ++ pop {r4,r5,pc} ++ .endfunc ++ + #ifdef CONFIG_VORBIS_DECODER + extern ff_vorbis_inverse_coupling_neon + vmov.i32 q10, #(1<<31) |