summary | refs | log | tree | commit | diff
path: root/packages/mplayer/files/mru-neon-vector-fmul-window.diff
diff options
context:
space:
mode:
Diffstat (limited to 'packages/mplayer/files/mru-neon-vector-fmul-window.diff')
-rw-r--r-- packages/mplayer/files/mru-neon-vector-fmul-window.diff 86
1 files changed, 86 insertions, 0 deletions
diff --git a/packages/mplayer/files/mru-neon-vector-fmul-window.diff b/packages/mplayer/files/mru-neon-vector-fmul-window.diff
new file mode 100644
index 0000000000..03ac55bc56
--- /dev/null
+++ b/packages/mplayer/files/mru-neon-vector-fmul-window.diff
@@ -0,0 +1,86 @@
+From: Mans Rullgard <mans@mansr.com>
+Date: Sun, 3 Aug 2008 16:46:43 +0000 (+0100)
+Subject: ARM: NEON optimised vector_fmul_window
+X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=98feb31064dccfd16ce189ff4aec9ccedddf6b04
+
+ARM: NEON optimised vector_fmul_window
+---
+
+diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
+index f9d32c0..6c44940 100644
+--- a/libavcodec/armv4l/dsputil_neon.c
++++ b/libavcodec/armv4l/dsputil_neon.c
+@@ -91,6 +91,10 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
+ void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
+ void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
+
++void ff_vector_fmul_window_neon(float *dst, const float *src0,
++ const float *src1, const float *win,
++ float add_bias, int len); /* body is hand-written NEON assembly in dsputil_neon_s.S */
++
+ void ff_float_to_int16_neon(int16_t *, const float *, long);
+ void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
+
+@@ -164,6 +168,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
+ c->h264_idct_add = ff_h264_idct_add_neon;
+ c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
+
++ c->vector_fmul_window = ff_vector_fmul_window_neon;
++
+ c->float_to_int16 = ff_float_to_int16_neon;
+ c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
+
+diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
+index 6a54803..49a09b8 100644
+--- a/libavcodec/armv4l/dsputil_neon_s.S
++++ b/libavcodec/armv4l/dsputil_neon_s.S
+@@ -324,6 +324,49 @@ extern ff_float_to_int16_interleave_neon
+ pop {r4,r5,pc}
+ .endfunc
+
++extern ff_vector_fmul_window_neon /* windowed multiply-accumulate, 4 floats per pass from each end; comments below assume r0=dst, r1=src0, r2=src1, r3=win as in the C prototype - TODO confirm calling convention */
++ vld1.32 {d16[],d17[]}, [sp,:32] /* q8 = first stack word replicated into all 4 lanes (presumably add_bias - confirm) */
++ push {r4,r5,lr} /* 12 bytes pushed, so the caller's stack words are now 12 bytes higher */
++ ldr lr, [sp, #16] /* lr = second stack word (entry sp + 4), used as the loop counter; presumably len */
++ sub r2, r2, #8 /* together with the add two lines down: r2 += 4*lr - 16 */
++ sub r5, lr, #2 /* r5 = counter - 2, scaled by the shifted adds below */
++ add r2, r2, r5, lsl #2 /* r2 now addresses the last 4 floats of an lr-float array */
++ add r4, r3, r5, lsl #3 /* r4 = r3 + 8*lr - 16: last 4 floats if the array at r3 holds 2*lr floats */
++ add ip, r0, r5, lsl #3 /* ip = r0 + 8*lr - 16: second store pointer, walks downwards */
++ mov r5, #-16 /* post-index step of -16 bytes for the descending pointers r2, r4 and ip */
++ dmb /* NOTE(review): the reason for this data memory barrier is not evident here - confirm */
++ vld1.64 {d0,d1}, [r1,:128]! /* q0 = 4 floats at r1, r1 += 16; :128 asserts a 16-byte-aligned address */
++ vld1.64 {d2,d3}, [r2,:128], r5 /* q1 = 4 floats at r2, r2 -= 16 */
++ vld1.64 {d4,d5}, [r3,:128]! /* q2 = 4 floats at r3, r3 += 16 */
++ vld1.64 {d6,d7}, [r4,:128], r5 /* q3 = 4 floats at r4, r4 -= 16 */
++1: vmov q10, q8 /* both accumulators start from the replicated bias in q8 */
++ vmov q11, q8
++ vmla.f32 q11, q0, q2 /* q11 += q0 * q2 */
++ vrev64.32 q3, q3 /* with the vswp below: reverse the order of the 4 floats in q3 */
++ vswp d6, d7
++ vmla.f32 q10, q0, q3 /* q10 += q0 * reversed q3 */
++ vrev64.32 q1, q1 /* with the vswp below: reverse the order of the 4 floats in q1 */
++ vswp d2, d3
++ subs lr, lr, #4 /* 4 elements handled per pass; Z is set only when the counter hits exactly 0 */
++ vmla.f32 q11, q1, q3 /* q11 += reversed q1 * reversed q3 */
++ vmls.f32 q10, q1, q2 /* q10 -= reversed q1 * q2 */
++ beq 2f /* counter reached 0: skip the reloads and store the final results */
++ vld1.64 {d0,d1}, [r1,:128]! /* fetch the next pass's inputs before storing this pass's results */
++ vld1.64 {d2,d3}, [r2,:128], r5
++ vld1.64 {d4,d5}, [r3,:128]!
++ vld1.64 {d6,d7}, [r4,:128], r5
++ vrev64.32 q11, q11 /* with the vswp below: reverse q11, since it is stored at the descending address ip */
++ vswp d22, d23
++ vst1.64 {d20,d21}, [r0,:128]! /* store q10 at r0, r0 += 16 */
++ vst1.64 {d22,d23}, [ip,:128], r5 /* store reversed q11 at ip, ip -= 16 */
++ b 1b
++2: vrev64.32 q11, q11 /* final pass: same reverse-and-store sequence as in the loop body */
++ vswp d22, d23
++ vst1.64 {d20,d21}, [r0,:128]!
++ vst1.64 {d22,d23}, [ip,:128], r5
++ pop {r4,r5,pc} /* restore r4/r5 and return by loading the saved lr into pc */
++ .endfunc
++
+ #ifdef CONFIG_VORBIS_DECODER
+ extern ff_vorbis_inverse_coupling_neon
+ vmov.i32 q10, #(1<<31)