diff options
Diffstat (limited to 'packages/mplayer/files/mru-neon-vector-fmul-window.diff')
-rw-r--r-- | packages/mplayer/files/mru-neon-vector-fmul-window.diff | 86 |
1 files changed, 0 insertions, 86 deletions
diff --git a/packages/mplayer/files/mru-neon-vector-fmul-window.diff b/packages/mplayer/files/mru-neon-vector-fmul-window.diff deleted file mode 100644 index 03ac55bc56..0000000000 --- a/packages/mplayer/files/mru-neon-vector-fmul-window.diff +++ /dev/null @@ -1,86 +0,0 @@ -From: Mans Rullgard <mans@mansr.com> -Date: Sun, 3 Aug 2008 16:46:43 +0000 (+0100) -Subject: ARM: NEON optimised vector_fmul_window -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=98feb31064dccfd16ce189ff4aec9ccedddf6b04 - -ARM: NEON optimised vector_fmul_window ---- - -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index f9d32c0..6c44940 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -91,6 +91,10 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); - void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); - -+void ff_vector_fmul_window_neon(float *dst, const float *src0, -+ const float *src1, const float *win, -+ float add_bias, int len); -+ - void ff_float_to_int16_neon(int16_t *, const float *, long); - void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); - -@@ -164,6 +168,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - c->h264_idct_add = ff_h264_idct_add_neon; - c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; - -+ c->vector_fmul_window = ff_vector_fmul_window_neon; -+ - c->float_to_int16 = ff_float_to_int16_neon; - c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; - -diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S -index 6a54803..49a09b8 100644 ---- a/libavcodec/armv4l/dsputil_neon_s.S -+++ b/libavcodec/armv4l/dsputil_neon_s.S -@@ -324,6 +324,49 @@ extern ff_float_to_int16_interleave_neon - pop {r4,r5,pc} - .endfunc - -+extern ff_vector_fmul_window_neon -+ vld1.32 {d16[],d17[]}, [sp,:32] -+ push {r4,r5,lr} -+ ldr lr, [sp, #16] -+ sub r2, r2, #8 -+ sub r5, lr, #2 -+ add r2, r2, r5, lsl #2 -+ add r4, r3, r5, lsl #3 -+ add ip, r0, r5, lsl #3 -+ mov r5, #-16 -+ dmb -+ vld1.64 {d0,d1}, [r1,:128]! -+ vld1.64 {d2,d3}, [r2,:128], r5 -+ vld1.64 {d4,d5}, [r3,:128]! -+ vld1.64 {d6,d7}, [r4,:128], r5 -+1: vmov q10, q8 -+ vmov q11, q8 -+ vmla.f32 q11, q0, q2 -+ vrev64.32 q3, q3 -+ vswp d6, d7 -+ vmla.f32 q10, q0, q3 -+ vrev64.32 q1, q1 -+ vswp d2, d3 -+ subs lr, lr, #4 -+ vmla.f32 q11, q1, q3 -+ vmls.f32 q10, q1, q2 -+ beq 2f -+ vld1.64 {d0,d1}, [r1,:128]! -+ vld1.64 {d2,d3}, [r2,:128], r5 -+ vld1.64 {d4,d5}, [r3,:128]! -+ vld1.64 {d6,d7}, [r4,:128], r5 -+ vrev64.32 q11, q11 -+ vswp d22, d23 -+ vst1.64 {d20,d21}, [r0,:128]! -+ vst1.64 {d22,d23}, [ip,:128], r5 -+ b 1b -+2: vrev64.32 q11, q11 -+ vswp d22, d23 -+ vst1.64 {d20,d21}, [r0,:128]! -+ vst1.64 {d22,d23}, [ip,:128], r5 -+ pop {r4,r5,pc} -+ .endfunc -+ - #ifdef CONFIG_VORBIS_DECODER - extern ff_vorbis_inverse_coupling_neon - vmov.i32 q10, #(1<<31) |