diff options
Diffstat (limited to 'packages/mplayer/files/mru-neon-float-to-int16.diff')
-rw-r--r-- | packages/mplayer/files/mru-neon-float-to-int16.diff | 107 |
1 files changed, 0 insertions, 107 deletions
diff --git a/packages/mplayer/files/mru-neon-float-to-int16.diff b/packages/mplayer/files/mru-neon-float-to-int16.diff deleted file mode 100644 index 7a874cab30..0000000000 --- a/packages/mplayer/files/mru-neon-float-to-int16.diff +++ /dev/null @@ -1,107 +0,0 @@ -From: Mans Rullgard <mans@mansr.com> -Date: Thu, 31 Jul 2008 02:35:42 +0000 (+0100) -Subject: ARM: NEON optimised float_to_int16 -X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=f16a738cfc3307cbcba2f9c8aff4b5aa43144731 - -ARM: NEON optimised float_to_int16 ---- - -diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c -index 6dbe835..b584e5b 100644 ---- a/libavcodec/armv4l/dsputil_neon.c -+++ b/libavcodec/armv4l/dsputil_neon.c -@@ -91,6 +91,9 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); - void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); - -+void ff_float_to_int16_neon(int16_t *, const float *, long); -+void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); -+ - void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - { - c->put_pixels_tab[0][0] = ff_put_pixels16_neon; -@@ -158,4 +161,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) - - c->h264_idct_add = ff_h264_idct_add_neon; - c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; -+ -+ c->float_to_int16 = ff_float_to_int16_neon; -+ c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; - } -diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S -index fc5e401..44f75ba 100644 ---- a/libavcodec/armv4l/dsputil_neon_s.S -+++ b/libavcodec/armv4l/dsputil_neon_s.S -@@ -252,3 +252,72 @@ - defun2 put_pixels8_x2, _no_rnd, vhadd.u8 - defun2 put_pixels8_y2, _no_rnd, vhadd.u8 - defun2 put_pixels8_xy2, _no_rnd, vshrn.u16, 1 -+ -+extern ff_float_to_int16_neon -+ dmb -+1: vld1.64 {d0-d3}, [r1,:128]! -+ vcvt.s32.f32 q2, q0 -+ vcvt.s32.f32 q3, q1 -+ subs r2, r2, #8 -+ vqmovn.s32 d4, q2 -+ vqmovn.s32 d5, q3 -+ vst1.64 {d4-d5}, [r0,:128]! -+ bgt 1b -+ bx lr -+ .endfunc -+ -+extern ff_float_to_int16_interleave_neon -+ cmp r3, #2 -+ ldrlt r1, [r1] -+ blt ff_float_to_int16_neon -+ bne 2f -+ -+ ldr ip, [r1] -+ ldr r1, [r1, #4] -+ vld1.64 {d0-d3}, [ip,:128]! -+ vld1.64 {d4-d7}, [r1,:128]! -+ dmb -+1: vcvt.s32.f32 q8, q0 -+ vcvt.s32.f32 q9, q1 -+ vcvt.s32.f32 q10, q2 -+ vcvt.s32.f32 q11, q3 -+ subs r2, r2, #8 -+ vqmovn.s32 d16, q8 -+ vqmovn.s32 d17, q9 -+ vqmovn.s32 d18, q10 -+ vqmovn.s32 d19, q11 -+ beq 1f -+ vld1.64 {d0-d3}, [ip,:128]! -+ vld1.64 {d4-d7}, [r1,:128]! -+ vst2.16 {d16-d19}, [r0,:64]! -+ b 1b -+1: vst2.16 {d16-d19}, [r0,:64]! -+ bx lr -+ -+2: push {r4,r5,lr} -+ lsls r4, r3, #1 -+ dmb -+ b 4f -+3: vld1.64 {d0-d3}, [ip,:128]! -+ vcvt.s32.f32 q2, q0 -+ vcvt.s32.f32 q3, q1 -+ subs lr, lr, #8 -+ vqmovn.s32 d4, q2 -+ vqmovn.s32 d5, q3 -+ vst1.16 {d4[0]}, [r5,:16], r4 -+ vst1.16 {d4[1]}, [r5,:16], r4 -+ vst1.16 {d4[2]}, [r5,:16], r4 -+ vst1.16 {d4[3]}, [r5,:16], r4 -+ vst1.16 {d5[0]}, [r5,:16], r4 -+ vst1.16 {d5[1]}, [r5,:16], r4 -+ vst1.16 {d5[2]}, [r5,:16], r4 -+ vst1.16 {d5[3]}, [r5,:16], r4 -+ bgt 3b -+ subs r3, r3, #1 -+4: ldr ip, [r1], #4 -+ mov lr, r2 -+ mov r5, r0 -+ add r0, r0, #2 -+ bne 3b -+ pop {r4,r5,pc} -+ .endfunc |