summaryrefslogtreecommitdiff
path: root/packages/mplayer/files/mru-neon-put-pixels.diff
diff options
context:
space:
mode:
Diffstat (limited to 'packages/mplayer/files/mru-neon-put-pixels.diff')
-rw-r--r--packages/mplayer/files/mru-neon-put-pixels.diff376
1 files changed, 0 insertions, 376 deletions
diff --git a/packages/mplayer/files/mru-neon-put-pixels.diff b/packages/mplayer/files/mru-neon-put-pixels.diff
deleted file mode 100644
index 85650d913b..0000000000
--- a/packages/mplayer/files/mru-neon-put-pixels.diff
+++ /dev/null
@@ -1,376 +0,0 @@
-From: Mans Rullgard <mans@mansr.com>
-Date: Fri, 13 Jun 2008 01:21:58 +0000 (+0100)
-Subject: ARM: NEON optimised put_pixels functions
-X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=86410ed1948118a29c70946d5294df9feb04dfef
-
-ARM: NEON optimised put_pixels functions
----
-
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index d91185e..27746df 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -433,6 +433,10 @@ ASM_OBJS-$(HAVE_ARMV5TE) += armv4l/simple_idct_armv5te.o \
-
- ASM_OBJS-$(HAVE_ARMV6) += armv4l/simple_idct_armv6.o \
-
-+OBJS-$(HAVE_NEON) += armv4l/dsputil_neon.o \
-+
-+ASM_OBJS-$(HAVE_NEON) += armv4l/dsputil_neon_s.o \
-+
- OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \
- sparc/simple_idct_vis.o \
-
-diff --git a/libavcodec/armv4l/dsputil_arm.c b/libavcodec/armv4l/dsputil_arm.c
-index 100b89e..89b51e7 100644
---- a/libavcodec/armv4l/dsputil_arm.c
-+++ b/libavcodec/armv4l/dsputil_arm.c
-@@ -26,6 +26,7 @@
-
- extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
- extern void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
-+extern void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
-
- extern void j_rev_dct_ARM(DCTELEM *data);
- extern void simple_idct_ARM(DCTELEM *data);
-@@ -302,4 +303,7 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
- #ifdef HAVE_ARMVFP
- ff_float_init_arm_vfp(c, avctx);
- #endif
-+#ifdef HAVE_NEON
-+ ff_dsputil_init_neon(c, avctx);
-+#endif
- }
-diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
-new file mode 100644
-index 0000000..8a10dde
---- /dev/null
-+++ b/libavcodec/armv4l/dsputil_neon.c
-@@ -0,0 +1,67 @@
-+/*
-+ * ARM NEON optimised DSP functions
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+#include <stdint.h>
-+
-+#include "libavcodec/avcodec.h"
-+#include "libavcodec/dsputil.h"
-+
-+void ff_put_pixels16_neon(uint8_t *, const uint8_t *, int, int); /* Prototypes for the NEON routines defined in dsputil_neon_s.S; parameters are unnamed here, presumably (dst, src, line_size, h) -- confirm against dsputil.h. */
-+void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); /* _no_rnd variants: same operations built with the non-rounding NEON instructions. */
-+void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
-+
-+void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int); /* Three-argument entry points: the assembly sets the row count itself (16 or 8) and continues into the plain copy routine. */
-+void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
-+
-+void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) /* Installs the NEON put_pixels routines into the function tables of c; avctx is not used in this body. */
-+{
-+ c->put_pixels_tab[0][0] = ff_put_pixels16_neon; /* row [0]: 16-wide routines; columns 0..3 = copy, x2, y2, xy2 */
-+ c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
-+ c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
-+ c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
-+ c->put_pixels_tab[1][0] = ff_put_pixels8_neon; /* row [1]: 8-wide routines, same column order */
-+ c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
-+ c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
-+ c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
-+
-+ c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon; /* the plain copy does no averaging, so the same routine serves the no_rnd table */
-+ c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon; /* likewise shared with the rounding table */
-+ c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
-+ c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
-+
-+ c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_neon; /* only entry [x][0] of the H.264 qpel tables is replaced here */
-+ c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_neon;
-+}
-diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
-new file mode 100644
-index 0000000..fc5e401
---- /dev/null
-+++ b/libavcodec/armv4l/dsputil_neon_s.S
-@@ -0,0 +1,254 @@
-+/*
-+ * ARM NEON optimised DSP functions
-+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+ .fpu neon
-+ .text
-+
-+ .macro put_pixels16 /* Body of a 16-byte-wide block copy: r1 = load pointer, r0 = store pointer, r2 = post-increment applied to both, r3 = remaining row count. */
-+ dmb /* data memory barrier ahead of the loop; NOTE(review): the reason for it is not visible in this file */
-+1: vld1.64 {d0, d1}, [r1], r2 /* load four consecutive 16-byte rows; the loads carry no alignment qualifier */
-+ vld1.64 {d2, d3}, [r1], r2
-+ vld1.64 {d4, d5}, [r1], r2
-+ vld1.64 {d6, d7}, [r1], r2
-+ pld [r1] /* preload hint for the row the next iteration starts with */
-+ subs r3, r3, #4 /* four rows per iteration; sets the flags tested by the bne below */
-+ vst1.64 {d0, d1}, [r0,:128], r2 /* stores use a :128 qualifier, i.e. r0 is required to be 16-byte aligned on every row */
-+ vst1.64 {d2, d3}, [r0,:128], r2
-+ vst1.64 {d4, d5}, [r0,:128], r2
-+ vst1.64 {d6, d7}, [r0,:128], r2
-+ bne 1b /* repeats until r3 is exactly zero, so r3 presumably has to be a positive multiple of 4 -- confirm callers */
-+ bx lr
-+ .endm
-+
-+ .macro put_pixels16_x2 vhadd=vrhadd.u8 /* 16-wide horizontal half-pel: each output byte averages source bytes i and i+1; the vhadd argument selects rounding (default vrhadd.u8) or truncating (vhadd.u8) averaging. Registers as in put_pixels16. */
-+ dmb
-+1: vld1.64 {d0-d2}, [r1], r2 /* 24 bytes are fetched per row; only bytes 0..16 contribute to the result */
-+ vld1.64 {d4-d6}, [r1], r2 /* second row of the pair */
-+ pld [r1]
-+ subs r3, r3, #2 /* two rows per iteration */
-+ vext.8 q1, q0, q1, #1 /* q1 = row bytes 1..16 (bytes 1..15 of q0 followed by byte 0 of q1) */
-+ vext.8 q3, q2, q3, #1 /* same one-byte shift for the second row */
-+ \vhadd q0, q0, q1 /* per-byte average of bytes i and i+1 */
-+ \vhadd q2, q2, q3
-+ vst1.64 {d0, d1}, [r0,:128], r2 /* destination must be 16-byte aligned (:128) */
-+ vst1.64 {d4, d5}, [r0,:128], r2
-+ bne 1b /* exits only when r3 reaches zero exactly; presumably r3 is a positive even count */
-+ bx lr
-+ .endm
-+
-+ .macro put_pixels16_y2 vhadd=vrhadd.u8 /* 16-wide vertical half-pel: each output row averages source rows n and n+1; vhadd selects rounding or truncating averaging. */
-+ push {lr} /* lr is reused below as a scratch register */
-+ add ip, r1, r2 /* ip walks the odd source rows, r1 the even ones */
-+ lsl lr, r2, #1 /* lr = 2 * r2, the step for both row pointers */
-+ vld1.64 {d0, d1}, [r1], lr /* q0 = row 0 */
-+ vld1.64 {d2, d3}, [ip], lr /* q1 = row 1 */
-+ dmb
-+1: subs r3, r3, #2 /* two output rows per iteration */
-+ \vhadd q2, q0, q1 /* average of the current even row and the odd row after it */
-+ vld1.64 {d0, d1}, [r1], lr /* fetch the next even row */
-+ vst1.64 {d4, d5}, [r0,:128], r2
-+ \vhadd q2, q0, q1 /* average of the odd row and the even row just loaded */
-+ vld1.64 {d2, d3}, [ip], lr /* fetch the next odd row; NOTE(review): on the last iteration this row feeds no output -- confirm the extra read is acceptable */
-+ vst1.64 {d4, d5}, [r0,:128], r2
-+ bne 1b
-+ pop {pc} /* return by popping the saved lr into pc */
-+ .endm
-+
-+ .macro put_pixels16_xy2 vshrn=vrshrn.u16 no_rnd=0 /* 16-wide diagonal half-pel: each output byte is the sum of a 2x2 source neighbourhood, shifted right by 2 with the narrowing instruction given in vshrn; no_rnd=1 adds a bias of 1 to each sum first. */
-+ push {lr} /* lr is reused as a scratch register */
-+ lsl lr, r2, #1 /* lr = 2 * r2, step for both row pointers */
-+ add ip, r1, r2 /* ip walks the odd rows, r1 the even rows */
-+ vld1.64 {d0-d2}, [r1], lr /* row 0, 24 bytes fetched */
-+ vld1.64 {d4-d6}, [ip], lr /* row 1 */
-+ .if \no_rnd
-+ vmov.i16 q13, #1 /* constant 1 in every 16-bit lane, used as the no_rnd bias */
-+ .endif
-+ pld [r1]
-+ pld [ip]
-+ vext.8 q1, q0, q1, #1 /* row 0 bytes 1..16 */
-+ vext.8 q3, q2, q3, #1 /* row 1 bytes 1..16 */
-+ vaddl.u8 q8, d0, d2 /* q8 = row 0 horizontal pair sums, pixels 0..7, widened to 16 bits */
-+ vaddl.u8 q10, d1, d3 /* q10 = row 0 pair sums, pixels 8..15 */
-+ vaddl.u8 q9, d4, d6 /* q9 = row 1 pair sums, pixels 0..7 */
-+ vaddl.u8 q11, d5, d7 /* q11 = row 1 pair sums, pixels 8..15 */
-+ dmb
-+1: subs r3, r3, #2 /* two output rows per iteration */
-+ vld1.64 {d0-d2}, [r1], lr /* fetch the next even row */
-+ vadd.u16 q12, q8, q9 /* 2x2 sums for pixels 0..7 of the first output row */
-+ pld [r1]
-+ .if \no_rnd
-+ vadd.u16 q12, q12, q13
-+ .endif
-+ vext.8 q15, q0, q1, #1 /* new even row bytes 1..16 */
-+ vadd.u16 q1 , q10, q11 /* 2x2 sums for pixels 8..15; q1 is free again after the vext above */
-+ \vshrn d28, q12, #2 /* narrow to bytes: first output row, pixels 0..7 */
-+ .if \no_rnd
-+ vadd.u16 q1, q1, q13
-+ .endif
-+ \vshrn d29, q1, #2 /* first output row, pixels 8..15 */
-+ vaddl.u8 q8, d0, d30 /* q8 = pair sums of the new even row, pixels 0..7 */
-+ vld1.64 {d2-d4}, [ip], lr /* fetch the next odd row; NOTE(review): on the last iteration this row feeds no output -- confirm the extra read is acceptable */
-+ vaddl.u8 q10, d1, d31 /* q10 = pair sums of the new even row, pixels 8..15 */
-+ vst1.64 {d28,d29}, [r0,:128], r2 /* store first output row; r0 must be 16-byte aligned (:128) */
-+ vadd.u16 q12, q8, q9 /* second output row: new even row sums plus previous odd row sums */
-+ pld [ip]
-+ .if \no_rnd
-+ vadd.u16 q12, q12, q13
-+ .endif
-+ vext.8 q2, q1, q2, #1 /* new odd row bytes 1..16 */
-+ vadd.u16 q0, q10, q11
-+ \vshrn d30, q12, #2 /* second output row, pixels 0..7 */
-+ .if \no_rnd
-+ vadd.u16 q0, q0, q13
-+ .endif
-+ \vshrn d31, q0, #2 /* second output row, pixels 8..15 */
-+ vaddl.u8 q9, d2, d4 /* q9 = pair sums of the new odd row, pixels 0..7, kept for the next iteration */
-+ vaddl.u8 q11, d3, d5 /* q11 = pair sums of the new odd row, pixels 8..15 */
-+ vst1.64 {d30,d31}, [r0,:128], r2 /* store second output row */
-+ bgt 1b /* loop while r3 is still positive (flags from the subs at the top; no instruction in between sets them) */
-+ pop {pc} /* return by popping the saved lr into pc */
-+ .endm
-+
-+ .macro put_pixels8 /* Body of an 8-byte-wide block copy: r1 = load pointer, r0 = store pointer, r2 = post-increment for both, r3 = remaining row count. */
-+ dmb /* data memory barrier ahead of the loop; NOTE(review): the reason for it is not visible in this file */
-+1: vld1.64 {d0}, [r1], r2 /* load four consecutive 8-byte rows; no alignment qualifier on the loads */
-+ vld1.64 {d1}, [r1], r2
-+ vld1.64 {d2}, [r1], r2
-+ vld1.64 {d3}, [r1], r2
-+ subs r3, r3, #4 /* four rows per iteration */
-+ vst1.64 {d0}, [r0,:64], r2 /* stores use a :64 qualifier, i.e. r0 is required to be 8-byte aligned on every row */
-+ vst1.64 {d1}, [r0,:64], r2
-+ vst1.64 {d2}, [r0,:64], r2
-+ vst1.64 {d3}, [r0,:64], r2
-+ bne 1b /* repeats until r3 is exactly zero, so r3 presumably has to be a positive multiple of 4 -- confirm callers */
-+ bx lr
-+ .endm
-+
-+ .macro put_pixels8_x2 vhadd=vrhadd.u8 /* 8-wide horizontal half-pel: each output byte averages source bytes i and i+1; two rows are packed into one q-register operation. */
-+ dmb
-+1: vld1.64 {d0, d1}, [r1], r2 /* first row: 16 bytes fetched, bytes 0..8 are used */
-+ vld1.64 {d2, d3}, [r1], r2 /* second row */
-+ pld [r1]
-+ subs r3, r3, #2 /* two rows per iteration */
-+ vext.8 d1, d0, d1, #1 /* d1 = first row bytes 1..8 */
-+ vext.8 d3, d2, d3, #1 /* d3 = second row bytes 1..8 */
-+ vswp d1, d2 /* regroup: q0 = bytes 0..7 of both rows, q1 = bytes 1..8 of both rows */
-+ \vhadd q0, q0, q1 /* d0 = first output row, d1 = second output row */
-+ vst1.64 {d0}, [r0,:64], r2 /* destination must be 8-byte aligned (:64) */
-+ vst1.64 {d1}, [r0,:64], r2
-+ bne 1b /* exits only when r3 reaches zero exactly; presumably r3 is a positive even count */
-+ bx lr
-+ .endm
-+
-+ .macro put_pixels8_y2 vhadd=vrhadd.u8 /* 8-wide vertical half-pel: each output row averages source rows n and n+1; vhadd selects rounding or truncating averaging. */
-+ push {lr} /* lr is reused below as a scratch register */
-+ add ip, r1, r2 /* ip walks the odd source rows, r1 the even ones */
-+ lsl lr, r2, #1 /* lr = 2 * r2, the step for both row pointers */
-+ vld1.64 {d0}, [r1], lr /* d0 = row 0 */
-+ vld1.64 {d1}, [ip], lr /* d1 = row 1 */
-+ dmb
-+1: subs r3, r3, #2 /* two output rows per iteration */
-+ \vhadd d4, d0, d1 /* average of the current even row and the odd row after it */
-+ vld1.64 {d0}, [r1], lr /* fetch the next even row */
-+ vst1.64 {d4}, [r0,:64], r2
-+ \vhadd d4, d0, d1 /* average of the odd row and the even row just loaded */
-+ vld1.64 {d1}, [ip], lr /* fetch the next odd row; NOTE(review): on the last iteration this row feeds no output -- confirm the extra read is acceptable */
-+ vst1.64 {d4}, [r0,:64], r2
-+ bne 1b
-+ pop {pc} /* return by popping the saved lr into pc */
-+ .endm
-+
-+ .macro put_pixels8_xy2 vshrn=vrshrn.u16 no_rnd=0 /* 8-wide diagonal half-pel: each output byte is the sum of a 2x2 source neighbourhood, shifted right by 2 with the narrowing instruction given in vshrn; no_rnd=1 adds a bias of 1 to each sum first. */
-+ push {lr} /* lr is reused as a scratch register */
-+ lsl lr, r2, #1 /* lr = 2 * r2, step for both row pointers */
-+ add ip, r1, r2 /* ip walks the odd rows, r1 the even rows */
-+ vld1.64 {d0, d1}, [r1], lr /* row 0, 16 bytes fetched */
-+ vld1.64 {d2, d3}, [ip], lr /* row 1 */
-+ .if \no_rnd
-+ vmov.i16 q11, #1 /* constant 1 in every 16-bit lane, used as the no_rnd bias */
-+ .endif
-+ pld [r1]
-+ pld [ip]
-+ vext.8 d4, d0, d1, #1 /* row 0 bytes 1..8 */
-+ vext.8 d6, d2, d3, #1 /* row 1 bytes 1..8 */
-+ vaddl.u8 q8, d0, d4 /* q8 = row 0 horizontal pair sums, widened to 16 bits */
-+ vaddl.u8 q9, d2, d6 /* q9 = row 1 horizontal pair sums */
-+ dmb
-+1: subs r3, r3, #2 /* two output rows per iteration */
-+ vld1.64 {d0, d1}, [r1], lr /* fetch the next even row */
-+ pld [r1]
-+ vadd.u16 q10, q8, q9 /* 2x2 sums for the first output row */
-+ vext.8 d4, d0, d1, #1 /* new even row bytes 1..8 */
-+ .if \no_rnd
-+ vadd.u16 q10, q10, q11
-+ .endif
-+ vaddl.u8 q8, d0, d4 /* q8 = pair sums of the new even row */
-+ \vshrn d5, q10, #2 /* narrow to bytes: first output row */
-+ vld1.64 {d2, d3}, [ip], lr /* fetch the next odd row; NOTE(review): on the last iteration this row feeds no output -- confirm the extra read is acceptable */
-+ vadd.u16 q10, q8, q9 /* second output row: new even row sums plus previous odd row sums */
-+ pld [ip]
-+ .if \no_rnd
-+ vadd.u16 q10, q10, q11
-+ .endif
-+ vst1.64 {d5}, [r0,:64], r2 /* store first output row; r0 must be 8-byte aligned (:64) */
-+ \vshrn d7, q10, #2 /* narrow to bytes: second output row */
-+ vext.8 d6, d2, d3, #1 /* new odd row bytes 1..8 */
-+ vaddl.u8 q9, d2, d6 /* q9 = pair sums of the new odd row, kept for the next iteration */
-+ vst1.64 {d7}, [r0,:64], r2 /* store second output row */
-+ bgt 1b /* loop while r3 is still positive (flags from the subs at the top) */
-+ pop {pc} /* return by popping the saved lr into pc */
-+ .endm
-+
-+ .macro extern name /* Opens an exported function: emits the symbol directives and the entry label for the given name. The matching .endfunc is left to the user of this macro. */
-+ .global \name /* make the symbol visible to the linker */
-+ .type \name, %function /* mark the symbol as a function */
-+ .func \name /* start of function for debug information; closed by a later .endfunc */
-+\name: /* entry label */
-+ .endm
-+
-+ .macro defun name suf rnd_op args:vararg /* Defines the function ff_<name><suf>_neon whose body is the macro called <name>, expanded with rnd_op and any remaining args as its arguments. */
-+ extern ff_\name\suf\()_neon /* the empty () separates the suf argument from the literal _neon text */
-+ \name \rnd_op \args /* expand the body macro; with rnd_op and args empty it uses its own defaults */
-+ .endfunc
-+ .endm
-+
-+ .macro defun2 name args:vararg /* Defines two functions from one body macro: the default variant, then a variant described by args (suffix, operation, extra arguments). */
-+ defun \name /* default variant: no suffix, body macro defaults */
-+ defun \name \args /* second variant: args supplies suf, rnd_op and any further arguments of defun */
-+ .endm
-+
-+ extern ff_put_h264_qpel16_mc00_neon /* Entry point that only sets the row count; it has no return instruction, so execution continues into ff_put_pixels16_neon defined immediately below. */
-+ mov r3, #16 /* fixed row count of 16 */
-+ .endfunc
-+
-+ defun put_pixels16 /* ff_put_pixels16_neon; must stay directly after the entry point above for the fall-through to work */
-+ defun2 put_pixels16_x2, _no_rnd, vhadd.u8 /* ff_put_pixels16_x2_neon plus the _no_rnd variant using truncating vhadd.u8 */
-+ defun2 put_pixels16_y2, _no_rnd, vhadd.u8
-+ defun2 put_pixels16_xy2, _no_rnd, vshrn.u16, 1 /* _no_rnd variant: non-rounding vshrn.u16 with no_rnd=1, which enables the bias additions */
-+
-+ extern ff_put_h264_qpel8_mc00_neon /* Same pattern for the 8-wide case: sets the row count and continues into ff_put_pixels8_neon below. */
-+ mov r3, #8 /* fixed row count of 8 */
-+ .endfunc
-+
-+ defun put_pixels8 /* ff_put_pixels8_neon; must stay directly after the entry point above */
-+ defun2 put_pixels8_x2, _no_rnd, vhadd.u8
-+ defun2 put_pixels8_y2, _no_rnd, vhadd.u8
-+ defun2 put_pixels8_xy2, _no_rnd, vshrn.u16, 1