diff options
Diffstat (limited to 'recipes/mplayer/files')
38 files changed, 16265 insertions, 0 deletions
diff --git a/recipes/mplayer/files/Makefile-codec-cfg.patch b/recipes/mplayer/files/Makefile-codec-cfg.patch new file mode 100644 index 0000000000..9ce22a8839 --- /dev/null +++ b/recipes/mplayer/files/Makefile-codec-cfg.patch @@ -0,0 +1,11 @@ +--- /tmp/Makefile 2008-09-24 19:24:26.000000000 +0200 ++++ trunk/Makefile 2008-09-24 19:25:01.683198000 +0200 +@@ -752,7 +752,7 @@ + $(CC) -o $@ $^ $(LDFLAGS_MPLAYER) + + codec-cfg$(EXESUF): codec-cfg.c codec-cfg.h help_mp.h +- $(HOST_CC) -O -DCODECS2HTML $(EXTRA_INC) -o $@ $< ++ $(BUILD_CC) -O -DCODECS2HTML $(EXTRA_INC) -o $@ $< + + codecs.conf.h: codec-cfg$(EXESUF) etc/codecs.conf + ./$^ > $@ diff --git a/recipes/mplayer/files/Makefile.patch b/recipes/mplayer/files/Makefile.patch new file mode 100644 index 0000000000..f0464b9176 --- /dev/null +++ b/recipes/mplayer/files/Makefile.patch @@ -0,0 +1,13 @@ +Index: MPlayer-1.0rc1/Makefile +=================================================================== +--- MPlayer-1.0rc1.orig/Makefile ++++ MPlayer-1.0rc1/Makefile +@@ -399,7 +399,7 @@ osdep/mplayer-rc.o: osdep/mplayer.rc + windres -o $@ osdep/mplayer.rc + + codec-cfg: codec-cfg.c codec-cfg.h help_mp.h +- $(HOST_CC) -I. -DCODECS2HTML codec-cfg.c -o $@ ++ $(BUILD_CC) -I. 
-DCODECS2HTML codec-cfg.c -o $@ + + codecs.conf.h: codec-cfg etc/codecs.conf + ./codec-cfg ./etc/codecs.conf > $@ diff --git a/recipes/mplayer/files/armv5te/configh b/recipes/mplayer/files/armv5te/configh new file mode 100644 index 0000000000..46c647e2d5 --- /dev/null +++ b/recipes/mplayer/files/armv5te/configh @@ -0,0 +1,6 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 +#define ARCH_ARMV4L 1 +#define ENABLE_ARMV4L 1 +#define HAVE_ARMV5TE 1 +#define ENABLE_ARMV5TE 1 diff --git a/recipes/mplayer/files/armv5te/configmak b/recipes/mplayer/files/armv5te/configmak new file mode 100644 index 0000000000..aa9978515d --- /dev/null +++ b/recipes/mplayer/files/armv5te/configmak @@ -0,0 +1,3 @@ +ARCH_ARMV4L=yes +HAVE_ARMV5TE=yes + diff --git a/recipes/mplayer/files/armv6/configh b/recipes/mplayer/files/armv6/configh new file mode 100644 index 0000000000..2301e723d6 --- /dev/null +++ b/recipes/mplayer/files/armv6/configh @@ -0,0 +1,8 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 +#define ARCH_ARMV4L 1 +#define ENABLE_ARMV4L 1 +#define HAVE_ARMV5TE 1 +#define ENABLE_ARMV5TE 1 +#define HAVE_ARMV6 1 +#define ENABLE_ARMV6 1 diff --git a/recipes/mplayer/files/armv6/configmak b/recipes/mplayer/files/armv6/configmak new file mode 100644 index 0000000000..4db5dc0dfd --- /dev/null +++ b/recipes/mplayer/files/armv6/configmak @@ -0,0 +1,3 @@ +ARCH_ARMV4L=yes +HAVE_ARMV5TE=yes +HAVE_ARMV6=yes diff --git a/recipes/mplayer/files/armv7a/configh b/recipes/mplayer/files/armv7a/configh new file mode 100644 index 0000000000..245e40f56a --- /dev/null +++ b/recipes/mplayer/files/armv7a/configh @@ -0,0 +1,14 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 +#define ARCH_ARMV4L 1 +#define ENABLE_ARMV4L 1 +#define HAVE_ARMV5TE 1 +#define ENABLE_ARMV5TE 1 +#define HAVE_ARMV6 1 +#define ENABLE_ARMV6 1 +#define HAVE_ARMV6T2 1 +#define ENABLE_ARMV6T2 1 +#define HAVE_ARMVFP 1 +#define ENABLE_ARMVFP 1 +#define HAVE_NEON 1 +#define ENABLE_NEON 1 diff --git a/recipes/mplayer/files/armv7a/configmak 
b/recipes/mplayer/files/armv7a/configmak new file mode 100644 index 0000000000..50d549f794 --- /dev/null +++ b/recipes/mplayer/files/armv7a/configmak @@ -0,0 +1,6 @@ +ARCH_ARMV4L=yes +HAVE_ARMV5TE=yes +HAVE_ARMV6=yes +HAVE_ARMV6T2=yes +HAVE_ARMVFP=yes +HAVE_NEON=yes diff --git a/recipes/mplayer/files/configh b/recipes/mplayer/files/configh new file mode 100644 index 0000000000..2fe7658383 --- /dev/null +++ b/recipes/mplayer/files/configh @@ -0,0 +1,2 @@ +#define HAVE_LLRINT 1 +#define HAVE_ROUNDF 1 diff --git a/recipes/mplayer/files/configmak b/recipes/mplayer/files/configmak new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/recipes/mplayer/files/configmak diff --git a/recipes/mplayer/files/disable-executable-stack-test.patch b/recipes/mplayer/files/disable-executable-stack-test.patch new file mode 100644 index 0000000000..dc8871b6ae --- /dev/null +++ b/recipes/mplayer/files/disable-executable-stack-test.patch @@ -0,0 +1,30 @@ +Removes the "noexecstack" check from configure so we don't end up with: + + mplayer: error while loading shared libraries: libmad.so.0: cannot + enable executable stack as shared object requires: Error 14 + +at runtime. 
+ +# +# Patch managed by http://www.holgerschurig.de/patcher.html +# + +--- MPlayer-1.0pre8/configure~disable-executable-stack-test ++++ MPlayer-1.0pre8/configure +@@ -7193,15 +7193,7 @@ + fi + + echocheck "compiler support for noexecstack" +-cat > $TMPC <<EOF +-int main(void) { return 0; } +-EOF +-if cc_check -Wl,-z,noexecstack ; then +- _ld_extra="-Wl,-z,noexecstack $_ld_extra" +- echores "yes" +-else +- echores "no" +-fi ++echores "no" + + echocheck "ftello()" + # if we don't have ftello use the osdep/ compatibility module diff --git a/recipes/mplayer/files/imageon-video_out.patch b/recipes/mplayer/files/imageon-video_out.patch new file mode 100644 index 0000000000..fcb7953c1b --- /dev/null +++ b/recipes/mplayer/files/imageon-video_out.patch @@ -0,0 +1,20 @@ +--- mplayer/libvo/video_out.c.orig 2006-11-27 12:49:51.000000000 -0800 ++++ mplayer/libvo/video_out.c 2006-11-27 12:41:59.000000000 -0800 +@@ -87,6 +87,7 @@ extern vo_functions_t video_out_syncfb; + extern vo_functions_t video_out_fbdev; + extern vo_functions_t video_out_fbdev2; + extern vo_functions_t video_out_w100; ++extern vo_functions_t video_out_imageon; + extern vo_functions_t video_out_svga; + extern vo_functions_t video_out_png; + extern vo_functions_t video_out_ggi; +@@ -200,6 +201,9 @@ vo_functions_t* video_out_drivers[] = + #ifdef HAVE_W100 + &video_out_w100, + #endif ++#ifdef HAVE_IMAGEON ++ &video_out_imageon, ++#endif + #ifdef HAVE_SVGALIB + &video_out_svga, + #endif diff --git a/recipes/mplayer/files/libmpdemux-ogg-include-svn.patch b/recipes/mplayer/files/libmpdemux-ogg-include-svn.patch new file mode 100644 index 0000000000..52b7029bc5 --- /dev/null +++ b/recipes/mplayer/files/libmpdemux-ogg-include-svn.patch @@ -0,0 +1,11 @@ +--- trunk/libmpdemux/Makefile.orig 2006-07-24 10:11:06.000000000 +0100 ++++ trunk/libmpdemux/Makefile 2006-07-24 10:12:02.000000000 +0100 +@@ -208,7 +208,7 @@ + OBJS = $(SRCS:.c=.o) + OBJS += $(CPLUSPLUSSRCS:.cpp=.o) + INCLUDE = -I.. 
-I../loader $(LIBAV_INC) +-CFLAGS = $(OPTFLAGS) $(INCLUDE) $(XMMS_CFLAGS) $(CDPARANOIA_INC) $(DVB_INC) ++CFLAGS = $(INCLUDE) $(OPTFLAGS) $(XMMS_CFLAGS) $(CDPARANOIA_INC) $(DVB_INC) + CPLUSPLUSFLAGS = $(CFLAGS) $(CPLUSPLUSINCLUDE) -D__STDC_LIMIT_MACROS + CPLUSPLUS = $(CC) + diff --git a/recipes/mplayer/files/makefile-nostrip-rc2.patch b/recipes/mplayer/files/makefile-nostrip-rc2.patch new file mode 100644 index 0000000000..d7c147565f --- /dev/null +++ b/recipes/mplayer/files/makefile-nostrip-rc2.patch @@ -0,0 +1,24 @@ +upstream: not applicable, I think (unless somebody claims there is a more portable way to stripping) + +Index: MPlayer-1.0rc2/Makefile +=================================================================== +--- MPlayer-1.0rc2.orig/Makefile 2008-04-19 10:31:18.000000000 +0200 ++++ MPlayer-1.0rc2/Makefile 2008-04-19 10:31:55.000000000 +0200 +@@ -253,7 +253,7 @@ + if test -f $(CONFDIR)/codecs.conf ; then mv -f $(CONFDIR)/codecs.conf $(CONFDIR)/codecs.conf.old ; fi + + install-mplayer: mplayer$(EXESUF) +- $(INSTALL) -m 755 $(INSTALLSTRIP) mplayer$(EXESUF) $(BINDIR) ++ $(INSTALL) -m 755 mplayer$(EXESUF) $(BINDIR) + + install-mplayer-man: + for i in $(MAN_LANG); do \ +@@ -266,7 +266,7 @@ + done + + install-mencoder: mencoder$(EXESUF) +- $(INSTALL) -m 755 $(INSTALLSTRIP) mencoder$(EXESUF) $(BINDIR) ++ $(INSTALL) -m 755 mencoder$(EXESUF) $(BINDIR) + for i in $(MAN_LANG); do \ + if test "$$i" = en ; then \ + cd $(MANDIR)/man1 && ln -sf mplayer.1 mencoder.1 ; \ diff --git a/recipes/mplayer/files/makefile-nostrip-svn.patch b/recipes/mplayer/files/makefile-nostrip-svn.patch new file mode 100644 index 0000000000..c684001792 --- /dev/null +++ b/recipes/mplayer/files/makefile-nostrip-svn.patch @@ -0,0 +1,11 @@ +--- /tmp/Makefile 2008-06-10 21:05:55.613676241 +0200 ++++ trunk/Makefile 2008-06-10 21:06:30.701172833 +0200 +@@ -797,7 +797,7 @@ + $(INSTALL) -d $(BINDIR) $(CONFDIR) + + install-%: %$(EXESUF) install-dirs +- $(INSTALL) -m 755 $(INSTALLSTRIP) $< $(BINDIR) ++ 
$(INSTALL) -m 755 $< $(BINDIR) + + install-mplayer-man: $(foreach lang,$(MAN_LANG_ALL),install-mplayer-man-$(lang)) + install-mencoder-man: $(foreach lang,$(MAN_LANG_ALL),install-mencoder-man-$(lang)) diff --git a/recipes/mplayer/files/makefile-nostrip.patch b/recipes/mplayer/files/makefile-nostrip.patch new file mode 100644 index 0000000000..604433938b --- /dev/null +++ b/recipes/mplayer/files/makefile-nostrip.patch @@ -0,0 +1,24 @@ +upstream: not applicable, I think (unless somebody claims there is a more portable way to stripping) + +Index: MPlayer-1.0rc1/Makefile +=================================================================== +--- MPlayer-1.0rc1.orig/Makefile ++++ MPlayer-1.0rc1/Makefile +@@ -416,7 +416,7 @@ ifeq ($(VIDIX),yes) + $(MAKE) -C vidix install + endif + $(INSTALL) -d $(BINDIR) +- $(INSTALL) -m 755 $(INSTALLSTRIP) $(PRG) $(BINDIR)/$(PRG) ++ $(INSTALL) -m 755 $(PRG) $(BINDIR)/$(PRG) + ifeq ($(GUI),yes) + -ln -sf $(PRG) $(BINDIR)/gmplayer + endif +@@ -430,7 +430,7 @@ endif + fi ; \ + done + ifeq ($(MENCODER),yes) +- $(INSTALL) -m 755 $(INSTALLSTRIP) $(PRG_MENCODER) $(BINDIR)/$(PRG_MENCODER) ++ $(INSTALL) -m 755 $(PRG_MENCODER) $(BINDIR)/$(PRG_MENCODER) + for i in $(MAN_LANG); do \ + if test "$$i" = en ; then \ + cd $(MANDIR)/man1 && ln -sf mplayer.1 mencoder.1 ; \ diff --git a/recipes/mplayer/files/motion-comp-pld.patch b/recipes/mplayer/files/motion-comp-pld.patch new file mode 100644 index 0000000000..6d4160c7bb --- /dev/null +++ b/recipes/mplayer/files/motion-comp-pld.patch @@ -0,0 +1,163 @@ +Index: MPlayer-1.0rc2/libmpeg2/motion_comp_arm_s.S +=================================================================== +--- MPlayer-1.0rc2.orig/libmpeg2/motion_comp_arm_s.S 2007-11-02 14:16:50.000000000 +0000 ++++ MPlayer-1.0rc2/libmpeg2/motion_comp_arm_s.S 2007-11-02 14:23:53.000000000 +0000 +@@ -18,6 +18,14 @@ + @ along with this program; if not, write to the Free Software + @ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + 
++#if defined(__ARM_ARCH_5__) || \ ++ defined(__ARM_ARCH_5T__) || \ ++ defined(__ARM_ARCH_5TE__) ++#define PLD(code...) code ++#else ++#define PLD(code...) ++#endif ++ + .text + + @ ---------------------------------------------------------------- +@@ -25,7 +33,7 @@ + .global MC_put_o_16_arm + MC_put_o_16_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_o_16_arm_align_jt +@@ -35,7 +43,7 @@ + MC_put_o_16_arm_align0: + ldmia r1, {r4-r7} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + stmia r0, {r4-r7} + subs r3, r3, #1 + add r0, r0, r2 +@@ -46,7 +54,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + mov r9, r4, lsr #(\shift) +- pld [r1] ++ PLD ( pld [r1] ) + mov r10, r5, lsr #(\shift) + orr r9, r9, r5, lsl #(32-\shift) + mov r11, r6, lsr #(\shift) +@@ -85,7 +93,7 @@ + .global MC_put_o_8_arm + MC_put_o_8_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r10, lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_o_8_arm_align_jt +@@ -94,7 +102,7 @@ + MC_put_o_8_arm_align0: + ldmia r1, {r4-r5} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + stmia r0, {r4-r5} + add r0, r0, r2 + subs r3, r3, #1 +@@ -105,7 +113,7 @@ + ldmia r1, {r4-r6} + add r1, r1, r2 + mov r9, r4, lsr #(\shift) +- pld [r1] ++ PLD ( pld [r1] ) + mov r10, r5, lsr #(\shift) + orr r9, r9, r5, lsl #(32-\shift) + orr r10, r10, r6, lsl #(32-\shift) +@@ -154,7 +162,7 @@ + .global MC_put_x_16_arm + MC_put_x_16_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_x_16_arm_align_jt +@@ -179,7 +187,7 @@ + MC_put_x_16_arm_align0: + ldmia r1, {r4-r8} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 
+@@ -193,7 +201,7 @@ + and r1, r1, #0xFFFFFFFC + 1: ldmia r1, {r4-r8} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 +@@ -208,7 +216,7 @@ + and r1, r1, #0xFFFFFFFC + 1: ldmia r1, {r4-r8} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 +@@ -223,7 +231,7 @@ + and r1, r1, #0xFFFFFFFC + 1: ldmia r1, {r4-r8} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 +@@ -246,7 +254,7 @@ + .global MC_put_x_8_arm + MC_put_x_8_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_x_8_arm_align_jt +@@ -267,7 +275,7 @@ + MC_put_x_8_arm_align0: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} +@@ -279,7 +287,7 @@ + and r1, r1, #0xFFFFFFFC + 1: ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DW 8, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 +@@ -292,7 +300,7 @@ + and r1, r1, #0xFFFFFFFC + 1: ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DW 16, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 +@@ -305,7 +313,7 @@ + and r1, r1, #0xFFFFFFFC + 1: ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DW 24, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 diff --git a/recipes/mplayer/files/mplayer-1.0rc1-atmel.2.patch b/recipes/mplayer/files/mplayer-1.0rc1-atmel.2.patch new file mode 100644 index 0000000000..800f43e8eb --- /dev/null +++ b/recipes/mplayer/files/mplayer-1.0rc1-atmel.2.patch @@ -0,0 +1,6444 @@ + cfg-common.h | 4 + + cfg-mencoder.h | 4 + + cfg-mplayer.h | 4 + + configure | 13 +- + libaf/af_format.c | 7 + + libavcodec/Makefile | 7 + + libavcodec/avr32/dsputil_avr32.c | 2678 
++++++++++++++++++++++++++++++++++++++ + libavcodec/avr32/fdct.S | 541 ++++++++ + libavcodec/avr32/h264idct.S | 451 +++++++ + libavcodec/avr32/idct.S | 829 ++++++++++++ + libavcodec/avr32/mc.S | 434 ++++++ + libavcodec/avr32/pico.h | 260 ++++ + libavcodec/bitstream.h | 77 +- + libavcodec/dsputil.c | 3 + + libavcodec/h264.c | 15 + + libavutil/common.h | 16 + + libavutil/internal.h | 9 + + libfaad2/common.h | 2 +- + libmpcodecs/ad_libmad.c | 5 + + libswscale/pico-avr32.h | 137 ++ + libswscale/swscale_internal.h | 2 +- + libswscale/yuv2rgb.c | 14 + + libswscale/yuv2rgb_avr32.c | 416 ++++++ + libvo/vo_fbdev2.c | 101 ++- + version.sh | 2 +- + 25 files changed, 6011 insertions(+), 20 deletions(-) + create mode 100644 libavcodec/avr32/dsputil_avr32.c + create mode 100644 libavcodec/avr32/fdct.S + create mode 100644 libavcodec/avr32/h264idct.S + create mode 100644 libavcodec/avr32/idct.S + create mode 100644 libavcodec/avr32/mc.S + create mode 100644 libavcodec/avr32/pico.h + create mode 100644 libswscale/pico-avr32.h + create mode 100644 libswscale/yuv2rgb_avr32.c + +diff --git a/cfg-common.h b/cfg-common.h +index 780df38..7d878a8 100644 +--- a/cfg-common.h ++++ b/cfg-common.h +@@ -235,6 +235,10 @@ + {"tsprobe", &ts_probe, CONF_TYPE_POSITION, 0, 0, TS_MAX_PROBE_SIZE, NULL}, + {"tskeepbroken", &ts_keep_broken, CONF_TYPE_FLAG, 0, 0, 1, NULL}, + ++#ifdef ARCH_AVR32 ++ {"use-pico", &avr32_use_pico, CONF_TYPE_FLAG, 0, 0, 1, NULL}, ++ {"nouse-pico", &avr32_use_pico, CONF_TYPE_FLAG, 0, 1, 0, NULL}, ++#endif + // draw by slices or whole frame (useful with libmpeg2/libavcodec) + {"slices", &vd_use_slices, CONF_TYPE_FLAG, 0, 0, 1, NULL}, + {"noslices", &vd_use_slices, CONF_TYPE_FLAG, 0, 1, 0, NULL}, +diff --git a/cfg-mencoder.h b/cfg-mencoder.h +index 411b748..addf791 100644 +--- a/cfg-mencoder.h ++++ b/cfg-mencoder.h +@@ -5,6 +5,10 @@ + + #include "cfg-common.h" + ++#ifdef ARCH_AVR32 ++extern int avr32_use_pico; ++#endif ++ + #ifdef USE_FAKE_MONO + extern int fakemono; // defined 
in dec_audio.c + #endif +diff --git a/cfg-mplayer.h b/cfg-mplayer.h +index 62b6eac..31499c2 100644 +--- a/cfg-mplayer.h ++++ b/cfg-mplayer.h +@@ -4,6 +4,10 @@ + + #include "cfg-common.h" + ++#ifdef ARCH_AVR32 ++extern int avr32_use_pico; ++#endif ++ + extern int noconsolecontrols; + + #if defined(HAVE_FBDEV)||defined(HAVE_VESA) +diff --git a/configure b/configure +index 29002c8..56c6fe4 100755 +--- a/configure ++++ b/configure +@@ -1203,6 +1203,15 @@ EOF + _optimizing="$proc" + ;; + ++ avr32) ++ _def_arch='#define ARCH_AVR32' ++ _target_arch='TARGET_ARCH_AVR32 = yes' ++ iproc='avr32' ++ proc='' ++ _march='' ++ _mcpu='' ++ _optimizing='' ++ ;; + arm|armv4l|armv5tel) + _def_arch='#define ARCH_ARMV4L 1' + _target_arch='TARGET_ARCH_ARMV4L = yes' +@@ -1533,7 +1542,7 @@ echores $_named_asm_args + # Checking for CFLAGS + _stripbinaries=yes + if test "$_profile" != "" || test "$_debug" != "" ; then +- CFLAGS="-W -Wall -O2 $_march $_mcpu $_debug $_profile" ++ CFLAGS="-W -Wall -O4 $_march $_mcpu $_debug $_profile" + if test "$_cc_major" -ge "3" ; then + CFLAGS=`echo "$CFLAGS" | sed -e 's/\(-Wall\)/\1 -Wno-unused-parameter/'` + fi +@@ -3794,7 +3803,7 @@ fi + + + echocheck "X11 headers presence" +- for I in `echo $_inc_extra | sed s/-I//g` /usr/X11/include /usr/X11R6/include /usr/include/X11R6 /usr/include /usr/openwin/include ; do ++ for I in `echo $_inc_extra | sed s/-I//g`; do + if test -f "$I/X11/Xlib.h" ; then + _inc_x11="-I$I" + _x11_headers="yes" +diff --git a/libaf/af_format.c b/libaf/af_format.c +index e5b7cc9..5d7ea6d 100644 +--- a/libaf/af_format.c ++++ b/libaf/af_format.c +@@ -20,7 +20,14 @@ + // Integer to float conversion through lrintf() + #ifdef HAVE_LRINTF + #include <math.h> ++ ++#ifdef ARCH_AVR32 ++#define lrintf(x) rint(x) ++#define llrint(x) (long long)rint(x) ++#else + long int lrintf(float); ++#endif ++ + #else + #define lrintf(x) ((int)(x)) + #endif +diff --git a/libavcodec/Makefile b/libavcodec/Makefile +index 17b6c45..8e1dc96 100644 +--- 
a/libavcodec/Makefile ++++ b/libavcodec/Makefile +@@ -360,6 +360,12 @@ OBJS-$(TARGET_ARCH_SPARC) += sparc/dsputil_vis.o \ + + sparc/dsputil_vis.o: CFLAGS += -mcpu=ultrasparc -mtune=ultrasparc + ++# avr32 specific stuff ++ifeq ($(TARGET_ARCH_AVR32),yes) ++ASM_OBJS += avr32/idct.o avr32/fdct.o avr32/mc.o avr32/h264idct.o ++OBJS += avr32/dsputil_avr32.o ++endif ++ + # sun mediaLib specific stuff + OBJS-$(HAVE_MLIB) += mlib/dsputil_mlib.o \ + +@@ -419,6 +425,7 @@ tests: apiexample $(TESTS) + clean:: + rm -f \ + i386/*.o i386/*~ \ ++ avr32/*.o avr32/*~ \ + armv4l/*.o armv4l/*~ \ + mlib/*.o mlib/*~ \ + alpha/*.o alpha/*~ \ +diff --git a/libavcodec/avr32/dsputil_avr32.c b/libavcodec/avr32/dsputil_avr32.c +new file mode 100644 +index 0000000..200284d +--- /dev/null ++++ b/libavcodec/avr32/dsputil_avr32.c +@@ -0,0 +1,2678 @@ ++/* ++ * Copyright (c) 2007 Atmel Corporation. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. The name of ATMEL may not be used to endorse or promote products ++ * derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. 
IN NO EVENT SHALL ATMEL ++ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ++ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY ++ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE ++ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH ++ * DAMAGE. ++ */ ++ ++#include "../dsputil.h" ++#include "pico.h" ++ ++int avr32_use_pico = 1; ++ ++//#define CHECK_DSP_FUNCS_AGAINST_C ++ ++#ifdef CHECK_DSP_FUNCS_AGAINST_C ++#define DSP_FUNC_NAME(name) test_ ## name ++#else ++#define DSP_FUNC_NAME(name) name ++#endif ++ ++union doubleword { ++ int64_t doubleword; ++ struct { ++ int32_t top; ++ int32_t bottom; ++ } words; ++}; ++ ++#undef LD16 ++#undef LD32 ++#undef LD64 ++ ++#define LD16(a) (*((uint16_t*)(a))) ++#define LD32(a) (*((uint32_t*)(a))) ++#define LD64(a) (*((uint64_t*)(a))) ++#define LD64_UNALIGNED(a) \ ++ ({ union doubleword __tmp__; \ ++ __tmp__.words.top = LD32(a); \ ++ __tmp__.words.bottom = LD32(a + 4); \ ++ __tmp__.doubleword; }) ++ ++#undef ST32 ++#undef ST16 ++ ++#define ST16(a, b) *((uint16_t*)(a)) = (b) ++#define ST32(a, b) *((uint32_t*)(a)) = (b) ++ ++#undef rnd_avg32 ++#define rnd_avg32(a, b) \ ++ ({ uint32_t __tmp__;\ ++ asm("pavg.ub\t%0, %1, %2" : "=r"(__tmp__) : "r"(a), "r"(b));\ ++ __tmp__;}) ++ ++void idct_avr32(DCTELEM *data); ++void fdct_avr32(DCTELEM *data); ++ ++void idct_put_avr32(uint8_t *dest, int line_size, DCTELEM *data); ++void idct_add_avr32(uint8_t *dest, int line_size, DCTELEM *data); ++ ++void h264_idct_add_avr32(uint8_t *dest, DCTELEM *data, int stride); ++void h264_idct8_add_avr32(uint8_t *dest, DCTELEM *data, int stride); ++ ++#define extern_dspfunc(PFX, NUM) \ ++ void PFX ## _pixels ## NUM ## _avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \ ++ 
void PFX ## _pixels ## NUM ## _h_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \ ++ void PFX ## _pixels ## NUM ## _v_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \ ++ void PFX ## _pixels ## NUM ## _hv_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ) ++ ++extern_dspfunc(put, 8); ++extern_dspfunc(put_no_rnd, 8); ++extern_dspfunc(avg, 8); ++extern_dspfunc(avg_no_rnd, 8); ++#undef extern_dspfunc ++ ++#ifdef CHECK_DSP_FUNCS_AGAINST_C ++#define extern_dspfunc(PFX, NUM) \ ++ void PFX ## _pixels ## NUM ## _c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \ ++ void PFX ## _pixels ## NUM ## _x2_c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \ ++ void PFX ## _pixels ## NUM ## _y2_c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \ ++ void PFX ## _pixels ## NUM ## _xy2_c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ) ++ ++extern_dspfunc(put, 4); ++extern_dspfunc(put_no_rnd, 4); ++extern_dspfunc(put, 8); ++extern_dspfunc(put_no_rnd, 8); ++extern_dspfunc(put, 16); ++extern_dspfunc(put_no_rnd, 16); ++extern_dspfunc(avg, 8); ++extern_dspfunc(avg_no_rnd, 8); ++extern_dspfunc(avg, 16); ++extern_dspfunc(avg_no_rnd, 16); ++ ++ ++#undef extern_dspfunc ++#define extern_dspfunc(PFX, NUM) \ ++void PFX ## NUM ## _mc00_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc10_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc20_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc30_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc01_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc11_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc21_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc31_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc02_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc12_c(uint8_t *dst, 
uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc22_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc32_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc03_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc13_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc23_c(uint8_t *dst, uint8_t *src, int stride); \ ++void PFX ## NUM ## _mc33_c(uint8_t *dst, uint8_t *src, int stride); \ ++ ++extern_dspfunc(put_h264_qpel, 16); ++extern_dspfunc(put_h264_qpel, 8); ++extern_dspfunc(put_h264_qpel, 4); ++extern_dspfunc(avg_h264_qpel, 16); ++extern_dspfunc(avg_h264_qpel, 8); ++extern_dspfunc(avg_h264_qpel, 4); ++ ++#undef extern_dspfunc ++ ++void put_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y); ++void put_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y); ++void put_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y); ++ ++void avg_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y); ++void avg_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y); ++void avg_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y); ++ ++ ++void dump_block8(uint8_t *block, int line_size, int h); ++void dump_block4(uint8_t *block, int line_size, int h); ++void dump_block(uint8_t *block, int line_size, int h, int w); ++ ++void check_block8(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct, ++ int h, char *name, int max_dev); ++void check_block4(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct, ++ int h, char *name, int max_dev); ++void check_block(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct, ++ int h, int width, char *name, int max_dev); ++ 
++#define PIXOP2( OPNAME, OP ) \ ++void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ ++ int i;\ ++ for(i=0; i<h; i++){\ ++ OP(*((uint32_t*)(block )), LD32(pixels ));\ ++ pixels+=line_size;\ ++ block +=line_size;\ ++ }\ ++}\ ++void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ ++ int src_stride1, int src_stride2, int h){\ ++ int i;\ ++ for(i=0; i<h; i++){\ ++ uint32_t a,b;\ ++ a= LD32(&src1[i*src_stride1 ]);\ ++ b= LD32(&src2[i*src_stride2 ]);\ ++ OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ ++ a= LD32(&src1[i*src_stride1+4]);\ ++ b= LD32(&src2[i*src_stride2+4]);\ ++ OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ ++ }\ ++}\ ++\ ++void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ ++ int src_stride1, int src_stride2, int h){\ ++ int i;\ ++ for(i=0; i<h; i++){\ ++ uint32_t a,b;\ ++ a= LD32(&src1[i*src_stride1 ]);\ ++ b= LD32(&src2[i*src_stride2 ]);\ ++ OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ ++ }\ ++}\ ++\ ++void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ ++ int src_stride1, int src_stride2, int h){\ ++ OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ ++ OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ ++}\ ++ ++#else ++#define PIXOP2( OPNAME, OP ) \ ++static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ ++ int i;\ ++ for(i=0; i<h; i++){\ ++ OP(*((uint32_t*)(block )), LD32(pixels ));\ ++ pixels+=line_size;\ ++ block +=line_size;\ ++ }\ ++}\ ++static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ ++ int i;\ ++ for(i=0; i<h; i++){\ ++ OP(*((uint32_t*)(block )), LD32(pixels ));\ ++ OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ ++ pixels+=line_size;\ ++ block +=line_size;\ ++ }\ ++}\ 
++static void OPNAME ## _pixels16_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ ++ int i;\ ++ for(i=0; i<h; i++){\ ++ OP(*((uint32_t*)(block )), LD32(pixels ));\ ++ OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ ++ OP(*((uint32_t*)(block+8)), LD32(pixels+8));\ ++ OP(*((uint32_t*)(block+12)), LD32(pixels+12));\ ++ pixels+=line_size;\ ++ block +=line_size;\ ++ }\ ++}\ ++static void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ ++ int src_stride1, int src_stride2, int h){\ ++ int i;\ ++ for(i=0; i<h; i++){\ ++ uint32_t a,b;\ ++ a= LD32(&src1[i*src_stride1 ]);\ ++ b= LD32(&src2[i*src_stride2 ]);\ ++ OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ ++ a= LD32(&src1[i*src_stride1+4]);\ ++ b= LD32(&src2[i*src_stride2+4]);\ ++ OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ ++ }\ ++}\ ++\ ++static void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ ++ int src_stride1, int src_stride2, int h){\ ++ int i;\ ++ for(i=0; i<h; i++){\ ++ uint32_t a,b;\ ++ a= LD32(&src1[i*src_stride1 ]);\ ++ b= LD32(&src2[i*src_stride2 ]);\ ++ OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ ++ }\ ++}\ ++\ ++static void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ ++ int src_stride1, int src_stride2, int h){\ ++ OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ ++ OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ ++}\ ++ ++#endif ++ ++#define op_avg(a, b) a = rnd_avg32(a, b) ++#define op_put(a, b) a = b ++ ++PIXOP2(avg, op_avg) ++PIXOP2(put, op_put) ++#undef op_avg ++#undef op_put ++ ++ ++ ++static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) ++{ ++ int i; ++ for(i=0; i<h; i++) ++ { ++ ST32(dst , LD32(src )); ++ dst+=dstStride; ++ src+=srcStride; ++ } ++} ++ ++static void 
clear_blocks_avr32(DCTELEM *blocks) ++{ ++ int n = 12; ++ uint64_t tmp1, tmp2; ++ blocks += 6*64; ++ asm volatile ( "mov\t%1, 0\n" ++ "mov\t%m1, 0\n" ++ "mov\t%2, 0\n" ++ "mov\t%m2, 0\n" ++ "0:\n" ++ "stm\t--%3, %1, %m1, %2, %m2\n" ++ "stm\t--%3, %1, %m1, %2, %m2\n" ++ "stm\t--%3, %1, %m1, %2, %m2\n" ++ "stm\t--%3, %1, %m1, %2, %m2\n" ++ "sub\t%0, 1\n" ++ "brne\t0b\n" ++ : "+r"(n), "=&r"(tmp1), "=&r"(tmp2), ++ "+r"(blocks)); ++} ++ ++ ++static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) ++{ ++ int i; ++ for(i=0; i<h; i++) ++ { ++ ST32(dst , LD32(src )); ++ ST32(dst+4 , LD32(src+4 )); ++ dst+=dstStride; ++ src+=srcStride; ++ } ++} ++ ++static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) ++{ ++ int i; ++ for(i=0; i<h; i++) ++ { ++ ST32(dst , LD32(src )); ++ ST32(dst+4 , LD32(src+4 )); ++ ST32(dst+8 , LD32(src+8 )); ++ ST32(dst+12, LD32(src+12)); ++ dst+=dstStride; ++ src+=srcStride; ++ } ++} ++ ++ ++static void put_h264_chroma_mc2_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ ++ const int A=(8-x)*(8-y); ++ const int B=( x)*(8-y); ++ const int C=(8-x)*( y); ++ const int D=( x)*( y); ++ int i; ++ ++ PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF0_B, 32); ++ PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF1_B, 0); ++ PICO_PUT_W(PICO_COEFF2_A, 0); ++ PICO_PUT_W(PICO_COEFF2_B, 0); ++ PICO_PUT_W(PICO_CONFIG, ++ PICO_OUTPUT_MODE(PICO_PLANAR_MODE) ++ | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE) ++ | PICO_COEFF_FRAC_BITS(6) ++ | PICO_OFFSET_FRAC_BITS(6)); ++ ++ for(i=0; i<h; i++) ++ { ++ ++ int src0 = LD32(src); ++ int src1 = LD32(src + stride); ++ ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 0, 4, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 1, 5, 0); ++ src += stride; ++ ST16(dst,(short)PICO_GET_W(PICO_OUTPIX0)); ++ dst += stride; ++ } 
++} ++ ++ ++static void put_h264_chroma_mc4_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ ++ const int A=(8-x)*(8-y);\ ++ const int B=( x)*(8-y); ++ const int C=(8-x)*( y); ++ const int D=( x)*( y); ++ int i; ++ ++ PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF0_B, 32); ++ PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF1_B, 0); ++ PICO_PUT_W(PICO_COEFF2_A, 0); ++ PICO_PUT_W(PICO_COEFF2_B, 0); ++ PICO_PUT_W(PICO_CONFIG, ++ PICO_OUTPUT_MODE(PICO_PLANAR_MODE) ++ | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE) ++ | PICO_COEFF_FRAC_BITS(6) ++ | PICO_OFFSET_FRAC_BITS(6)); ++ ++ for(i=0; i<h; i++) ++ { ++ /* ++ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1])); ++ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2])); ++ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3])); ++ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4])); ++ dst+= stride; ++ src+= stride; ++ */ ++ ++ int src0 = LD32(src); ++ int src1 = (((int)src[4] << 24) | (int)src[stride]); ++ int src2 = LD32(src + stride + 1); ++ ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0); ++ src += stride; ++ ST32(dst, PICO_GET_W(PICO_OUTPIX0)); ++ ++ dst += stride; ++ } ++} ++ ++static void put_h264_chroma_mc8_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ ++ const int A=(8-x)*(8-y); ++ const int B=( x)*(8-y); ++ const int C=(8-x)*( y); ++ const int D=( x)*( y); ++ int i; ++ ++ PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF0_B, 32); ++ PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF1_B, 0); ++ PICO_PUT_W(PICO_COEFF2_A, 0); ++ 
PICO_PUT_W(PICO_COEFF2_B, 0); ++ PICO_PUT_W(PICO_CONFIG, ++ PICO_OUTPUT_MODE(PICO_PLANAR_MODE) ++ | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE) ++ | PICO_COEFF_FRAC_BITS(6) ++ | PICO_OFFSET_FRAC_BITS(6)); ++ ++ for(i=0; i<h; i++) ++ { ++ /* ++ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1])); ++ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2])); ++ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3])); ++ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4])); ++ OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5])); ++ OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6])); ++ OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7])); ++ OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8])); ++ dst+= stride; ++ src+= stride; ++ */ ++ int src0 = LD32(src); ++ int src1 = (((int)src[4] << 24) | (int)src[stride]); ++ int src2 = LD32(src + stride + 1); ++ ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0); ++ ST32(dst, PICO_GET_W(PICO_OUTPIX0)); ++ ++ src0 = LD32(src + 4); ++ src1 = (src[8] << 24) | src[stride + 4]; ++ src2 = LD32(src + stride + 5); ++ ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0); ++ src += stride; ++ ST32(dst + 4, PICO_GET_W(PICO_OUTPIX0)); ++ ++ dst += stride; ++ } ++} ++ ++ ++static void avg_h264_chroma_mc2_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ ++ const int A=(8-x)*(8-y); ++ const int B=( x)*(8-y); ++ const int 
C=(8-x)*( y); ++ const int D=( x)*( y); ++ int i; ++ ++ PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF0_B, 32); ++ PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF1_B, 0); ++ PICO_PUT_W(PICO_COEFF2_A, 0); ++ PICO_PUT_W(PICO_COEFF2_B, 0); ++ PICO_PUT_W(PICO_CONFIG, ++ PICO_OUTPUT_MODE(PICO_PLANAR_MODE) ++ | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE) ++ | PICO_COEFF_FRAC_BITS(6) ++ | PICO_OFFSET_FRAC_BITS(6)); ++ ++ for(i=0; i<h; i++) ++ { ++ int src0 = LD32(src); ++ int src1 = LD32(src + stride); ++ ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 0, 4, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 1, 5, 0); ++ src += stride; ++ ST16(dst, rnd_avg32(LD16(dst), PICO_GET_W(PICO_OUTPIX0))); ++ dst += stride; ++ } ++} ++ ++/* Averaging 4-pixel-wide chroma interpolation on the PICO coprocessor: computes the bilinear blend given in the reference comment inside the loop, then merges it into dst with rnd_avg32. The weights A..D come from the fractional offset (x, y) and always sum to 64. */ ++static void avg_h264_chroma_mc4_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ ++ const int A=(8-x)*(8-y); ++ const int B=( x)*(8-y); ++ const int C=(8-x)*( y); ++ const int D=( x)*( y); ++ int i; ++ ++ PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF0_B, 32); ++ PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF1_B, 0); ++ PICO_PUT_W(PICO_COEFF2_A, 0); ++ PICO_PUT_W(PICO_COEFF2_B, 0); ++ PICO_PUT_W(PICO_CONFIG, ++ PICO_OUTPUT_MODE(PICO_PLANAR_MODE) ++ | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE) ++ | PICO_COEFF_FRAC_BITS(6) ++ | PICO_OFFSET_FRAC_BITS(6)); ++ ++ for(i=0; i<h; i++) ++ { ++ /* ++ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1])); ++ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2])); ++ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3])); ++ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4])); ++ dst+= stride; ++ src+= stride; ++ */ ++ ++ /* src is only byte-aligned, so load through LD32 exactly as the put_ flavour does rather than dereferencing a cast int pointer; the shift is done in uint32_t to avoid overflowing into the sign bit of int. */ ++ int src0 = LD32(src); ++ int src1 = (int)(((uint32_t)src[4] << 24) | src[stride]); ++ int src2 = LD32(src + stride + 1); ++ ++ 
PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0); ++ src += stride; ++ ST32(dst, rnd_avg32(LD32(dst), PICO_GET_W(PICO_OUTPIX0))); ++ dst += stride; ++ } ++} ++/* Averaging 8-pixel-wide chroma interpolation on the PICO coprocessor: every row is produced as two 4-pixel halves, each merged into dst with rnd_avg32. The scalar formula is given in the reference comment inside the loop. */ ++static void avg_h264_chroma_mc8_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ ++ const int A=(8-x)*(8-y); ++ const int B=( x)*(8-y); ++ const int C=(8-x)*( y); ++ const int D=( x)*( y); ++ int i; ++ ++ PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF0_B, 32); ++ PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF)); ++ PICO_PUT_W(PICO_COEFF1_B, 0); ++ PICO_PUT_W(PICO_COEFF2_A, 0); ++ PICO_PUT_W(PICO_COEFF2_B, 0); ++ PICO_PUT_W(PICO_CONFIG, ++ PICO_OUTPUT_MODE(PICO_PLANAR_MODE) ++ | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE) ++ | PICO_COEFF_FRAC_BITS(6) ++ | PICO_OFFSET_FRAC_BITS(6)); ++ ++ for(i=0; i<h; i++) ++ { ++ /* ++ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1])); ++ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2])); ++ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3])); ++ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4])); ++ OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5])); ++ OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6])); ++ OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7])); ++ OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8])); ++ dst+= stride; ++ src+= stride; ++ */ ++ /* src is only byte-aligned, so load through LD32 as the put_ flavour does rather than dereferencing a cast int pointer. The former (volatile int) cast had no effect on an rvalue and is dropped; the shift is done in uint32_t to avoid overflowing into the sign bit of int. */ ++ int src0 = LD32(src); ++ int src1 = (int)(((uint32_t)src[4] << 24) | src[stride]); ++ int src2 = LD32(src + stride + 1); ++ ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0); 
++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0); ++ ST32(dst, rnd_avg32(LD32(dst), PICO_GET_W(PICO_OUTPIX0))); ++ ++ src0 = LD32(src + 4); ++ src1 = (int)(((uint32_t)src[8] << 24) | src[stride + 4]); ++ src2 = LD32(src + stride + 5); ++ ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0); ++ src += stride; ++ ST32(dst + 4, rnd_avg32(LD32(dst + 4), PICO_GET_W(PICO_OUTPIX0))); ++ dst += stride; ++ } ++} ++/* PICO set-up for the horizontal qpel lowpass: horizontal-filter input, planar output, 5 fractional bits, taps 1/-5/20 and 20/-5/1 with a constant 16 in coeff0_3. */ ++static struct pico_config_t h264_qpel4_h_lowpass_config = { ++ .input_mode = PICO_HOR_FILTER_MODE, ++ .output_mode = PICO_PLANAR_MODE, ++ .coeff_frac_bits = 5, ++ .offset_frac_bits = 5, ++ .coeff0_0 = 1, ++ .coeff0_1 = -5, ++ .coeff0_2 = 20, ++ .coeff0_3 = 16, ++ .coeff1_0 = 20, ++ .coeff1_1 = -5, ++ .coeff1_2 = 1, ++ .coeff1_3 = 0, ++ .coeff2_0 = 0, ++ .coeff2_1 = 0, ++ .coeff2_2 = 0, ++ .coeff2_3 = 0 ++}; ++ ++ ++/* Horizontal lowpass over a 4x4 block (h is fixed at 4): per row, loads the pixels from src-2 onwards into the PICO and stores one 32-bit result word to dst. The scalar formula is kept in the comment inside the loop. */ ++static void put_h264_qpel4_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ const int h=4; ++ int i; ++ ++ set_pico_config(&h264_qpel4_h_lowpass_config); ++ ++ for(i=0; i<h; i++){ ++ ++ /* ++ OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ ++ OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ ++ OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ ++ OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ ++ dst+=dstStride;\ ++ src+=srcStride;\ */ ++ PICO_MVRC_W(PICO_INPIX0, LD32(src - 2)); ++ PICO_MVRC_D(PICO_INPIX2, LD64_UNALIGNED(src + 2)); ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 3, 6); ++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 4, 7); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 5, 8); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 6, 9); ++ src += 
srcStride; ++ ST32(dst, PICO_GET_W(PICO_OUTPIX0)); ++ dst += dstStride; ++ } ++} ++/* Averaging horizontal lowpass over a 4x4 block (h is fixed at 4): same PICO sequence as the put_ flavour, but each 32-bit result word is merged with the existing dst word through rnd_avg32. */ ++static void avg_h264_qpel4_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ const int h=4; ++ int i; ++ ++ set_pico_config(&h264_qpel4_h_lowpass_config); ++ ++ for(i=0; i<h; i++){ ++ ++ /* ++ OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ ++ OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ ++ OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ ++ OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ ++ dst+=dstStride;\ ++ src+=srcStride;\ */ ++ ++ PICO_MVRC_W(PICO_INPIX0, LD32(src - 2)); ++ PICO_MVRC_D(PICO_INPIX2, LD64_UNALIGNED(src + 2)); ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 3, 6); ++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 4, 7); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 5, 8); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 6, 9); ++ src += srcStride; ++ ST32(dst, rnd_avg32(LD32(dst), PICO_GET_W(PICO_OUTPIX0))); ++ dst += dstStride; ++ } ++} ++/* First PICO set-up used by the vertical qpel4 lowpass routines: vertical-filter input, packed output, 5 fractional bits, the same 1/-5/20 taps (plus a constant 16) in all three coefficient rows. */ ++static struct pico_config_t h264_qpel4_v_lowpass_config1 = { ++ .input_mode = PICO_VERT_FILTER_MODE, ++ .output_mode = PICO_PACKED_MODE, ++ .coeff_frac_bits = 5, ++ .offset_frac_bits = 5, ++ .coeff0_0 = 1, ++ .coeff0_1 = -5, ++ .coeff0_2 = 20, ++ .coeff0_3 = 16, ++ .coeff1_0 = 1, ++ .coeff1_1 = -5, ++ .coeff1_2 = 20, ++ .coeff1_3 = 16, ++ .coeff2_0 = 1, ++ .coeff2_1 = -5, ++ .coeff2_2 = 20, ++ .coeff2_3 = 16 ++}; ++ ++ ++/* Second PICO set-up used by the vertical qpel4 lowpass routines for their last column: vertical-filter input, planar output, taps 1/-5/20 (+16) and 20/-5/1, third coefficient row unused. */ ++static struct pico_config_t h264_qpel4_v_lowpass_config2 = { ++ .input_mode = PICO_VERT_FILTER_MODE, ++ .output_mode = PICO_PLANAR_MODE, ++ .coeff_frac_bits = 5, ++ .offset_frac_bits = 5, ++ .coeff0_0 = 1, ++ .coeff0_1 = -5, ++ .coeff0_2 = 20, ++ .coeff0_3 = 16, ++ .coeff1_0 = 20, ++ .coeff1_1 = -5, ++ .coeff1_2 = 1, ++ .coeff1_3 = 0, ++ .coeff2_0 = 0, ++ .coeff2_1 = 0, ++ .coeff2_2 = 0, ++ .coeff2_3 = 0 ++}; ++/* Vertical lowpass over a 4x4 block: reads nine source rows (src - 2*srcStride .. src + 6*srcStride) and writes four dst rows. The scalar reference is kept in the comment at the top of the body. */ ++static void put_h264_qpel4_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ ++ 
/* Scalar reference implementation, kept for documentation: ++ const int w=4; ++ uint8_t *cm = cropTbl + MAX_NEG_CROP; ++ int i; ++ for(i=0; i<w; i++) ++ { ++ const int srcB= src[-2*srcStride];\ ++ const int srcA= src[-1*srcStride];\ ++ const int src0= src[0 *srcStride];\ ++ const int src1= src[1 *srcStride];\ ++ const int src2= src[2 *srcStride];\ ++ const int src3= src[3 *srcStride];\ ++ const int src4= src[4 *srcStride];\ ++ const int src5= src[5 *srcStride];\ ++ const int src6= src[6 *srcStride];\ ++ OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ ++ OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ ++ OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ ++ OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ ++ dst++;\ ++ src++;\ ++ */ ++ /* Each output row is built from two PICO_OP steps: the first three input rows, then the next three in reverse order accumulated on top (PICO_USE_ACC). */ ++ set_pico_config(&h264_qpel4_v_lowpass_config1); ++ ++ { ++ int srcB= LD32(src - 2*srcStride); ++ int srcA= LD32(src - 1*srcStride); ++ int src0= LD32(src + 0 *srcStride); ++ int src1= LD32(src + 1 *srcStride); ++ int src2= LD32(src + 2 *srcStride); ++ int src3= LD32(src + 3 *srcStride); ++ int src4= LD32(src + 4 *srcStride); ++ int src5= LD32(src + 5 *srcStride); ++ int src6= LD32(src + 6 *srcStride); ++ ++ /* First compute the leftmost three columns */ ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(0, 0, 0, 3, 6); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX0, src3); ++ PICO_OP(PICO_USE_ACC, 0, 0, 3, 6); ++ ST32(dst, PICO_GET_W(PICO_OUTPIX0)); /* output row 0: inputs srcB..src3 */ ++ dst += dstStride; ++ PICO_MVRC_W(PICO_INPIX0, srcA); ++ PICO_MVRC_W(PICO_INPIX1, src0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_OP(0, 0, 0, 3, 6); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_MVRC_W(PICO_INPIX1, src3); ++ PICO_MVRC_W(PICO_INPIX0, src4); ++ PICO_OP(PICO_USE_ACC, 0, 0, 3, 6); ++ ST32(dst, PICO_GET_W(PICO_OUTPIX0)); /* output row 1: inputs srcA..src4 */ ++ dst += dstStride; ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, 
src2); ++ PICO_OP(0, 0, 0, 3, 6); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_MVRC_W(PICO_INPIX1, src4); ++ PICO_MVRC_W(PICO_INPIX0, src5); ++ PICO_OP(PICO_USE_ACC, 0, 0, 3, 6); ++ ST32(dst, PICO_GET_W(PICO_OUTPIX0)); /* output row 2: inputs src0..src5 */ ++ dst += dstStride; ++ PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_OP(0, 0, 0, 3, 6); ++ PICO_MVRC_W(PICO_INPIX2, src4); ++ PICO_MVRC_W(PICO_INPIX1, src5); ++ PICO_MVRC_W(PICO_INPIX0, src6); ++ PICO_OP(PICO_USE_ACC, 0, 0, 3, 6); ++ ST32(dst, PICO_GET_W(PICO_OUTPIX0)); /* output row 3: inputs src1..src6; dst now stays on row 3 */ ++ /* Now compute the last column */ ++ /* The union lets single bytes be placed into / taken out of a 32-bit PICO word. NOTE(review): bytes.b is never written before .word is read below, so that byte of each input word is indeterminate - confirm the coprocessor ignores it. */ ++ union wordbytes { ++ int word; ++ struct { ++ unsigned int t:8; ++ unsigned int u:8; ++ unsigned int l:8; ++ unsigned int b:8; ++ } bytes; } tmp1, tmp2, tmp3; ++ ++ /* Each bit-field assignment keeps only the low 8 bits of the corresponding row word. */ ++ tmp1.bytes.t = srcB; ++ tmp1.bytes.u = src1; ++ tmp1.bytes.l = src4; ++ ++ tmp2.bytes.t = srcA; ++ tmp2.bytes.u = src2; ++ tmp2.bytes.l = src5; ++ ++ tmp3.bytes.t = src0; ++ tmp3.bytes.u = src3; ++ tmp3.bytes.l = src6; ++ ++ PICO_MVRC_W(PICO_INPIX0, tmp1.word); ++ PICO_MVRC_W(PICO_INPIX1, tmp2.word); ++ PICO_MVRC_W(PICO_INPIX2, tmp3.word); ++ set_pico_config(&h264_qpel4_v_lowpass_config2); ++ ++ ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 3, 6); ++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 4, 7); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 5, 8); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 6, 9); ++ /* dst points at output row 3 here, so the four result bytes are scattered upwards into byte 3 of rows 3, 2, 1 and 0. */ ++ PICO_MVCR_W(tmp1.word, PICO_OUTPIX0); ++ dst[3] = (char)(tmp1.bytes.b); ++ dst[3 - dstStride] = (char)(tmp1.bytes.l); ++ dst[3 - 2*dstStride] = (char)(tmp1.bytes.u); ++ dst[3 - 3*dstStride] = (char)(tmp1.bytes.t); ++ ++ } ++ /*} ++ ++ ++ }*/ ++} ++/* Averaging vertical lowpass over a 4x4 block: computes the same filter as the put_ flavour into a 16-byte scratch block, then merges the four rows into dst with rnd_avg32. */ ++static void avg_h264_qpel4_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ ++ /* Scalar reference implementation, kept for documentation: ++ const int w=4; ++ uint8_t *cm = cropTbl + MAX_NEG_CROP; ++ int i; ++ for(i=0; i<w; i++) ++ { ++ const int srcB= src[-2*srcStride];\ ++ const int srcA= src[-1*srcStride];\ ++ const int src0= src[0 *srcStride];\ ++ const int src1= src[1 *srcStride];\ ++ const int src2= src[2 *srcStride];\ ++ 
const int src3= src[3 *srcStride];\ ++ const int src4= src[4 *srcStride];\ ++ const int src5= src[5 *srcStride];\ ++ const int src6= src[6 *srcStride];\ ++ OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ ++ OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ ++ OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ ++ OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ ++ dst++;\ ++ src++;\ ++ */ ++ uint8_t tmp_block[4*4]; /* filtered 4x4 result, 4 bytes per row, before averaging into dst */ ++ ++ set_pico_config(&h264_qpel4_v_lowpass_config1); ++ ++ { ++ int srcB= LD32(src - 2*srcStride); ++ int srcA= LD32(src - 1*srcStride); ++ int src0= LD32(src + 0 *srcStride); ++ int src1= LD32(src + 1 *srcStride); ++ int src2= LD32(src + 2 *srcStride); ++ int src3= LD32(src + 3 *srcStride); ++ int src4= LD32(src + 4 *srcStride); ++ int src5= LD32(src + 5 *srcStride); ++ int src6= LD32(src + 6 *srcStride); ++ ++ /* First compute the leftmost three columns */ ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(0, 0, 0, 3, 6); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX0, src3); ++ PICO_OP(PICO_USE_ACC, 0, 0, 3, 6); ++ ST32(tmp_block, PICO_GET_W(PICO_OUTPIX0)); /* scratch row 0 */ ++ PICO_MVRC_W(PICO_INPIX0, srcA); ++ PICO_MVRC_W(PICO_INPIX1, src0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_OP(0, 0, 0, 3, 6); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_MVRC_W(PICO_INPIX1, src3); ++ PICO_MVRC_W(PICO_INPIX0, src4); ++ PICO_OP(PICO_USE_ACC, 0, 0, 3, 6); ++ ST32(tmp_block + 4, PICO_GET_W(PICO_OUTPIX0)); /* scratch row 1 */ ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(0, 0, 0, 3, 6); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_MVRC_W(PICO_INPIX1, src4); ++ PICO_MVRC_W(PICO_INPIX0, src5); ++ PICO_OP(PICO_USE_ACC, 0, 0, 3, 6); ++ ST32(tmp_block + 8, PICO_GET_W(PICO_OUTPIX0)); /* scratch row 2 */ ++ PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); 
++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_OP(0, 0, 0, 3, 6); ++ PICO_MVRC_W(PICO_INPIX2, src4); ++ PICO_MVRC_W(PICO_INPIX1, src5); ++ PICO_MVRC_W(PICO_INPIX0, src6); ++ PICO_OP(PICO_USE_ACC, 0, 0, 3, 6); ++ ST32(tmp_block + 12, PICO_GET_W(PICO_OUTPIX0)); /* scratch row 3 */ ++ /* Now compute the last column */ ++ /* The union lets single bytes be placed into / taken out of a 32-bit PICO word. NOTE(review): bytes.b is never written before .word is read below - confirm the coprocessor ignores that byte. */ ++ union wordbytes { ++ int word; ++ struct { ++ unsigned int t:8; ++ unsigned int u:8; ++ unsigned int l:8; ++ unsigned int b:8; ++ } bytes; } tmp1, tmp2, tmp3; ++ ++ /* Each bit-field assignment keeps only the low 8 bits of the corresponding row word. */ ++ tmp1.bytes.t = srcB; ++ tmp1.bytes.u = src1; ++ tmp1.bytes.l = src4; ++ ++ tmp2.bytes.t = srcA; ++ tmp2.bytes.u = src2; ++ tmp2.bytes.l = src5; ++ ++ tmp3.bytes.t = src0; ++ tmp3.bytes.u = src3; ++ tmp3.bytes.l = src6; ++ ++ PICO_MVRC_W(PICO_INPIX0, tmp1.word); ++ PICO_MVRC_W(PICO_INPIX1, tmp2.word); ++ PICO_MVRC_W(PICO_INPIX2, tmp3.word); ++ set_pico_config(&h264_qpel4_v_lowpass_config2); ++ ++ ++ PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 3, 6); ++ PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 4, 7); ++ PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 5, 8); ++ PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 6, 9); ++ /* Scatter the four result bytes into byte 3 of scratch rows 3, 2, 1 and 0. */ ++ PICO_MVCR_W(tmp1.word, PICO_OUTPIX0); ++ tmp_block[3 + 3*4] = (char)(tmp1.bytes.b); ++ tmp_block[3 + 2*4] = (char)(tmp1.bytes.l); ++ tmp_block[3 + 1*4] = (char)(tmp1.bytes.u); ++ tmp_block[3] = (char)(tmp1.bytes.t); ++ ++ /* Compute the average: the srcB..src5 locals are reused to hold the four existing dst rows and the four filtered scratch rows. */ ++ srcB= LD32(dst); ++ srcA= LD32(dst + dstStride); ++ src0= LD32(dst + dstStride*2); ++ src1= LD32(dst + dstStride*3); ++ ++ src2= LD32(tmp_block); ++ src3= LD32(tmp_block + 4); ++ src4= LD32(tmp_block + 8); ++ src5= LD32(tmp_block + 12); ++ ++ ST32(dst, rnd_avg32(srcB, src2)); ++ ST32(dst + dstStride, rnd_avg32(srcA, src3)); ++ ST32(dst + 2*dstStride, rnd_avg32(src0, src4)); ++ ST32(dst + 3*dstStride, rnd_avg32(src1, src5)); ++ } ++} ++/* PICO set-up for the first pass of the combined horizontal+vertical qpel4 lowpass: horizontal-filter input, packed output, 10 fractional bits. */ ++static struct pico_config_t h264_qpel4_hv_lowpass_config = { ++ .input_mode = PICO_HOR_FILTER_MODE, ++ .output_mode = PICO_PACKED_MODE, ++ .coeff_frac_bits = 10, ++ .offset_frac_bits = 10, ++ .coeff0_0 = 1, ++ .coeff0_1 = -5, ++ .coeff0_2 = 20, ++ .coeff0_3 
= 512, ++ .coeff1_0 = -5, ++ .coeff1_1 = 25, ++ .coeff1_2 = -100, ++ .coeff1_3 = 0, ++ .coeff2_0 = 20, ++ .coeff2_1 = -100, ++ .coeff2_2 = 400, ++ .coeff2_3 = 0 ++}; ++/* Combined horizontal+vertical lowpass over a 4x4 block, done in two passes over the source. Pass 1 (this loop) saves three PICO output registers per step into tmp_block; pass 2 reloads them and produces the final pixels. */ ++static void put_h264_qpel4_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ ++ int32_t tmp_block[48]; ++ int32_t *tmp = tmp_block; ++ int i; ++ ++ set_pico_config(&h264_qpel4_hv_lowpass_config); ++ ++ src -= 2; ++ for ( i = 0; i < 2; i++ ){ ++ int srcB= LD32(src - 2*srcStride); ++ int srcA= LD32(src - 1*srcStride); ++ int src0= LD32(src + 0 *srcStride); ++ int src1= LD32(src + 1 *srcStride); ++ int src2= LD32(src + 2 *srcStride); ++ int src3= LD32(src + 3 *srcStride); ++ int src4= LD32(src + 4 *srcStride); ++ int src5= LD32(src + 5 *srcStride); ++ int src6= LD32(src + 6 *srcStride); ++ /* Eight store steps follow per iteration, each writing 3 words and advancing tmp by 3. */ ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(0, 0, 0, 4, 8); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX0, src3); ++ PICO_OP(PICO_USE_ACC, 0, 0, 4, 8); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_OP(0, 0, 1, 5, 9); ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(PICO_USE_ACC, 0, 1, 5, 9); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_OP(0, 0, 4, 8, 0); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_MVRC_W(PICO_INPIX1, src3); ++ PICO_MVRC_W(PICO_INPIX0, src4); ++ PICO_OP(PICO_USE_ACC, 0, 0, 4, 8); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_OP(0, 0, 1, 5, 9); ++ PICO_MVRC_W(PICO_INPIX0, srcA); ++ PICO_MVRC_W(PICO_INPIX1, src0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_OP(PICO_USE_ACC, 0, 1, 5, 9); ++ PICO_STCM_W(tmp, ++ 
PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_MVRC_W(PICO_INPIX0, src2); ++ PICO_OP(0, 0, 4, 8, 0); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_MVRC_W(PICO_INPIX1, src4); ++ PICO_MVRC_W(PICO_INPIX0, src5); ++ PICO_OP(PICO_USE_ACC, 0, 0, 4, 8); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_OP(0, 0, 1, 5, 9); ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_USE_ACC, 0, 1, 5, 9); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_MVRC_W(PICO_INPIX0, src3); ++ PICO_OP(0, 0, 4, 8, 0); ++ PICO_MVRC_W(PICO_INPIX2, src4); ++ PICO_MVRC_W(PICO_INPIX1, src5); ++ PICO_MVRC_W(PICO_INPIX0, src6); ++ PICO_OP(PICO_USE_ACC, 0, 0, 4, 8); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_OP(0, 0, 1, 5, 9); ++ PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_OP(PICO_USE_ACC, 0, 1, 5, 9); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ src += 2; ++ } ++ /* Pass 1 advanced src by 2 twice and tmp by 2 iterations * 8 steps * 3 words = 48; step src back by one and rewind tmp to the start of tmp_block for pass 2. */ ++ src -= 1; ++ tmp -= 48; ++ ++ /* Pass 2: reprogram only the PICO mode word (vertical-filter input, planar output, still 10 fractional bits). */ ++ PICO_PUT_W(PICO_CONFIG, ++ PICO_OUTPUT_MODE(PICO_PLANAR_MODE) ++ | PICO_INPUT_MODE(PICO_VERT_FILTER_MODE) ++ | PICO_COEFF_FRAC_BITS(10) ++ | PICO_OFFSET_FRAC_BITS(10)); ++ ++ for ( i = 0; i < 2; i++ ){ ++ int srcB= LD32(src - 2*srcStride); ++ int srcA= LD32(src - 1*srcStride); ++ int src0= LD32(src + 0 *srcStride); ++ int src1= LD32(src + 1 *srcStride); ++ int src2= LD32(src + 2 *srcStride); ++ int src3= LD32(src + 3 *srcStride); ++ int src4= LD32(src + 4 *srcStride); ++ int src5= LD32(src + 5 *srcStride); ++ int src6= LD32(src + 6 *srcStride); ++ ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ 
PICO_REGVECT_VMU2_OUT); ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(PICO_USE_ACC, 0, 6, 3, 0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX0, src3); ++ PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 0, 6, 3, 0); ++ /* Each PICO_LDCM_W_INC reloads one saved group of three pass-1 words; presumably it also advances tmp, since tmp is not stepped explicitly in this loop - confirm against the macro definition. */ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_OP(PICO_USE_ACC, 1, 9, 6, 3); ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 1, 9, 6, 3); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_MVRC_W(PICO_INPIX0, srcA); ++ PICO_MVRC_W(PICO_INPIX1, src0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_OP(PICO_USE_ACC, 2, 6, 3, 0); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_MVRC_W(PICO_INPIX1, src3); ++ PICO_MVRC_W(PICO_INPIX0, src4); ++ PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 2, 6, 3, 0); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_OP(PICO_USE_ACC, 3, 9, 6, 3); ++ PICO_MVRC_W(PICO_INPIX0, srcA); ++ PICO_MVRC_W(PICO_INPIX1, src0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 3, 9, 6, 3); ++ /* OUTPIX0 now holds two output rows of two pixels each: the upper halfword goes to row 0, the lower halfword to row 1. */ ++ ST16(dst + 0*dstStride, (short)(PICO_GET_W(PICO_OUTPIX0) >> 16)); ++ ST16(dst + 1*dstStride, (short)PICO_GET_W(PICO_OUTPIX0)); ++ ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_USE_ACC, 0, 6, 3, 0); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_MVRC_W(PICO_INPIX1, src4); ++ PICO_MVRC_W(PICO_INPIX0, src5); ++ PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 0, 6, 3, 0); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ 
PICO_REGVECT_VMU2_OUT); ++ PICO_OP(PICO_USE_ACC, 1, 9, 6, 3); ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 1, 9, 6, 3); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_OP(PICO_USE_ACC, 2, 6, 3, 0); ++ PICO_MVRC_W(PICO_INPIX2, src4); ++ PICO_MVRC_W(PICO_INPIX1, src5); ++ PICO_MVRC_W(PICO_INPIX0, src6); ++ PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 2, 6, 3, 0); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_OP(PICO_USE_ACC, 3, 9, 6, 3); ++ PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 3, 9, 6, 3); ++ /* Upper halfword of OUTPIX0 goes to row 2, lower halfword to row 3. */ ++ ST16(dst + 2*dstStride, (short)(PICO_GET_W(PICO_OUTPIX0) >> 16)); ++ ST16(dst + 3*dstStride, (short)PICO_GET_W(PICO_OUTPIX0)); ++ /* Move on to the next pair of columns. */ ++ dst += 2; ++ src += 2; ++ } ++} ++ ++ ++ ++/* Averaging combined horizontal+vertical lowpass over a 4x4 block: the same two-pass PICO sequence as the put_ flavour, but each 16-bit result is merged with the existing dst contents through rnd_avg32. */ ++static void avg_h264_qpel4_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ ++ int32_t tmp_block[48]; ++ int32_t *tmp = tmp_block; ++ int i; ++ ++ set_pico_config(&h264_qpel4_hv_lowpass_config); ++ ++ src -= 2; ++ for ( i = 0; i < 2; i++ ){ ++ int srcB= LD32(src - 2*srcStride); ++ int srcA= LD32(src - 1*srcStride); ++ int src0= LD32(src + 0 *srcStride); ++ int src1= LD32(src + 1 *srcStride); ++ int src2= LD32(src + 2 *srcStride); ++ int src3= LD32(src + 3 *srcStride); ++ int src4= LD32(src + 4 *srcStride); ++ int src5= LD32(src + 5 *srcStride); ++ int src6= LD32(src + 6 *srcStride); ++ ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(0, 0, 0, 4, 8); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX0, src3); ++ 
PICO_OP(PICO_USE_ACC, 0, 0, 4, 8); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ /* Pass 1 continues: every step below stores three PICO output registers to tmp and advances tmp by 3 words. */ ++ PICO_OP(0, 0, 1, 5, 9); ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(PICO_USE_ACC, 0, 1, 5, 9); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_OP(0, 0, 4, 8, 0); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_MVRC_W(PICO_INPIX1, src3); ++ PICO_MVRC_W(PICO_INPIX0, src4); ++ PICO_OP(PICO_USE_ACC, 0, 0, 4, 8); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_OP(0, 0, 1, 5, 9); ++ PICO_MVRC_W(PICO_INPIX0, srcA); ++ PICO_MVRC_W(PICO_INPIX1, src0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_OP(PICO_USE_ACC, 0, 1, 5, 9); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_MVRC_W(PICO_INPIX0, src2); ++ PICO_OP(0, 0, 4, 8, 0); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_MVRC_W(PICO_INPIX1, src4); ++ PICO_MVRC_W(PICO_INPIX0, src5); ++ PICO_OP(PICO_USE_ACC, 0, 0, 4, 8); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_OP(0, 0, 1, 5, 9); ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_USE_ACC, 0, 1, 5, 9); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_MVRC_W(PICO_INPIX0, src3); ++ PICO_OP(0, 0, 4, 8, 0); ++ PICO_MVRC_W(PICO_INPIX2, src4); ++ PICO_MVRC_W(PICO_INPIX1, src5); ++ PICO_MVRC_W(PICO_INPIX0, src6); ++ PICO_OP(PICO_USE_ACC, 0, 0, 4, 8); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ ++ PICO_OP(0, 0, 1, 5, 9); ++ 
PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_OP(PICO_USE_ACC, 0, 1, 5, 9); ++ PICO_STCM_W(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ tmp += 3; ++ src += 2; ++ } ++ /* Pass 1 advanced src by 2 twice and tmp by 2 iterations * 8 steps * 3 words = 48; step src back by one and rewind tmp to the start of tmp_block for pass 2. */ ++ src -= 1; ++ tmp -= 48; ++ ++ /* Pass 2: reprogram only the PICO mode word (vertical-filter input, planar output, still 10 fractional bits). */ ++ PICO_PUT_W(PICO_CONFIG, ++ PICO_OUTPUT_MODE(PICO_PLANAR_MODE) ++ | PICO_INPUT_MODE(PICO_VERT_FILTER_MODE) ++ | PICO_COEFF_FRAC_BITS(10) ++ | PICO_OFFSET_FRAC_BITS(10)); ++ ++ for ( i = 0; i < 2; i++ ){ ++ int srcB= LD32(src - 2*srcStride); ++ int srcA= LD32(src - 1*srcStride); ++ int src0= LD32(src + 0 *srcStride); ++ int src1= LD32(src + 1 *srcStride); ++ int src2= LD32(src + 2 *srcStride); ++ int src3= LD32(src + 3 *srcStride); ++ int src4= LD32(src + 4 *srcStride); ++ int src5= LD32(src + 5 *srcStride); ++ int src6= LD32(src + 6 *srcStride); ++ /* Each PICO_LDCM_W_INC reloads one saved group of three pass-1 words; presumably it also advances tmp, since tmp is not stepped explicitly in this loop - confirm against the macro definition. */ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(PICO_USE_ACC, 0, 6, 3, 0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX0, src3); ++ PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 0, 6, 3, 0); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_OP(PICO_USE_ACC, 1, 9, 6, 3); ++ PICO_MVRC_W(PICO_INPIX0, srcB); ++ PICO_MVRC_W(PICO_INPIX1, srcA); ++ PICO_MVRC_W(PICO_INPIX2, src0); ++ PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 1, 9, 6, 3); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_MVRC_W(PICO_INPIX0, srcA); ++ PICO_MVRC_W(PICO_INPIX1, src0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_OP(PICO_USE_ACC, 2, 6, 3, 0); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_MVRC_W(PICO_INPIX1, src3); ++ PICO_MVRC_W(PICO_INPIX0, src4); ++ PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 2, 6, 3, 0); ++ ++ 
PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_OP(PICO_USE_ACC, 3, 9, 6, 3); ++ PICO_MVRC_W(PICO_INPIX0, srcA); ++ PICO_MVRC_W(PICO_INPIX1, src0); ++ PICO_MVRC_W(PICO_INPIX2, src1); ++ PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 3, 9, 6, 3); ++ /* OUTPIX0 holds two output rows of two pixels each: the upper halfword is averaged into row 0, the lower halfword into row 1. */ ++ ST16(dst + 0*dstStride, rnd_avg32(LD16(dst + 0*dstStride), PICO_GET_W(PICO_OUTPIX0) >> 16)); ++ ST16(dst + 1*dstStride, rnd_avg32(LD16(dst + 1*dstStride), PICO_GET_W(PICO_OUTPIX0))); ++ ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_USE_ACC, 0, 6, 3, 0); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_MVRC_W(PICO_INPIX1, src4); ++ PICO_MVRC_W(PICO_INPIX0, src5); ++ PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 0, 6, 3, 0); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_OP(PICO_USE_ACC, 1, 9, 6, 3); ++ PICO_MVRC_W(PICO_INPIX0, src0); ++ PICO_MVRC_W(PICO_INPIX1, src1); ++ PICO_MVRC_W(PICO_INPIX2, src2); ++ PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 1, 9, 6, 3); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_OP(PICO_USE_ACC, 2, 6, 3, 0); ++ PICO_MVRC_W(PICO_INPIX2, src4); ++ PICO_MVRC_W(PICO_INPIX1, src5); ++ PICO_MVRC_W(PICO_INPIX0, src6); ++ PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 2, 6, 3, 0); ++ ++ PICO_LDCM_W_INC(tmp, ++ PICO_REGVECT_VMU0_OUT, ++ PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT); ++ PICO_OP(PICO_USE_ACC, 3, 9, 6, 3); ++ PICO_MVRC_W(PICO_INPIX0, src1); ++ PICO_MVRC_W(PICO_INPIX1, src2); ++ PICO_MVRC_W(PICO_INPIX2, src3); ++ PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 3, 9, 6, 3); ++ /* Same split for rows 2 and 3. */ ++ ST16(dst + 2*dstStride, rnd_avg32(LD16(dst + 2*dstStride), 
PICO_GET_W(PICO_OUTPIX0) >> 16)); ++ ST16(dst + 3*dstStride, rnd_avg32(LD16(dst + 3*dstStride), PICO_GET_W(PICO_OUTPIX0))); ++ ++ dst += 2; ++ src += 2; ++ } ++} ++ ++ ++static void put_h264_qpel8_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ put_h264_qpel4_v_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel4_v_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++ src += 4*srcStride; ++ dst += 4*dstStride; ++ put_h264_qpel4_v_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel4_v_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++} ++ ++static void avg_h264_qpel8_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ avg_h264_qpel4_v_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel4_v_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++ src += 4*srcStride; ++ dst += 4*dstStride; ++ avg_h264_qpel4_v_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel4_v_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++} ++ ++static void put_h264_qpel8_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ put_h264_qpel4_h_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel4_h_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++ src += 4*srcStride; ++ dst += 4*dstStride; ++ put_h264_qpel4_h_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel4_h_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++} ++ ++static void avg_h264_qpel8_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ avg_h264_qpel4_h_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel4_h_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++ src += 4*srcStride; ++ dst += 4*dstStride; ++ avg_h264_qpel4_h_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel4_h_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++} ++ ++static void put_h264_qpel8_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ 
++ put_h264_qpel4_hv_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel4_hv_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++ src += 4*srcStride; ++ dst += 4*dstStride; ++ put_h264_qpel4_hv_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel4_hv_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++} ++ ++static void avg_h264_qpel8_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ avg_h264_qpel4_hv_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel4_hv_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++ src += 4*srcStride; ++ dst += 4*dstStride; ++ avg_h264_qpel4_hv_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel4_hv_lowpass_pico(dst+4, src+4, dstStride, srcStride); ++} ++ ++static void put_h264_qpel16_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ put_h264_qpel8_v_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel8_v_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++ src += 8*srcStride; ++ dst += 8*dstStride; ++ put_h264_qpel8_v_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel8_v_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++} ++ ++static void avg_h264_qpel16_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ avg_h264_qpel8_v_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel8_v_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++ src += 8*srcStride; ++ dst += 8*dstStride; ++ avg_h264_qpel8_v_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel8_v_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++} ++ ++static void put_h264_qpel16_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ put_h264_qpel8_h_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel8_h_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++ src += 8*srcStride; ++ dst += 8*dstStride; ++ put_h264_qpel8_h_lowpass_pico(dst , src , dstStride, srcStride); ++ 
put_h264_qpel8_h_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++} ++ ++static void avg_h264_qpel16_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ avg_h264_qpel8_h_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel8_h_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++ src += 8*srcStride; ++ dst += 8*dstStride; ++ avg_h264_qpel8_h_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel8_h_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++} ++ ++static void put_h264_qpel16_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ put_h264_qpel8_hv_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel8_hv_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++ src += 8*srcStride; ++ dst += 8*dstStride; ++ put_h264_qpel8_hv_lowpass_pico(dst , src , dstStride, srcStride); ++ put_h264_qpel8_hv_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++} ++ ++static void avg_h264_qpel16_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ ++ avg_h264_qpel8_hv_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel8_hv_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++ src += 8*srcStride; ++ dst += 8*dstStride; ++ avg_h264_qpel8_hv_lowpass_pico(dst , src , dstStride, srcStride); ++ avg_h264_qpel8_hv_lowpass_pico(dst+8, src+8, dstStride, srcStride); ++} ++ ++ ++#define H264_MC(OPNAME, SIZE) \ ++static void OPNAME ## h264_qpel ## SIZE ## _mc00_pico (uint8_t *dst, uint8_t *src, int stride){\ ++ OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc10_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t half[SIZE*SIZE];\ ++ put_h264_qpel ## SIZE ## _h_lowpass_pico(half, src, SIZE, stride);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc20_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ OPNAME ## h264_qpel ## 
SIZE ## _h_lowpass_pico(dst, src, stride, stride);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc30_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t half[SIZE*SIZE];\ ++ put_h264_qpel ## SIZE ## _h_lowpass_pico(half, src, SIZE, stride);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc01_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t full[SIZE*(SIZE+5)];\ ++ uint8_t * const full_mid= full + SIZE*2;\ ++ uint8_t half[SIZE*SIZE];\ ++ copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ ++ put_h264_qpel ## SIZE ## _v_lowpass_pico(half, full_mid, SIZE, SIZE);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc02_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t full[SIZE*(SIZE+5)];\ ++ uint8_t * const full_mid= full + SIZE*2;\ ++ copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ ++ OPNAME ## h264_qpel ## SIZE ## _v_lowpass_pico(dst, full_mid, stride, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc03_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t full[SIZE*(SIZE+5)];\ ++ uint8_t * const full_mid= full + SIZE*2;\ ++ uint8_t half[SIZE*SIZE];\ ++ copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ ++ put_h264_qpel ## SIZE ## _v_lowpass_pico(half, full_mid, SIZE, SIZE);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc11_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t full[SIZE*(SIZE+5)];\ ++ uint8_t * const full_mid= full + SIZE*2;\ ++ uint8_t halfH[SIZE*SIZE];\ ++ uint8_t halfV[SIZE*SIZE];\ ++ put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src, SIZE, stride);\ ++ copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ ++ put_h264_qpel ## SIZE ## 
_v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc31_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t full[SIZE*(SIZE+5)];\ ++ uint8_t * const full_mid= full + SIZE*2;\ ++ uint8_t halfH[SIZE*SIZE];\ ++ uint8_t halfV[SIZE*SIZE];\ ++ put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src, SIZE, stride);\ ++ copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ ++ put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc13_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t full[SIZE*(SIZE+5)];\ ++ uint8_t * const full_mid= full + SIZE*2;\ ++ uint8_t halfH[SIZE*SIZE];\ ++ uint8_t halfV[SIZE*SIZE];\ ++ put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src + stride, SIZE, stride);\ ++ copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ ++ put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc33_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t full[SIZE*(SIZE+5)];\ ++ uint8_t * const full_mid= full + SIZE*2;\ ++ uint8_t halfH[SIZE*SIZE];\ ++ uint8_t halfV[SIZE*SIZE];\ ++ put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src + stride, SIZE, stride);\ ++ copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ ++ put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc22_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_pico(dst, src, stride, stride);\ ++}\ ++\ ++static void 
OPNAME ## h264_qpel ## SIZE ## _mc21_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t halfH[SIZE*SIZE];\ ++ uint8_t halfHV[SIZE*SIZE];\ ++ put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src, SIZE, stride);\ ++ put_h264_qpel ## SIZE ## _hv_lowpass_pico(halfHV, src, SIZE, stride);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc23_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t halfH[SIZE*SIZE];\ ++ uint8_t halfHV[SIZE*SIZE];\ ++ put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src + stride, SIZE, stride);\ ++ put_h264_qpel ## SIZE ## _hv_lowpass_pico(halfHV, src, SIZE, stride);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc12_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t full[SIZE*(SIZE+5)];\ ++ uint8_t * const full_mid= full + SIZE*2;\ ++ uint8_t halfV[SIZE*SIZE];\ ++ uint8_t halfHV[SIZE*SIZE];\ ++ copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ ++ put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\ ++ put_h264_qpel ## SIZE ## _hv_lowpass_pico(halfHV, src, SIZE, stride);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ ++}\ ++\ ++static void OPNAME ## h264_qpel ## SIZE ## _mc32_pico(uint8_t *dst, uint8_t *src, int stride){\ ++ uint8_t full[SIZE*(SIZE+5)];\ ++ uint8_t * const full_mid= full + SIZE*2;\ ++ uint8_t halfV[SIZE*SIZE];\ ++ uint8_t halfHV[SIZE*SIZE];\ ++ copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ ++ put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\ ++ put_h264_qpel ## SIZE ## _hv_lowpass_pico(halfHV, src, SIZE, stride);\ ++ OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ ++}\ ++ ++H264_MC(put_, 4) ++H264_MC(put_, 8) ++H264_MC(put_, 16) ++H264_MC(avg_, 4) ++H264_MC(avg_, 8) 
++H264_MC(avg_, 16) ++ ++ ++ ++#define dspfunc16(PFX) \ ++ void PFX ## _pixels16_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ){ \ ++ PFX ## _pixels8_avr32(dst, pixels, line_size, h);\ ++ PFX ## _pixels8_avr32(dst + 8, pixels + 8, line_size, h);\ ++ }\ ++ void PFX ## _pixels16_h_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ){ \ ++ PFX ## _pixels8_h_avr32(dst, pixels, line_size, h);\ ++ PFX ## _pixels8_h_avr32(dst + 8, pixels + 8, line_size, h);\ ++ }\ ++ void PFX ## _pixels16_v_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ){ \ ++ PFX ## _pixels8_v_avr32(dst, pixels, line_size, h);\ ++ PFX ## _pixels8_v_avr32(dst + 8, pixels + 8, line_size, h);\ ++ }\ ++ void PFX ## _pixels16_hv_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ){ \ ++ PFX ## _pixels8_hv_avr32(dst, pixels, line_size, h);\ ++ PFX ## _pixels8_hv_avr32(dst + 8, pixels + 8, line_size, h);\ ++ }\ ++ ++ ++dspfunc16(put) ++dspfunc16(put_no_rnd) ++dspfunc16(avg) ++dspfunc16(avg_no_rnd) ++#undef dspfunc16 ++ ++static int pix_sum_avr32(uint8_t * pix, int line_size) ++{ ++ int s, i; ++ ++ s = 0; ++ for (i = 0; i < 16; i++) { ++ int tmp1,tmp2,tmp3,tmp4,tmp5; ++ __asm__ volatile ( "ld.w\t%0, %6[0]\n\t" ++ "ld.w\t%1, %6[4]\n\t" ++ "ld.w\t%2, %6[8]\n\t" ++ "ld.w\t%3, %6[12]\n\t" ++ "punpckub.h\t%4, %0:t\n\t" ++ "padd.h\t%5, %5, %4\n\t" ++ "punpckub.h\t%4, %0:b\n\t" ++ "padd.h\t%5, %5, %4\n\t" ++ "punpckub.h\t%4, %1:t\n\t" ++ "padd.h\t%5, %5, %4\n\t" ++ "punpckub.h\t%4, %1:b\n\t" ++ "padd.h\t%5, %5, %4\n\t" ++ "punpckub.h\t%4, %2:t\n\t" ++ "padd.h\t%5, %5, %4\n\t" ++ "punpckub.h\t%4, %2:b\n\t" ++ "padd.h\t%5, %5, %4\n\t" ++ "punpckub.h\t%4, %3:t\n\t" ++ "padd.h\t%5, %5, %4\n\t" ++ "punpckub.h\t%4, %3:b\n\t" ++ "padd.h\t%5, %5, %4\n\t" ++ : "=&r"(tmp1),"=&r"(tmp2),"=&r"(tmp3),"=&r"(tmp4),"=&r"(tmp5),"=&r"(s) ++ : "r"(pix)); ++ pix += line_size; ++ } ++ __asm__ volatile ( "addhh.w\t%0, %0:t, %0:b" : "=&r" (s) ); ++ ++ return s; ++} ++ ++ 
++//#define op_scale1(x) block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom ) ++//#define op_scale2(x) dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) ++//#define H264_WEIGHT(W,H) \ ++//static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ ++// int attribute_unused x, y; \ ++// offset <<= log2_denom; \ ++// if(log2_denom) offset += 1<<(log2_denom-1); \ ++// for(y=0; y<H; y++, block += stride){ \ ++// uint32_t tmp0, tmp1; ++// if(W==2) { \ ++// asm volatile ( "ld.ub\t%[tmp0], %[block][0]\n" \ ++// "ld.ub\t%[tmp1], %[block][1]\n" \ ++// "mulhh.w\t%[tmp0], %[tmp0]:b, %[weight]:b\n" \ ++// "mulhh.w\t%[tmp1], %[tmp1]:b, %[weight]:b\n" \ ++// "asr\t%[tmp0], %[log2_denom]\n" \ ++// "asr\t%[tmp1], %[log2_denom]\n" \ ++// "satu\t%[tmp0] >> 0, 8\n" \ ++// "satu\t%[tmp1] >> 0, 8\n" \ ++// "st.b\t%[block][0], %[tmp0]\n" \ ++// "st.b\t%[block][1], %[tmp1]\n" \ ++// : [tmp0] "=&r"(tmp0), [tmp1] "=&r"(tmp1) \ ++// : [block] "r"(block), [weight]"r"(weight), [log2_denom]"r"(log2denom) ); \ ++// } else if ( W==4 ) { \ ++// asm volatile ( "ld.w\t%[tmp0], %[block][0]\n" \ ++// "punpckub.h\t%[tmp1], %[tmp0]:t\n" \ ++// "punpckub.h\t%[tmp0], %[tmp0]:b\n" \ ++// "mulhh.w\t%[tmp2], %[tmp1]:t, %[weight]:b\n" \ ++// "mulhh.w\t%[tmp1], %[tmp1]:b, %[weight]:b\n" \ ++// "asr\t%[tmp0], %[log2_denom]\n" \ ++// "asr\t%[tmp1], %[log2_denom]\n" \ ++// "satu\t%[tmp0] >> 0, 8\n" \ ++// "satu\t%[tmp1] >> 0, 8\n" \ ++// "st.b\t%[block][0], %[tmp0]\n" \ ++// "st.b\t%[block][1], %[tmp1]\n" \ ++// : [tmp0] "=&r"(tmp0), [tmp1] "=&r"(tmp1) \ ++// : [block] "r"(block), [weight]"r"(weight), [log2_denom]"r"(log2denom) ); \ ++// ++// ++// ++// if(W==4) continue; \ ++// op_scale1(4); \ ++// op_scale1(5); \ ++// op_scale1(6); \ ++// op_scale1(7); \ ++// if(W==8) continue; \ ++// op_scale1(8); \ ++// op_scale1(9); \ ++// op_scale1(10); \ ++// op_scale1(11); \ ++// op_scale1(12); \ ++// 
op_scale1(13); \ ++// op_scale1(14); \ ++// op_scale1(15); \ ++// } \ ++//} \ ++//static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \ ++// int attribute_unused x, y; \ ++// int offset = (offsets + offsetd + 1) >> 1; \ ++// offset = ((offset << 1) + 1) << log2_denom; \ ++// for(y=0; y<H; y++, dst += stride, src += stride){ \ ++// op_scale2(0); \ ++// op_scale2(1); \ ++// if(W==2) continue; \ ++// op_scale2(2); \ ++// op_scale2(3); \ ++// if(W==4) continue; \ ++// op_scale2(4); \ ++// op_scale2(5); \ ++// op_scale2(6); \ ++// op_scale2(7); \ ++// if(W==8) continue; \ ++// op_scale2(8); \ ++// op_scale2(9); \ ++// op_scale2(10); \ ++// op_scale2(11); \ ++// op_scale2(12); \ ++// op_scale2(13); \ ++// op_scale2(14); \ ++// op_scale2(15); \ ++// } \ ++//} ++ ++ ++ ++/* Returns zero in each byte where the absolute difference between <a> and <b> ++ is not less than <compare> */ ++#define PABS_DIFF_LESS_THAN( a, b, compare) \ ++ ({ uint32_t __tmp__, __tmp2__, __mask__; \ ++ asm ( \ ++ /* Check ABS( a - b ) < compare */ \ ++ "psubs.ub\t%[tmp], %[opa], %[opb]\n" \ ++ "psubs.ub\t%[tmp2], %[opb], %[opa]\n" \ ++ "or\t%[tmp], %[tmp2]\n" /* ABS ( a - b ) */ \ ++ /* This produces 0 for all bytes where the comparison is not true */ \ ++ "psubs.ub\t%[mask], %[cmp], %[tmp]\n" \ ++ : [tmp] "=&r"(__tmp__), [tmp2] "=&r"(__tmp2__), [mask] "=&r"(__mask__) \ ++ : [opa] "r"(a), [opb] "r"(b), [cmp] "r"(compare) ); \ ++ __mask__; }) ++ ++/* ++ Set all bytes containing zero in <value> to 255 and the rest to zero. ++ ++ Add with saturation 254 to all bytes making all bytes different from ++ zero become 255. Then add one without saturation to make all bytes ++ originally containing zero 255 and the rest 0. 
*/ ++#define SET_ALL_BITS_IN_ZERO_BYTES(value) \ ++ ({ uint32_t __tmp__; \ ++ asm ( \ ++ "padds.ub\t%[tmp], %[val], %[max_minus_one]\n" \ ++ "padd.b\t%[tmp], %[tmp], %[all_ones]\n" \ ++ : [tmp] "=r"(__tmp__) \ ++ : [val] "r"(value), [max_minus_one] "r"(0xFEFEFEFE), [all_ones] "r"(0x01010101) ); \ ++ __tmp__; }) ++ ++#define PACKW_SH(upper, lower) \ ++ ({ uint32_t __tmp__; \ ++ asm ( \ ++ "packw.sh\t%[tmp], %[u], %[l]\n" \ ++ : [tmp] "=r"(__tmp__) \ ++ : [u] "r"(upper), [l] "r"(lower) ); \ ++ __tmp__; }) ++ ++#define PACKSH_UB(upper, lower) \ ++ ({ uint32_t __tmp__; \ ++ asm ( \ ++ "packsh.sb\t%[tmp], %[u], %[l]\n" \ ++ : [tmp] "=r"(__tmp__) \ ++ : [u] "r"(upper), [l] "r"(lower) ); \ ++ __tmp__; }) ++ ++static void h264_v_loop_filter_luma_avr32(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) ++{ ++ int i; ++ ++ if ( alpha == 0 ) ++ return; ++ ++ alpha = PACKW_SH(alpha, alpha); ++ alpha = PACKSH_UB(alpha, alpha); ++ beta = PACKW_SH(beta, beta); ++ beta = PACKSH_UB(beta, beta); ++ ++ for( i = 0; i < 4; i++ ) { ++ uint32_t p0, p1, p2, q0, q1, q2; ++ uint32_t mask, mask2; ++ uint32_t tmp, tmp2, tmp3, tmp4; ++ ++ if( tc0[i] < 0 ) { ++ pix += 4; ++ continue; ++ } ++ ++/* for( d = 0; d < 4; d++ ) { ++ const int p0 = pix[-1*stride]; ++ const int p1 = pix[-2*stride]; ++ const int p2 = pix[-3*stride]; ++ const int q0 = pix[0]; ++ const int q1 = pix[1*stride]; ++ const int q2 = pix[2*stride]; ++ ++ if( ABS( p0 - q0 ) < alpha && ++ ABS( p1 - p0 ) < beta && ++ ABS( q1 - q0 ) < beta ) { */ ++ ++ p0 = LD32(pix - stride); ++ p1 = LD32(pix - 2*stride); ++ q0 = LD32(pix); ++ q1 = LD32(pix + stride); ++ ++ /* Check which of the columns should be filtered, if any. 
*/ ++ mask = PABS_DIFF_LESS_THAN(p0, q0, alpha); ++ mask |= PABS_DIFF_LESS_THAN(p1, p0, beta); ++ mask |= PABS_DIFF_LESS_THAN(q1, q0, beta); ++ ++ if ( !mask ) ++ continue; ++ ++ mask = SET_ALL_BITS_IN_ZERO_BYTES(mask); ++ ++ ++ int tc = PACKW_SH(tc0[i], tc0[i]); ++ int tc0_p = tc; ++ int tc0_m = PACKW_SH(-tc0[i], -tc0[i]); ++ ++ /* ++ int i_delta; ++ if( ABS( p2 - p0 ) < beta ) { ++ pix[-2*stride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); ++ tc++; ++ }*/ ++ ++ p2 = LD32(pix - 3*stride); ++ mask2 = PABS_DIFF_LESS_THAN(p2, p0, beta) & ~mask; ++ ++ if ( mask2 ){ ++ mask2 = SET_ALL_BITS_IN_ZERO_BYTES(mask2); ++ asm ("pavg.ub\t%[tmp], %[p0], %[q0]\n" ++ "paddh.ub\t%[tmp], %[tmp], %[p2]\n" ++ "punpckub.h\t%[tmp2], %[tmp]:t\n" ++ "punpckub.h\t%[tmp], %[tmp]:b\n" ++ "punpckub.h\t%[tmp3], %[p1]:t\n" ++ "punpckub.h\t%[tmp4], %[p1]:b\n" ++ "psub.h\t%[tmp2], %[tmp2], %[tmp3]\n" ++ "psub.h\t%[tmp], %[tmp], %[tmp4]\n" ++ "pmin.sh\t%[tmp2], %[tmp2], %[tc0_p]\n" ++ "pmin.sh\t%[tmp], %[tmp], %[tc0_p]\n" ++ "pmax.sh\t%[tmp2], %[tmp2], %[tc0_m]\n" ++ "pmax.sh\t%[tmp], %[tmp], %[tc0_m]\n" ++ "padd.h\t%[tmp2], %[tmp2], %[tmp3]\n" ++ "padd.h\t%[tmp], %[tmp], %[tmp4]\n" ++ "packsh.ub\t%[tmp], %[tmp2], %[tmp]\n" ++ "andn\t%[tmp], %[mask2]\n" ++ "and\t%[tmp2], %[q1], %[mask2]\n" ++ "or\t%[tmp], %[tmp2]\n" ++ : [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), ++ [tmp4]"=&r"(tmp4) ++ : [q0]"r"(q0), [p2]"r"(p2), [p1]"r"(p1), [p0]"r"(p0), [q1]"r"(q1), [tc0_p]"r"(tc0_p), ++ [tc0_m]"r"(tc0_m), [mask2]"r"(mask2)); ++ ST32(pix - 2*stride, tmp); ++ tc += 0x00010001; ++ } ++ ++ ++ q2 = LD32(pix + 2*stride); ++ ++ /* ++ if( ABS( q2 - q0 ) < beta ) { ++ pix[ stride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); ++ tc++; ++ } ++ */ ++ mask2 = PABS_DIFF_LESS_THAN(q2, q0, beta) & ~mask; ++ ++ if ( mask2 ){ ++ mask2 = SET_ALL_BITS_IN_ZERO_BYTES(mask2); ++ asm ("pavg.ub\t%[tmp], %[p0], %[q0]\n" ++ "paddh.ub\t%[tmp], %[tmp], 
%[q2]\n" ++ "punpckub.h\t%[tmp2], %[tmp]:t\n" ++ "punpckub.h\t%[tmp], %[tmp]:b\n" ++ "punpckub.h\t%[tmp3], %[q1]:t\n" ++ "punpckub.h\t%[tmp4], %[q1]:b\n" ++ "psub.h\t%[tmp2], %[tmp2], %[tmp3]\n" ++ "psub.h\t%[tmp], %[tmp], %[tmp4]\n" ++ "pmin.sh\t%[tmp2], %[tmp2], %[tc0_p]\n" ++ "pmin.sh\t%[tmp], %[tmp], %[tc0_p]\n" ++ "pmax.sh\t%[tmp2], %[tmp2], %[tc0_m]\n" ++ "pmax.sh\t%[tmp], %[tmp], %[tc0_m]\n" ++ "padd.h\t%[tmp2], %[tmp2], %[tmp3]\n" ++ "padd.h\t%[tmp], %[tmp], %[tmp4]\n" ++ "packsh.ub\t%[tmp], %[tmp2], %[tmp]\n" ++ "andn\t%[tmp], %[mask2]\n" ++ "and\t%[tmp2], %[q1], %[mask2]\n" ++ "or\t%[tmp], %[tmp2]\n" ++ : [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), ++ [tmp4]"=&r"(tmp4) ++ : [q0]"r"(q0), [q2]"r"(q2), [q1]"r"(q1), [p0]"r"(p0), [tc0_p]"r"(tc0_p), ++ [tc0_m]"r"(tc0_m), [mask2]"r"(mask2)); ++ ST32(pix + stride, tmp); ++ tc += 0x00010001; ++ } ++ ++ uint32_t old_p0 = p0; ++ uint32_t old_q0 = q0; ++ ++ /* i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); ++ pix[-stride] = clip_uint8( p0 + i_delta ); ++ pix[0] = clip_uint8( q0 - i_delta ); */ ++ ++ asm ( ++ /* Check if the two upper pixels should be filtered */ ++ "lsr\t%[tmp], %[inv_mask], 16\n" ++ "breq\t0f\n" ++ ++ "punpckub.h\t%[tmp], %[p1]:t\n" ++ "punpckub.h\t%[tmp2], %[q1]:t\n" ++ ++ /* p1 - q1 */ ++ "psub.h\t%[tmp], %[tmp], %[tmp2]\n" ++ ++ "punpckub.h\t%[tmp3], %[q0]:t\n" ++ "punpckub.h\t%[tmp4], %[p0]:t\n" ++ ++ /* q0 - p0 */ ++ "psub.h\t%[tmp2], %[tmp3], %[tmp4]\n" ++ ++ /* (q0 - p0) << 2 */ ++ "plsl.h\t%[tmp2], %[tmp2], 2\n" ++ ++ /* ((q0 - p0) << 2) + (p1 - q1) */ ++ "padd.h\t%[tmp2], %[tmp2], %[tmp]\n" ++ ++ "mov\t%[tmp], 0x00040004\n" ++ /* ((q0 - p0) << 2) + (p1 - q1) + 4*/ ++ "padd.h\t%[tmp2], %[tmp2], %[tmp]\n" ++ ++ /* (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3*/ ++ "pasr.h\t%[tmp2], %[tmp2], 3\n" ++ ++ "mov\t%[tmp], 0\n" ++ "psub.h\t%[tmp], %[tmp], %[tc]\n" ++ ++ /* i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); */ ++ "pmin.sh\t%[tmp2], %[tmp2], 
%[tc]\n" ++ "pmax.sh\t%[tmp2], %[tmp2], %[tmp]\n" ++ ++ ++ /* pix[-stride] = clip_uint8( p0 + i_delta ); */ ++ "padd.h\t%[tmp4], %[tmp4], %[tmp2]\n" ++ ++ ++ /* pix[0] = clip_uint8( q0 - i_delta ); */ ++ "psub.h\t%[tmp3], %[tmp3], %[tmp2]\n" ++ ++ /* Check if the two lower pixels should be filtered */ ++ "lsl\t%[tmp2], %[inv_mask], 16\n" ++ "breq\t1f\n" ++ ++ "0:\n" ++ "punpckub.h\t%[p1], %[p1]:b\n" ++ "punpckub.h\t%[q1], %[q1]:b\n" ++ ++ /* p1 - q1 */ ++ "psub.h\t%[p1], %[p1], %[q1]\n" ++ ++ "punpckub.h\t%[q0], %[q0]:b\n" ++ "punpckub.h\t%[p0], %[p0]:b\n" ++ ++ /* q0 - p0 */ ++ "psub.h\t%[tmp2], %[q0], %[p0]\n" ++ ++ /* (q0 - p0) << 2 */ ++ "plsl.h\t%[tmp2], %[tmp2], 2\n" ++ ++ /* ((q0 - p0) << 2) + (p1 - q1) */ ++ "padd.h\t%[tmp2], %[tmp2], %[p1]\n" ++ ++ "mov\t%[q1], 0x00040004\n" ++ /* ((q0 - p0) << 2) + (p1 - q1) + 4*/ ++ "padd.h\t%[tmp2], %[tmp2], %[q1]\n" ++ ++ /* (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3*/ ++ "pasr.h\t%[tmp2], %[tmp2], 3\n" ++ ++ /* i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); */ ++ "pmin.sh\t%[tmp2], %[tmp2], %[tc]\n" ++ "pmax.sh\t%[tmp2], %[tmp2], %[tmp]\n" ++ ++ /* pix[-stride] = clip_uint8( p0 + i_delta ); */ ++ "padd.h\t%[p0], %[p0], %[tmp2]\n" ++ ++ /* pix[0] = clip_uint8( q0 - i_delta ); */ ++ "psub.h\t%[q0], %[q0], %[tmp2]\n" ++ ++ "1:\n" ++ "packsh.ub\t%[p0], %[tmp4], %[p0]\n" ++ "packsh.ub\t%[q0], %[tmp3], %[tmp4]\n" ++ ++ : [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), ++ [tmp4]"=&r"(tmp4), [q0]"=&r"(q0), [q1]"=&r"(q1), [p0]"=&r"(p0), [p1]"=&r"(p1) ++ : [tc]"r"(tc), [inv_mask]"r"(~mask)); ++ ++ ST32(pix - stride, (mask & old_p0) | (p0 & ~mask)); ++ ST32(pix, (mask & old_q0) | (q0 & ~mask)); ++ ++ } ++ pix += 1; ++} ++ ++ ++ ++ ++#ifdef CHECK_DSP_FUNCS_AGAINST_C ++ ++void dump_block8(uint8_t *block, int line_size, int h){ ++ int i, j; ++ ++ for ( i = 0; i < h ; i++ ){ ++ av_log(NULL, AV_LOG_ERROR, "\t"); ++ for ( j = 0; j < 8 ; j++ ){ ++ av_log(NULL, AV_LOG_ERROR, "%d ", block[j + i*line_size]); ++ } 
++ av_log(NULL, AV_LOG_ERROR, "\n"); ++ } ++} ++ ++void dump_block4(uint8_t *block, int line_size, int h){ ++ int i, j; ++ ++ for ( i = 0; i < h ; i++ ){ ++ av_log(NULL, AV_LOG_ERROR, "\t"); ++ for ( j = 0; j < 4 ; j++ ){ ++ av_log(NULL, AV_LOG_ERROR, "%d ", block[j + i*line_size]); ++ } ++ av_log(NULL, AV_LOG_ERROR, "\n"); ++ } ++} ++ ++void dump_block(uint8_t *block, int line_size, int h, int w){ ++ int i, j; ++ ++ for ( i = 0; i < h ; i++ ){ ++ av_log(NULL, AV_LOG_ERROR, "\t"); ++ for ( j = 0; j < w ; j++ ){ ++ av_log(NULL, AV_LOG_ERROR, "%d ", block[j + i*line_size]); ++ } ++ av_log(NULL, AV_LOG_ERROR, "\n"); ++ } ++} ++ ++void check_block8(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct, ++ int h, char *name, int max_dev){ ++ int i,j; ++ for ( i = 0; i < 8 ; i++ ){ ++ for ( j = 0; j < h ; j++ ){ ++ int diff = test[i + line_size_test*j] - correct[i + line_size_correct*j]; ++ diff = diff < 0 ? -diff : diff; ++ if ( diff > max_dev ){ ++ av_log(NULL, AV_LOG_ERROR, "Error pixel x=%i, y=%i differs. Is 0x%x should be 0x%x\n", ++ i, j, test[i + line_size_test*j], correct[i + j*line_size_correct]); ++ av_log(NULL, AV_LOG_ERROR, "Error resulting block from %s is:\n", name); ++ dump_block8(test, line_size_test, h); ++ av_log(NULL, AV_LOG_ERROR, "But should be equal to:\n"); ++ dump_block8(correct, line_size_correct, h); ++ exit(1); ++ } ++ } ++ } ++} ++ ++void check_block4(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct, ++ int h, char *name, int max_dev){ ++ int i,j; ++ for ( i = 0; i < 4 ; i++ ){ ++ for ( j = 0; j < h ; j++ ){ ++ int diff = test[i + line_size_test*j] - correct[i + line_size_correct*j]; ++ diff = diff < 0 ? -diff : diff; ++ if ( diff > max_dev ){ ++ av_log(NULL, AV_LOG_ERROR, "Error pixel x=%i, y=%i differs. 
Is 0x%x should be 0x%x\n", ++ i, j, test[i + line_size_test*j], correct[i + j*line_size_correct]); ++ av_log(NULL, AV_LOG_ERROR, "Error resulting block from %s is:\n", name); ++ dump_block8(test, line_size_test, h); ++ av_log(NULL, AV_LOG_ERROR, "But should be equal to:\n"); ++ dump_block4(correct, line_size_correct, h); ++ exit(1); ++ } ++ } ++ } ++} ++ ++void check_block(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct, ++ int h, int width, char *name, int max_dev){ ++ int i,j; ++ for ( i = 0; i < width ; i++ ){ ++ for ( j = 0; j < h ; j++ ){ ++ int diff = test[i + line_size_test*j] - correct[i + line_size_correct*j]; ++ diff = diff < 0 ? -diff : diff; ++ if ( diff > max_dev ){ ++ av_log(NULL, AV_LOG_ERROR, "Error pixel x=%i, y=%i differs. Is 0x%x should be 0x%x\n", ++ i, j, test[i + line_size_test*j], correct[i + j*line_size_correct]); ++ av_log(NULL, AV_LOG_ERROR, "Error resulting block from %s is:\n", name); ++ dump_block(test, line_size_test, h, width); ++ av_log(NULL, AV_LOG_ERROR, "But should be equal to:\n"); ++ dump_block(correct, line_size_correct, h, width); ++ exit(1); ++ } ++ } ++ } ++} ++ ++void dump_dct_block(DCTELEM *block){ ++ int i, j; ++ ++ for ( i = 0; i < 8 ; i++ ){ ++ av_log(NULL, AV_LOG_ERROR, "\t"); ++ for ( j = 0; j < 8 ; j++ ){ ++ av_log(NULL, AV_LOG_ERROR, "0x%x ", block[j + i*8]); ++ } ++ av_log(NULL, AV_LOG_ERROR, "\n"); ++ } ++} ++ ++void test_idct_avr32(DCTELEM *block){ ++ DCTELEM testBlock[64]; ++ int i, j; ++ ++ /* Copy transposed block to testBlock */ ++ for ( i = 0; i < 8 ; i++ ){ ++ for ( j = 0; j < 8 ; j++ ){ ++ testBlock[i + 8*j] = block[j + i*8]; ++ } ++ } ++ ++ idct_avr32(block); ++ simple_idct(&testBlock); ++ ++ for ( i = 0; i < 64 ; i++ ){ ++ if ( block[i] != testBlock[i] ){ ++ av_log(NULL, AV_LOG_ERROR, "Error resulting block from idct is:\n"); ++ dump_dct_block(block); ++ av_log(NULL, AV_LOG_ERROR, "But should be equal to the transposed of:\n"); ++ dump_dct_block(testBlock); ++ exit(1); ++ } ++ 
} ++} ++ ++void test_idct_put_avr32(uint8_t *dest, int line_size, DCTELEM *block){ ++ uint8_t testBlock[64]; ++ DCTELEM blockCopy[64]; ++ int i, j; ++ ++ /* Copy transposed block to blockCopy */ ++ for ( i = 0; i < 8 ; i++ ){ ++ for ( j = 0; j < 8 ; j++ ){ ++ blockCopy[i + 8*j] = block[j + i*8]; ++ } ++ } ++ ++ idct_put_avr32(dest, line_size, block); ++ simple_idct_put(&testBlock, 8, blockCopy); ++ ++ check_block8(dest, testBlock, line_size, 8, 8, "idct_put", 1); ++} ++ ++ ++void test_idct_add_avr32(uint8_t *dest, int line_size, DCTELEM *block){ ++ uint8_t testBlock[64]; ++ DCTELEM blockCopy[64]; ++ int i, j; ++ ++ /* Copy dest to testBlock */ ++ for ( i = 0; i < 8 ; i++ ){ ++ for ( j = 0; j < 8 ; j++ ){ ++ testBlock[i + 8*j] = dest[i + j*line_size]; ++ } ++ } ++ ++ /* Copy transposed block to blockCopy */ ++ for ( i = 0; i < 8 ; i++ ){ ++ for ( j = 0; j < 8 ; j++ ){ ++ blockCopy[i + 8*j] = block[j + i*8]; ++ } ++ } ++ ++ idct_add_avr32(dest, line_size, block); ++ simple_idct_add(&testBlock, 8, blockCopy); ++ ++ check_block8(dest, testBlock, line_size, 8, 8, "idct_add", 1); ++} ++ ++void test_h264_idct_add_avr32(uint8_t *dest, DCTELEM *block, int stride){ ++ uint8_t testBlock[16]; ++ DCTELEM blockCopy[16]; ++ int i, j; ++ ++ /* Copy dest to testBlock */ ++ for ( i = 0; i < 4 ; i++ ){ ++ for ( j = 0; j < 4 ; j++ ){ ++ testBlock[i + 4*j] = dest[i + j*stride]; ++ } ++ } ++ ++ /* Copy transposed block to blockCopy */ ++ for ( i = 0; i < 16 ; i++ ){ ++ blockCopy[i] = block[i]; ++ } ++ ++ ff_h264_idct_add_c(dest, block, stride); ++ ++ h264_idct_add_avr32(testBlock, blockCopy, 4); ++ ++ check_block(dest, testBlock, stride, 4, 4, 4, "h264_idct_add", 0); ++} ++ ++void test_h264_idct8_add_avr32(uint8_t *dest, DCTELEM *block, int stride){ ++ uint8_t testBlock[8*8]; ++ DCTELEM blockCopy[8*8]; ++ int i, j; ++ ++ /* Copy dest to testBlock */ ++ for ( i = 0; i < 8 ; i++ ){ ++ for ( j = 0; j < 8 ; j++ ){ ++ testBlock[i + 8*j] = dest[i + j*stride]; ++ } ++ } ++ ++ /* Copy source 
block to blockCopy */ ++ for ( i = 0; i < 8*8 ; i++ ){ ++ blockCopy[i] = block[i]; ++ } ++ ++ ff_h264_idct8_add_c(dest, block, stride); ++ h264_idct8_add_avr32(testBlock, blockCopy, 8); ++ ++ check_block(dest, testBlock, stride, 8, 8, 8, "h264_idct8_add", 0); ++} ++ ++void test_put_pixels_funcs8(op_pixels_func test, op_pixels_func correct, uint8_t *block, ++ const uint8_t *pixels, int line_size, int h, char *name, int in_h_size, int in_v_size){ ++ uint8_t *testBlock, *testBlock2; ++ int i, j; ++ int input_v_size = h + in_v_size; ++ int input_h_size = 8 + in_h_size; ++ ++ testBlock = alloca(input_h_size*input_v_size); ++ testBlock2 = alloca(input_h_size*input_v_size); ++ ++ for ( i = 0; i < input_h_size ; i++ ){ ++ for ( j = 0; j < input_v_size ; j++ ){ ++ testBlock[i + input_h_size*j] = pixels[i + j*line_size]; ++ } ++ } ++ ++ test(block, pixels, line_size, h); ++ correct(testBlock2, testBlock, input_h_size, h); ++ ++ check_block8(block, testBlock2, line_size, input_h_size, h, name, 0); ++ ++} ++ ++void test_h264_chroma_mc_funcs(h264_chroma_mc_func test, h264_chroma_mc_func correct, uint8_t *dst, ++ uint8_t *src, int stride, int h, int w, int x, int y, char *name){ ++ uint8_t *testBlock, *testBlock2; ++ int i, j; ++ int input_v_size = h + 1; ++ int input_h_size = ((w + 1) + 3) & ~3; ++ ++ testBlock = alloca(input_h_size*input_v_size); ++ testBlock2 = alloca(input_h_size*input_v_size); ++ ++ for ( i = 0; i < w + 1 ; i++ ){ ++ for ( j = 0; j < h + 1 ; j++ ){ ++ testBlock[i + input_h_size*j] = src[i + j*stride]; ++ } ++ } ++ ++ for ( i = 0; i < w ; i++ ){ ++ for ( j = 0; j < h ; j++ ){ ++ testBlock2[i + input_h_size*j] = dst[i + j*stride]; ++ } ++ } ++ ++ test(dst, src, stride, h, x, y); ++ correct(testBlock2, testBlock, input_h_size, h, x, y); ++ ++ check_block(dst, testBlock2, stride, input_h_size, h, w, name, 0); ++ ++} ++ ++void test_qpel_mc_funcs(qpel_mc_func test, qpel_mc_func correct, uint8_t *dst, ++ uint8_t *src, int stride, int size, char *name){ ++ uint8_t 
*testBlock, *testBlock2; ++ int i, j; ++ int test_stride = size + 8; ++ ++ testBlock = alloca(test_stride*(size+8)) + 4 + test_stride*4; ++ testBlock2 = alloca(test_stride*size); ++ ++ for ( i = -4; i < size+4 ; i++ ){ ++ for ( j = -4; j < size+4 ; j++ ){ ++ testBlock[i + test_stride*j] = src[i + j*stride]; ++ } ++ } ++ ++ for ( i = 0; i < size ; i++ ){ ++ for ( j = 0; j < size ; j++ ){ ++ testBlock2[i + test_stride*j] = dst[i + j*stride]; ++ } ++ } ++ ++ correct(dst, src, stride); ++ test(testBlock2, testBlock, test_stride); ++ ++ check_block(testBlock2, dst, test_stride, stride, size, size, name, 0); ++ ++} ++ ++ ++#define test_pixels_funcs(PFX, NUM ) \ ++void test_ ## PFX ## _pixels ## NUM ## _avr32( uint8_t *block, const uint8_t *pixels, int line_size, int h){ \ ++ test_put_pixels_funcs8(PFX ## _pixels ## NUM ## _avr32, PFX ## _pixels ## NUM ## _c, \ ++ block, pixels, line_size, h, "test_" #PFX "_pixels", 0, 0); } \ ++void test_ ## PFX ## _pixels ## NUM ## _h_avr32( uint8_t *block, const uint8_t *pixels, int line_size, int h){ \ ++ test_put_pixels_funcs8(PFX ## _pixels ## NUM ## _h_avr32, PFX ## _pixels ## NUM ## _x2_c, \ ++ block, pixels, line_size, h, "test_" #PFX "_pixels_h", 1, 0); } \ ++void test_ ## PFX ## _pixels ## NUM ## _v_avr32( uint8_t *block, const uint8_t *pixels, int line_size, int h){ \ ++ test_put_pixels_funcs8(PFX ## _pixels ## NUM ## _v_avr32, PFX ## _pixels ## NUM ## _y2_c, \ ++ block, pixels, line_size, h, "test_" #PFX "_pixels_v", 0, 1); } \ ++void test_ ## PFX ## _pixels ## NUM ## _hv_avr32( uint8_t *block, const uint8_t *pixels, int line_size, int h){ \ ++ test_put_pixels_funcs8(PFX ## _pixels ## NUM ## _hv_avr32, PFX ## _pixels ## NUM ## _xy2_c, \ ++ block, pixels, line_size, h, "test_" #PFX "_pixels_hv", 1, 1); } ++ ++test_pixels_funcs(put, 8); ++test_pixels_funcs(put_no_rnd, 8); ++test_pixels_funcs(put, 16); ++test_pixels_funcs(put_no_rnd, 16); ++ ++test_pixels_funcs(avg, 8); ++test_pixels_funcs(avg_no_rnd, 8); 
++test_pixels_funcs(avg, 16); ++test_pixels_funcs(avg_no_rnd, 16); ++ ++#define test_h264_chroma_mc_funcs(PFX, NUM ) \ ++void test_ ## PFX ## _h264_chroma_mc ## NUM ## _pico( uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){ \ ++ test_h264_chroma_mc_funcs(PFX ## _h264_chroma_mc ## NUM ## _pico, PFX ## _h264_chroma_mc ## NUM ## _c, \ ++ dst, src, stride, h, NUM, x, y, "test_" #PFX "_h264_chroma_mc" #NUM "_pico"); } \ ++ ++test_h264_chroma_mc_funcs(put, 2); ++test_h264_chroma_mc_funcs(put, 4); ++test_h264_chroma_mc_funcs(put, 8); ++test_h264_chroma_mc_funcs(avg, 2); ++test_h264_chroma_mc_funcs(avg, 4); ++test_h264_chroma_mc_funcs(avg, 8); ++ ++#define test_qpel_mc_funcs_type(PFX, NUM, TYPE ) \ ++void test_ ## PFX ## NUM ## _ ## TYPE ## _pico( uint8_t *dst, uint8_t *src, int stride){ \ ++ test_qpel_mc_funcs(PFX ## NUM ## _ ## TYPE ## _pico, PFX ## NUM ## _ ## TYPE ## _c, \ ++ dst, src, stride, NUM, "test_" #PFX #NUM "_" #TYPE "_pico"); } ++ ++#define test_qpel_mc_funcs(PFX, NUM) \ ++ test_qpel_mc_funcs_type(PFX, NUM, mc00);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc10);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc20);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc30);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc01);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc11);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc21);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc31);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc02);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc12);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc22);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc32);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc03);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc13);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc23);\ ++ test_qpel_mc_funcs_type(PFX, NUM, mc33) ++ ++test_qpel_mc_funcs(put_h264_qpel, 4); ++test_qpel_mc_funcs(put_h264_qpel, 8); ++test_qpel_mc_funcs(put_h264_qpel, 16); ++test_qpel_mc_funcs(avg_h264_qpel, 4); ++test_qpel_mc_funcs(avg_h264_qpel, 8); ++test_qpel_mc_funcs(avg_h264_qpel, 16); ++ ++ ++#define dspfunc(PFX, 
IDX, NUM) \ ++ c->PFX ## _pixels_tab[IDX][ 0] = DSP_FUNC_NAME( PFX ## NUM ## _mc00_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 1] = DSP_FUNC_NAME( PFX ## NUM ## _mc10_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 2] = DSP_FUNC_NAME( PFX ## NUM ## _mc20_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 3] = DSP_FUNC_NAME( PFX ## NUM ## _mc30_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 4] = DSP_FUNC_NAME( PFX ## NUM ## _mc01_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 5] = DSP_FUNC_NAME( PFX ## NUM ## _mc11_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 6] = DSP_FUNC_NAME( PFX ## NUM ## _mc21_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 7] = DSP_FUNC_NAME( PFX ## NUM ## _mc31_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 8] = DSP_FUNC_NAME( PFX ## NUM ## _mc02_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 9] = DSP_FUNC_NAME( PFX ## NUM ## _mc12_pico ); \ ++ c->PFX ## _pixels_tab[IDX][10] = DSP_FUNC_NAME( PFX ## NUM ## _mc22_pico ); \ ++ c->PFX ## _pixels_tab[IDX][11] = DSP_FUNC_NAME( PFX ## NUM ## _mc32_pico ); \ ++ c->PFX ## _pixels_tab[IDX][12] = DSP_FUNC_NAME( PFX ## NUM ## _mc03_pico ); \ ++ c->PFX ## _pixels_tab[IDX][13] = DSP_FUNC_NAME( PFX ## NUM ## _mc13_pico ); \ ++ c->PFX ## _pixels_tab[IDX][14] = DSP_FUNC_NAME( PFX ## NUM ## _mc23_pico ); \ ++ c->PFX ## _pixels_tab[IDX][15] = DSP_FUNC_NAME( PFX ## NUM ## _mc33_pico ) ++ ++#endif ++ ++void dsputil_init_avr32(DSPContext* c, AVCodecContext *avctx) ++{ ++ ++ /* H264 */ ++ ++ if ( 0 /*avr32_use_pico*/ ){ ++ c->put_h264_chroma_pixels_tab[0]= DSP_FUNC_NAME(put_h264_chroma_mc8_pico); ++ c->put_h264_chroma_pixels_tab[1]= DSP_FUNC_NAME(put_h264_chroma_mc4_pico); ++ c->put_h264_chroma_pixels_tab[2]= DSP_FUNC_NAME(put_h264_chroma_mc2_pico); ++ ++ c->avg_h264_chroma_pixels_tab[0]= DSP_FUNC_NAME(avg_h264_chroma_mc8_pico); ++ c->avg_h264_chroma_pixels_tab[1]= DSP_FUNC_NAME(avg_h264_chroma_mc4_pico); ++ c->avg_h264_chroma_pixels_tab[2]= DSP_FUNC_NAME(avg_h264_chroma_mc2_pico); ++ } ++ ++#define dspfunc(PFX, IDX, NUM) \ ++ c->PFX ## _pixels_tab[IDX][ 0] = 
DSP_FUNC_NAME( PFX ## NUM ## _mc00_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 1] = DSP_FUNC_NAME( PFX ## NUM ## _mc10_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 2] = DSP_FUNC_NAME( PFX ## NUM ## _mc20_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 3] = DSP_FUNC_NAME( PFX ## NUM ## _mc30_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 4] = DSP_FUNC_NAME( PFX ## NUM ## _mc01_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 5] = DSP_FUNC_NAME( PFX ## NUM ## _mc11_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 6] = DSP_FUNC_NAME( PFX ## NUM ## _mc21_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 7] = DSP_FUNC_NAME( PFX ## NUM ## _mc31_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 8] = DSP_FUNC_NAME( PFX ## NUM ## _mc02_pico ); \ ++ c->PFX ## _pixels_tab[IDX][ 9] = DSP_FUNC_NAME( PFX ## NUM ## _mc12_pico ); \ ++ c->PFX ## _pixels_tab[IDX][10] = DSP_FUNC_NAME( PFX ## NUM ## _mc22_pico ); \ ++ c->PFX ## _pixels_tab[IDX][11] = DSP_FUNC_NAME( PFX ## NUM ## _mc32_pico ); \ ++ c->PFX ## _pixels_tab[IDX][12] = DSP_FUNC_NAME( PFX ## NUM ## _mc03_pico ); \ ++ c->PFX ## _pixels_tab[IDX][13] = DSP_FUNC_NAME( PFX ## NUM ## _mc13_pico ); \ ++ c->PFX ## _pixels_tab[IDX][14] = DSP_FUNC_NAME( PFX ## NUM ## _mc23_pico ); \ ++ c->PFX ## _pixels_tab[IDX][15] = DSP_FUNC_NAME( PFX ## NUM ## _mc33_pico ) ++ ++ if ( avr32_use_pico ){ ++ dspfunc(put_h264_qpel, 0, 16); ++ dspfunc(put_h264_qpel, 1, 8); ++ dspfunc(put_h264_qpel, 2, 4); ++ dspfunc(avg_h264_qpel, 0, 16); ++ dspfunc(avg_h264_qpel, 1, 8); ++ dspfunc(avg_h264_qpel, 2, 4); ++ } ++ ++ c->idct_put= DSP_FUNC_NAME(idct_put_avr32); ++ c->idct_add= DSP_FUNC_NAME(idct_add_avr32); ++ c->idct = DSP_FUNC_NAME(idct_avr32); ++ c->h264_idct_add = DSP_FUNC_NAME(h264_idct_add_avr32); ++ c->h264_idct8_add = DSP_FUNC_NAME(h264_idct8_add_avr32); ++ ++ /*c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_avr32;*/ ++ ++ c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; ++ ++ c->fdct = fdct_avr32; ++ ++ c->clear_blocks = clear_blocks_avr32; ++ ++#undef dspfunc ++#define dspfunc(PFX, IDX, NUM) \ 
++ c->PFX ## _pixels_tab[IDX][0] = DSP_FUNC_NAME( PFX ## _pixels ## NUM ## _avr32 ); \ ++ c->PFX ## _pixels_tab[IDX][1] = DSP_FUNC_NAME( PFX ## _pixels ## NUM ## _h_avr32); \ ++ c->PFX ## _pixels_tab[IDX][2] = DSP_FUNC_NAME( PFX ## _pixels ## NUM ## _v_avr32); \ ++ c->PFX ## _pixels_tab[IDX][3] = DSP_FUNC_NAME( PFX ## _pixels ## NUM ## _hv_avr32) ++ ++ dspfunc(put, 0, 16); ++ dspfunc(put_no_rnd, 0, 16); ++ dspfunc(put, 1, 8); ++ dspfunc(put_no_rnd, 1, 8); ++ ++ dspfunc(avg, 1, 8); ++ dspfunc(avg_no_rnd, 1, 8); ++ dspfunc(avg, 0, 16); ++ dspfunc(avg_no_rnd, 0, 16); ++#undef dspfunc ++ ++} ++ ++ ++ ++#if 0 ++int main(int argc, char *argv[]){ ++ ++ ++} ++#endif ++ +diff --git a/libavcodec/avr32/fdct.S b/libavcodec/avr32/fdct.S +new file mode 100644 +index 0000000..be45b86 +--- /dev/null ++++ b/libavcodec/avr32/fdct.S +@@ -0,0 +1,541 @@ ++/* ++ * Copyright (c) 2007 Atmel Corporation. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. The name of ATMEL may not be used to endorse or promote products ++ * derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. 
IN NO EVENT SHALL ATMEL ++ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ++ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY ++ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE ++ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH ++ * DAMAGE. ++ */ ++ ++//********************************************************** ++//* 2-D fDCT, Based on: * ++//* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical * ++//* Fast 1-D DCT Algorithms with 11 Multiplications", * ++//* Proc. Int'l. Conf. on Acoustics, Speech, and Signal * ++//* Processing 1989 (ICASSP '89), pp. 988-991. * ++//* * ++//* Fixed point implementation optimized for the AVR-II * ++//* instruction set. If a table is used for the * ++//* coefficients we can load them two at a time with a * ++//* single word load, which reduces the number of loads. * ++//* * ++//* * ++//********************************************************** ++ ++ ++/* This routine is a slow-but-accurate integer implementation of the ++ * forward DCT (Discrete Cosine Transform). Taken from the IJG software ++ * ++ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT ++ * on each column. Direct algorithms are also available, but they are ++ * much more complex and seem not to be any faster when reduced to code. ++ * ++ * This implementation is based on an algorithm described in ++ * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT ++ * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, ++ * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. ++ * The primary algorithm described there uses 11 multiplies and 29 adds. ++ * We use their alternate method with 12 multiplies and 32 adds.
++ * The advantage of this method is that no data path contains more than one ++ * multiplication; this allows a very simple and accurate implementation in ++ * scaled fixed-point arithmetic, with a minimal number of shifts. ++ * ++ * The details of the scaling are as follows: ++ * ++ * Each 1-D DCT step produces outputs which are a factor of sqrt(N) ++ * larger than the true DCT outputs. The final outputs are therefore ++ * a factor of N larger than desired; since N=8 this can be cured by ++ * a simple right shift at the end of the algorithm. The advantage of ++ * this arrangement is that we save two multiplications per 1-D DCT, ++ * because the y0 and y4 outputs need not be divided by sqrt(N). ++ * In the IJG code, this factor of 8 is removed by the quantization step ++ * (in jcdctmgr.c); here it is removed by the descaling shifts of pass 2. ++ * ++ * We have to do addition and subtraction of the integer inputs, which ++ * is no problem, and multiplication by fractional constants, which is ++ * a problem to do in integer arithmetic. We multiply all the constants ++ * by CONST_SCALE and convert them to integer constants (thus retaining ++ * CONST_BITS bits of precision in the constants). After doing a ++ * multiplication we have to divide the product by CONST_SCALE, with proper ++ * rounding, to produce the correct output. This division can be done ++ * cheaply as a right shift of CONST_BITS bits. We postpone shifting ++ * as long as possible so that partial sums can be added together with ++ * full fractional precision. ++ * ++ * The outputs of the first pass are scaled up by PASS1_BITS bits so that ++ * they are represented to better-than-integral precision. These outputs ++ * require 8 + PASS1_BITS + 3 bits; this fits in a 16-bit word ++ * with the recommended scaling. (For 12-bit sample data, the intermediate ++ * array is INT32 anyway.) ++ * ++ * To avoid overflow of the 32-bit intermediate results in pass 2, we must ++ * have 8 + CONST_BITS + PASS1_BITS <= 26.
Error analysis ++ * shows that the values given below are the most effective. ++ * ++ * We can gain a little more speed, with a further compromise in accuracy, ++ * by omitting the addition in a descaling shift. This yields an incorrectly ++ * rounded result half the time... ++ */ ++ ++ .global fdct_avr32 ++ ++ ++ ++#define CONST_BITS 13 ++#define PASS1_BITS 2 ++ ++#define FIX_0_298631336 2446 /* FIX(0.298631336) */ ++#define FIX_0_390180644 3196 /* FIX(0.390180644) */ ++#define FIX_0_541196100 4433 /* FIX(0.541196100) */ ++#define FIX_0_765366865 6270 /* FIX(0.765366865) */ ++#define FIX_0_899976223 7373 /* FIX(0.899976223) */ ++#define FIX_1_175875602 9633 /* FIX(1.175875602) */ ++#define FIX_1_501321110 12299 /* FIX(1.501321110) */ ++#define FIX_1_847759065 15137 /* FIX(1.847759065) */ ++#define FIX_1_961570560 16069 /* FIX(1.961570560) */ ++#define FIX_2_053119869 16819 /* FIX(2.053119869) */ ++#define FIX_2_562915447 20995 /* FIX(2.562915447) */ ++#define FIX_3_072711026 25172 /* FIX(3.072711026) */ ++ ++ ++/* ++ * Perform an integer forward DCT on one block of samples. ++ */ ++ ++//void ++//fdct_int32(short *const block) ++//{ ++// int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; ++// int tmp10, tmp11, tmp12, tmp13; ++// int z1, z2, z3, z4, z5; ++// short *blkptr; ++// int *dataptr; ++// int data[64]; ++// int i; ++// ++// /* Pass 1: process rows. */ ++// /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ++// /* furthermore, we scale the results by 2**PASS1_BITS. 
*/ ++// ++// dataptr = data; ++// blkptr = block; ++ ++ .text ++fdct_avr32: ++ pushm r0-r3, r4-r7, lr ++#define loop_ctr r0 ++#define blkptr r12 ++#define x0 r1 ++#define x1 r2 ++#define x2 r3 ++#define x3 r4 ++#define x4 r5 ++#define x5 r6 ++#define x6 r7 ++#define x7 r8 ++#define tmp0 r5 ++#define tmp7 r2 ++#define tmp1 r3 ++#define tmp6 r4 ++#define tmp2 r9 ++#define tmp5 r8 ++#define tmp3 r7 ++#define tmp4 r6 ++ ++ ++ mov loop_ctr, 8 ++// for (i = 0; i < 8; i++) { ++ROW_LOOP: ++ ++ ldm blkptr, r1, r2, r3, r4 ++ ++// tmp2 = blkptr[2] + blkptr[5]; ++// tmp3 = blkptr[3] + blkptr[4]; ++ paddx.h r5, r3, r2 ++// tmp5 = blkptr[2] - blkptr[5]; ++// tmp4 = blkptr[3] - blkptr[4]; ++ psubx.h r6, r3, r2 ++// tmp0 = blkptr[0] + blkptr[7]; ++// tmp1 = blkptr[1] + blkptr[6]; ++ paddx.h r2, r4, r1 ++// tmp7 = blkptr[0] - blkptr[7]; ++// tmp6 = blkptr[1] - blkptr[6]; ++ psubx.h r3, r4, r1 ++ ++// /* Even part per LL&M figure 1 --- note that published figure is faulty; ++// * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". ++// */ ++ ++#define tmp10 r1 ++#define tmp13 r5 ++#define tmp11 r7 ++#define tmp12 r3 ++#define z1 r9 ++ ++// tmp10 = tmp0 + tmp3; ++// tmp13 = tmp0 - tmp3; ++ paddsub.h r1, r2:t, r5:b ++// tmp11 = tmp1 + tmp2; ++// tmp12 = tmp1 - tmp2; ++ paddsub.h r4, r2:b, r5:t ++ ++ ++// dataptr[0] = (tmp10 + tmp11) << PASS1_BITS; ++// dataptr[4] = (tmp10 - tmp11) << PASS1_BITS; ++ paddsub.h r7, r1:t, r4:t ++ ld.w r10, pc[const_table - .] ++ plsl.h r7, r7, PASS1_BITS ++ ++// z1 = (tmp12 + tmp13) * FIX_0_541196100; ++ addhh.w r8, r4:b, r1:b ++ mulhh.w r8, r8:b, r10:t ++ ++// dataptr[2] = ++// DESCALE(z1 + tmp13 * FIX_0_765366865, CONST_BITS - PASS1_BITS); ++// dataptr[6] = ++// DESCALE(z1 + tmp12 * (-FIX_1_847759065), CONST_BITS - PASS1_BITS); ++ mulhh.w r9, r1:b, r10:b ++ ld.w r10, pc[const_table - . 
+ 4] ++ add r1, r8, r9 ++ satrnds r1 >> (CONST_BITS - PASS1_BITS), 31 ++ ++ mulhh.w r9, r4:b, r10:t ++ add r4, r8, r9 ++ satrnds r4 >> (CONST_BITS - PASS1_BITS), 31 ++ ++ ++// /* Odd part per figure 8 --- note paper omits factor of sqrt(2). ++// * cK represents cos(K*pi/16). ++// * i0..i3 in the paper are tmp4..tmp7 here. ++// */ ++ ++#define z2 r5 ++#define z3 r6 ++#define z4 r7 ++#define z5 r8 ++ ++// z4 = tmp5 + tmp7; ++// z3 = tmp4 + tmp6; ++ padd.h r2, r6, r3 ++// z2 = tmp5 + tmp6; ++// z1 = tmp4 + tmp7; ++ paddx.h r5, r6, r3 ++ ++ lddpc r9, pc[const_table - . + 8] ++// z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */ ++ addhh.w r8, r2:t, r2:b ++ mulhh.w r8, r8:b, r10:b ++ lddpc r10, pc[const_table - . + 12] ++ ++ ++// tmp4 *= FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */ ++ mulhh.w r11, r6:b, r9:t ++ ++// tmp5 *= FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */ ++ mulhh.w r6, r6:t, r9:b ++ ++// tmp6 *= FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */ ++ lddpc r9, pc[const_table - . + 20] ++ mulhh.w lr, r3:b, r10:t ++ ++// tmp7 *= FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */ ++ mulhh.w r3, r3:t, r10:b ++ ++// z3 *= -FIX_1_961570560; /* sqrt(2) * (-c3-c5) */ ++ mulhh.w r10, r2:b, r9:t ++ ++// z4 *= -FIX_0_390180644; /* sqrt(2) * (c5-c3) */ ++ mulhh.w r2, r2:t, r9:b ++ lddpc r9, pc[const_table - . 
+ 16] ++// z3 += z5; ++// z4 += z5; ++ add r10, r8 ++ add r2, r8 ++ ++// z1 *= -FIX_0_899976223; /* sqrt(2) * (c7-c3) */ ++ mulhh.w r8, r5:b, r9:t ++ ++// z2 *= -FIX_2_562915447; /* sqrt(2) * (-c1-c3) */ ++ mulhh.w r5, r5:t, r9:b ++ ++// dataptr[7] = DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS); ++ add r11, r8 ++ add r11, r10 ++ satrnds r11 >> (CONST_BITS - PASS1_BITS), 31 ++ ++// dataptr[5] = DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS); ++ add r6, r5 ++ ++ sthh.w blkptr[6*2], r4:b, r11:b ++ add r6, r2 ++ satrnds r6 >> (CONST_BITS - PASS1_BITS), 31 ++ ++// dataptr[3] = DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS); ++ add lr, r5 ++ sthh.w blkptr[4*2], r7:b, r6:b ++ add lr, r10 ++ satrnds lr >> (CONST_BITS - PASS1_BITS), 31 ++ ++// dataptr[1] = DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS); ++ add r3, r8 ++ sthh.w blkptr[2*2], r1:b, lr:b ++ add r3, r2 ++ satrnds r3 >> (CONST_BITS - PASS1_BITS), 31 ++ ++ ++ ++// dataptr += 8; /* advance pointer to next row */ ++// blkptr += 8; ++ sthh.w blkptr[0], r7:t, r3:b ++ sub blkptr, -16 ++ sub loop_ctr, 1 ++ brne ROW_LOOP ++ ++// } ++ ++ /* Pass 2: process columns. ++ * We remove the PASS1_BITS scaling and, unlike the IJG code, also the ++ * overall factor of 8: every descale below shifts 3 extra bits.
++ */ ++ ++// dataptr = data; ++ sub blkptr, 128 ++ ++ mov loop_ctr, 4 ++// for (i = 0; i < 8; i++) { ++COLOUMN_LOOP: ++ ld.w r1, blkptr[0] ++ ld.w r2, blkptr[1*8*2] ++ ld.w r3, blkptr[2*8*2] ++ ld.w r4, blkptr[3*8*2] ++ ld.w r5, blkptr[4*8*2] ++ ld.w r6, blkptr[5*8*2] ++ ld.w r7, blkptr[6*8*2] ++ ld.w r8, blkptr[7*8*2] ++ ++// tmp0 = blkptr[0] + blkptr[7*8]; ++ padds.sh r9, r1, r8 ++// tmp7 = blkptr[0] - blkptr[7*8]; ++ psubs.sh r1, r1, r8 ++// tmp1 = blkptr[1*8] + blkptr[6*8]; ++ padds.sh r8, r2, r7 ++// tmp6 = blkptr[1*8] - blkptr[6*8]; ++ psubs.sh r2, r2, r7 ++// tmp2 = blkptr[2*8] + blkptr[5*8]; ++ padds.sh r7, r3, r6 ++// tmp5 = blkptr[2*8] - blkptr[5*8]; ++ psubs.sh r3, r3, r6 ++// tmp3 = blkptr[3*8] + blkptr[4*8]; ++ padds.sh r6, r4, r5 ++// tmp4 = blkptr[3*8] - blkptr[4*8]; ++ psubs.sh r4, r4, r5 ++ ++// /* even part per ll&m figure 1 --- note that published figure is faulty; ++// * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". ++// */ ++// ++// tmp10 = tmp0 + tmp3; ++ padds.sh r5, r9, r6 ++// tmp13 = tmp0 - tmp3; ++ psubs.sh r9, r9, r6 ++// tmp11 = tmp1 + tmp2; ++ padds.sh r6, r8, r7 ++// tmp12 = tmp1 - tmp2; ++ psubs.sh r8, r8, r7 ++ ++// dataptr[0] = DESCALE(tmp10 + tmp11, PASS1_BITS); ++// dataptr[32] = DESCALE(tmp10 - tmp11, PASS1_BITS); ++//Might get an overflow here ++ padds.sh r7, r5, r6 ++ psubs.sh r5, r5, r6 ++ ++ //Rounding ++ mov lr, (1 << (PASS1_BITS + 2)) ++ orh lr, hi(1 << (16 + PASS1_BITS + 2)) ++ padds.sh r7, r7, lr ++ padds.sh r5, r5, lr ++ ++ pasr.h r7, r7, PASS1_BITS + 3 ++ pasr.h r5, r5, PASS1_BITS + 3 ++ st.w r12[0], r7 ++ st.w r12[4*8*2], r5 ++ ++ lddpc r10, const_table2 ++ ++ ++// z1 = (tmp12 + tmp13) * FIX_0_541196100; ++ padds.sh r5, r8, r9 ++ mulhh.w r6, r5:t, r10:t ++ mulhh.w r7, r5:b, r10:t ++ ++// dataptr[16] = ++// DESCALE(z1 + tmp13 * FIX_0_765366865, CONST_BITS + PASS1_BITS); ++ lddpc r11, const_table2 + 4 ++ mulhh.w lr, r9:t, r10:b ++ mulhh.w r9, r9:b, r10:b ++ add lr, r6 ++ add r9, r7 ++ satrnds lr >> (CONST_BITS + 
PASS1_BITS + 3), 31 ++ satrnds r9 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ sthh.w r12[2*8*2], lr:b, r9:b ++ ++// dataptr[48] = ++// DESCALE(z1 + tmp12 * (-FIX_1_847759065), CONST_BITS + PASS1_BITS); ++ mulhh.w lr, r8:t, r11:t ++ mulhh.w r8, r8:b, r11:t ++ add lr, r6 ++ add r8, r7 ++ satrnds lr >> (CONST_BITS + PASS1_BITS + 3), 31 ++ satrnds r8 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ sthh.w r12[6*8*2], lr:b, r8:b ++ ++// /* Odd part per figure 8 --- note paper omits factor of sqrt(2). ++// * cK represents cos(K*pi/16). ++// * i0..i3 in the paper are tmp4..tmp7 here. ++// */ ++// ++// z2 = tmp5 + tmp6; ++// z3 = tmp4 + tmp6; ++// z4 = tmp5 + tmp7; ++ padds.sh r5, r3, r2 ++ padds.sh r6, r4, r2 ++ padds.sh r7, r3, r1 ++ ++// z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */ ++ padds.sh r8, r6, r7 ++ mulhh.w r9, r8:t, r11:b ++ mulhh.w r8, r8:b, r11:b ++ ++// z3 *= -FIX_1_961570560; /* sqrt(2) * (-c3-c5) */ ++// z3 += z5; ++ lddpc r11, const_table2 + 8 ++ mulhh.w r10, r6:t, r11:t ++ mulhh.w r6, r6:b, r11:t ++ add r10, r9 ++ add r6, r8 ++ ++// z4 *= -FIX_0_390180644; /* sqrt(2) * (c5-c3) */ ++// z4 += z5; ++ mulhh.w lr, r7:t, r11:b ++ mulhh.w r7, r7:b, r11:b ++ lddpc r11, const_table2 + 12 ++ st.w --sp,r0 ++ add lr, r9 ++ add r7, r8 ++ ++// tmp6 *= FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */ ++ mulhh.w r0, r2:t, r11:t ++ machh.w r0, r5:t, r11:b ++ mulhh.w r2, r2:b, r11:t ++ machh.w r2, r5:b, r11:b ++ ++// z2 *= -FIX_2_562915447; /* sqrt(2) * (-c1-c3) */ ++// dataptr[24] = DESCALE(tmp6 + z2 + z3, CONST_BITS + PASS1_BITS); ++ add r0, r10 ++ lddpc r11, const_table2 + 16 ++ add r2, r6 ++ satrnds r0 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ satrnds r2 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ sthh.w r12[3*8*2], r0:b, r2:b ++// tmp5 *= FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */ ++ mulhh.w r0, r3:t, r11:t ++ machh.w r0, r5:t, r11:b ++ mulhh.w r2, r3:b, r11:t ++ machh.w r2, r5:b, r11:b ++ add r0, lr ++ lddpc r11, const_table2 + 20 ++ add r2, r7 ++ ++// dataptr[40] = 
DESCALE(tmp5 + z2 + z4, CONST_BITS + PASS1_BITS); ++ satrnds r0 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ satrnds r2 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ sthh.w r12[5*8*2], r0:b, r2:b ++ ++ ++// z1 = tmp4 + tmp7; ++ padds.sh r2, r4, r1 ++ ++// tmp4 *= FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */ ++ mulhh.w r3, r4:t, r11:t ++ machh.w r3, r2:t, r11:b ++ mulhh.w r4, r4:b, r11:t ++ machh.w r4, r2:b, r11:b ++ add r3, r10 ++ lddpc r11, const_table2 + 24 ++ add r4, r6 ++ ++// z1 *= -FIX_0_899976223; /* sqrt(2) * (c7-c3) */ ++// dataptr[56] = DESCALE(tmp4 + z1 + z3, CONST_BITS + PASS1_BITS); ++ satrnds r3 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ satrnds r4 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ sthh.w r12[7*8*2], r3:b, r4:b ++ ++ ++// tmp7 *= FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */ ++ mulhh.w r3, r1:t, r11:t ++ machh.w r3, r2:t, r11:b ++ mulhh.w r4, r1:b, r11:t ++ machh.w r4, r2:b, r11:b ++ add r3, lr ++ add r4, r7 ++ ++// dataptr[8] = DESCALE(tmp7 + z1 + z4, CONST_BITS + PASS1_BITS); ++ satrnds r3 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ satrnds r4 >> (CONST_BITS + PASS1_BITS + 3), 31 ++ sthh.w r12[1*8*2], r3:b, r4:b ++ ld.w r0, sp++ ++ ++// dataptr++; /* advance pointer to next column */ ++ sub blkptr, -4 ++ sub loop_ctr, 1 ++ brne COLOUMN_LOOP ++ ++// } ++ ++ popm r0-r3, r4-r7, pc ++ ++// /* descale */ ++// for (i = 0; i < 64; i++) ++// block[i] = (short int) DESCALE(data[i], 3); ++ ++ ++//} ++ ++ ++ .align 2 ++const_table: .short FIX_0_541196100, FIX_0_765366865, -FIX_1_847759065, FIX_1_175875602 ++ .short FIX_0_298631336, FIX_2_053119869, FIX_3_072711026, FIX_1_501321110 ++ .short -FIX_0_899976223,-FIX_2_562915447, -FIX_1_961570560, -FIX_0_390180644 ++ ++const_table2: .short FIX_0_541196100, FIX_0_765366865, -FIX_1_847759065, FIX_1_175875602 ++ .short -FIX_1_961570560, -FIX_0_390180644, FIX_3_072711026, -FIX_2_562915447 ++ .short FIX_2_053119869, -FIX_2_562915447, FIX_0_298631336, -FIX_0_899976223 ++ .short FIX_1_501321110, -FIX_0_899976223 ++ ++ ++ ++ 
+diff --git a/libavcodec/avr32/h264idct.S b/libavcodec/avr32/h264idct.S +new file mode 100644 +index 0000000..4b23e2d +--- /dev/null ++++ b/libavcodec/avr32/h264idct.S +@@ -0,0 +1,451 @@ ++/* ++ * Copyright (c) 2007 Atmel Corporation. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. The name of ATMEL may not be used to endorse or promote products ++ * derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL ++ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ++ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY ++ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE ++ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH ++ * DAMAGE. ++ */ ++ ++ .global h264_idct_add_avr32 ++ ++ /* Macro for performing the 1-D transform on one row line. ++ ++ The register 'w01' should contain the first two pixels, ++ and the register 'w23' should contain the last two pixels ++ in the line.
The resulting line is placed in w01 and w23 ++ so that { w01, w23 } = { x0, x1, x3, x2 }. ++ 'tmp' and 'tmp2' should be scratchpad registers. */ ++ .macro transform_row w01, w23, tmp, tmp2 ++ add \tmp, \w23, \w01 << 1 /* tmp = { xxxx, 2*w1 + w3 } */ ++ sub \tmp2, \w01, \w23 << 1 /* tmp2 = { xxxx, w1 - 2*w3 } */ ++ bfins \tmp2, \tmp, 16, 16 /* tmp2 = { 2*w1 + w3, w1 - 2*w3 } */ ++ pasr.h \tmp2, \tmp2, 1 /* tmp2 = { w1 + w3/2, w1/2 - w3 } */ ++ paddsub.h \tmp, \w01:t, \w23:t /* tmp = { w0 + w2, w0 - w2 } */ ++ padd.h \w01, \tmp, \tmp2 /* w01 = { w0 + w2 + w1 + w3/2, w0 - w2 + w1/2 - w3 } */ ++ psub.h \w23, \tmp, \tmp2 /* w23 = { w0 + w2 - w1 - w3/2, w0 - w2 - w1/2 + w3 } */ ++ .endm ++ ++ /* Macro for performing the 1-D transform on two columns. ++ ++ The registers w0, w1, w2, w3 should each contain two ++ packed samples from the two columns to transform. ++ tmp and tmp2 are scratchpad registers. ++ ++ The resulting transformed columns are placed in the ++ same positions as the input columns. ++ */ ++ .macro transform_2columns w0, w1, w2, w3, tmp, tmp2 ++ padd.h \tmp, \w0, \w2 /* tmp = z0 = w0 + w2 */ ++ psub.h \w0, \w0, \w2 /* w0 = z1 = w0 - w2 */ ++ pasr.h \w2, \w1, 1 /* w2 = w1/2 */ ++ pasr.h \tmp2, \w3, 1 /* tmp2 = w3/2 */ ++ psub.h \w3, \w2, \w3 /* w3 = z2 = w1/2 - w3 */ ++ padd.h \tmp2, \w1, \tmp2/* tmp2 = z3 = w1 + w3/2 */ ++ padd.h \w1, \w0, \w3 /* w1 = x1 = z1 + z2 */ ++ psub.h \w2, \w0, \w3 /* w2 = x2 = z1 - z2 */ ++ padd.h \w0, \tmp, \tmp2/* w0 = x0 = z0 + z3 */ ++ psub.h \w3, \tmp, \tmp2/* w3 = x3 = z0 - z3 */ ++ /* Scale down result. */ ++ pasr.h \w0, \w0, 6 ++ pasr.h \w1, \w1, 6 ++ pasr.h \w2, \w2, 6 ++ pasr.h \w3, \w3, 6 ++ .endm ++ ++/* void h264_idct_add_avr32(uint8_t *dst, DCTELEM *block, int stride): the code below reads dst from r12, block from r11 and stride from r10 */ ++ ++h264_idct_add_avr32: ++ ++ stm --sp,r0-r3,r4-r7, lr ++ ++ /* Setup rounding factor.
*/ ++ mov r0, (1 << 5) ++ lsl r0, 16 ++ ++ /* Load block */ ++ ldm r11,r2-r9 ++ /* r9 = { w00, w01 }, ++ r8 = { w02, w03 }, ++ r7 = { w10, w11 }, ++ r6 = { w12, w13 }, ++ r5 = { w20, w21 }, ++ r4 = { w22, w23 }, ++ r3 = { w30, w31 }, ++ r2 = { w32, w33 } */ ++ ++ ++ /* Add the rounding factor to w00. */ ++ add r9, r0 ++ ++ /* Transform rows */ ++ transform_row r9, r8, r0, r1 ++ transform_row r7, r6, r0, r1 ++ transform_row r5, r4, r0, r1 ++ transform_row r3, r2, r0, r1 ++ ++ /* Transform columns */ ++ transform_2columns r9, r7, r5, r3, r0, r1 ++ transform_2columns r8, r6, r4, r2, r0, r1 ++ ++ /* Load predicted pixels.*/ ++ ld.w lr, r12[0] ++ ld.w r11, r12[r10] ++ ++ /* Unpack to halfwords. */ ++ punpckub.h r0, lr:t ++ punpckub.h r1, lr:b ++ ++ /* Add with transformed row. */ ++ padd.h r0, r0, r9 ++ paddx.h r1, r1, r8 ++ /* Pack and saturate back to 8-bit pixels. */ ++ packsh.ub r0, r0, r1 ++ ++ /* Unpack to halfwords. */ ++ punpckub.h lr, r11:t ++ punpckub.h r11, r11:b ++ ++ /* Add with transformed row. */ ++ padd.h lr, lr, r7 ++ paddx.h r11, r11, r6 ++ /* Pack and saturate back to 8-bit pixels. */ ++ packsh.ub r1, lr, r11 ++ ++ /* Store back to frame. */ ++ st.w r12[0], r0 ++ st.w r12[r10], r1 ++ ++ add r12, r12, r10 << 1 ++ ++ /* Load predicted pixels.*/ ++ ld.w lr, r12[0] ++ ld.w r11, r12[r10] ++ ++ /* Unpack to halfwords. */ ++ punpckub.h r0, lr:t ++ punpckub.h r1, lr:b ++ ++ /* Add with transformed row. */ ++ padd.h r0, r0, r5 ++ paddx.h r1, r1, r4 ++ /* Pack and saturate back to 8-bit pixels. */ ++ packsh.ub r0, r0, r1 ++ ++ /* Unpack to halfwords. */ ++ punpckub.h lr, r11:t ++ punpckub.h r11, r11:b ++ ++ /* Add with transformed row. */ ++ padd.h lr, lr, r3 ++ paddx.h r11, r11, r2 ++ /* Pack and saturate back to 8-bit pixels. */ ++ packsh.ub r1, lr, r11 ++ ++ /* Store back to frame.
*/ ++ st.w r12[0], r0 ++ st.w r12[r10], r1 ++ ++ ldm sp++,r0-r3,r4-r7, pc ++ ++ ++ .global h264_idct8_add_avr32 ++//AVR32 version of: void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){ ++ ++h264_idct8_add_avr32: ++ stm --sp,r0-r3,r4-r7, lr ++ ++ /* Push dst (r12) and stride (r10) on stack; both registers are reused as scratch below */ ++ stm --sp,r10,r12 ++ ++// int i; ++// DCTELEM (*src)[8] = (DCTELEM(*)[8])block; ++// uint8_t *cm = cropTbl + MAX_NEG_CROP; ++ ++// block[0] += 32; ++ ++ ++// for( i = 0; i < 8; i++ ) ++// { ++ mov lr, 4 /* 4 passes; each ld.w below carries two 16-bit columns */ ++0: ++ ld.w r7, r11[0*(8*2)] ++ ld.w r6, r11[1*(8*2)] ++ ld.w r5, r11[2*(8*2)] ++ ld.w r4, r11[3*(8*2)] ++ ld.w r3, r11[4*(8*2)] ++ ld.w r2, r11[5*(8*2)] ++ ld.w r1, r11[6*(8*2)] ++ ld.w r0, r11[7*(8*2)] ++ ++/* ++ ++ const int a0 = src[0][i] + src[4][i]; ++ const int a2 = src[0][i] - src[4][i]; ++ const int a4 = (src[2][i]>>1) - src[6][i]; ++ const int a6 = (src[6][i]>>1) + src[2][i]; ++*/ ++ padd.h r8, r7, r3 /* r8 = a0 */ ++ psub.h r7, r7, r3 /* r7 = a2 */ ++ pasr.h r3, r5, 1 /* r3 = src[2][i] >> 1 */ ++ pasr.h r9, r1, 1 /* r9 = src[6][i] >> 1 */ ++ psub.h r3, r3, r1 /* r3 = a4 */ ++ padd.h r9, r9, r5 /* r9 = a6 */ ++ ++/* ++ const int b0 = a0 + a6; ++ const int b2 = a2 + a4; ++ const int b4 = a2 - a4; ++ const int b6 = a0 - a6; ++*/ ++ padd.h r1, r8, r9 /* r1 = b0 */ ++ psub.h r8, r8, r9 /* r8 = b6 */ ++ padd.h r5, r7, r3 /* r5 = b2 */ ++ psub.h r7, r7, r3 /* r7 = b4 */ ++ ++/* ++ const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1); ++ const int a3 = src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1); ++ const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1); ++ const int a7 = src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1); ++*/ ++ pasr.h r3, r0, 1 ++ padd.h r3, r3, r0 ++ psub.h r3, r2, r3 ++ psub.h r3, r3, r4 /* r3 = a1 */ ++ ++ pasr.h r9, r4, 1 ++ padd.h r9, r9, r4 ++ psub.h r9, r0, r9 ++ padd.h r9, r6, r9 /* r9 = a3 */ ++ ++ pasr.h r10, r2, 1 ++ padd.h r10, r10, r2 ++ padd.h r10, r10, r0 ++ psub.h r10, r10, r6 /* r10 = a5 */ ++ ++ pasr.h r0, r6, 1 ++
padd.h r0, r0, r6 ++ padd.h r0, r0, r2 ++ padd.h r0, r0, r4 /* r0 = a7 */ ++/* ++ const int b1 = (a7>>2) + a1; ++ const int b3 = a3 + (a5>>2); ++ const int b5 = (a3>>2) - a5; ++ const int b7 = a7 - (a1>>2); ++*/ ++ pasr.h r2, r0, 2 ++ padd.h r2, r2, r3 /* r2 = b1 */ ++ pasr.h r3, r3, 2 ++ psub.h r3, r0, r3 /* r3 = b7 */ ++ ++ pasr.h r0, r10, 2 ++ padd.h r0, r0, r9 /* r0 = b3 */ ++ pasr.h r9, r9, 2 ++ psub.h r9, r9, r10 /* r9 = b5 */ ++ ++ ++/* ++ src[0][i] = b0 + b7; ++ src[7][i] = b0 - b7; ++ src[1][i] = b2 + b5; ++ src[6][i] = b2 - b5; ++ src[2][i] = b4 + b3; ++ src[5][i] = b4 - b3; ++ src[3][i] = b6 + b1; ++ src[4][i] = b6 - b1; */ ++ ++ padd.h r4, r1, r3 ++ psub.h r1, r1, r3 ++ st.w r11[0*(8*2)], r4 ++ st.w r11[7*(8*2)], r1 ++ ++ padd.h r3, r5, r9 ++ psub.h r5, r5, r9 ++ st.w r11[1*(8*2)], r3 ++ st.w r11[6*(8*2)], r5 ++ ++ padd.h r9, r7, r0 ++ psub.h r7, r7, r0 ++ st.w r11[2*(8*2)], r9 ++ st.w r11[5*(8*2)], r7 ++ ++ padd.h r0, r8, r2 ++ psub.h r8, r8, r2 ++ st.w r11[3*(8*2)], r0 ++ st.w r11[4*(8*2)], r8 ++ ++ sub r11, -4 /* step to the next pair of columns */ ++ sub lr, 1 ++ brne 0b ++ ++// } ++ ++ lddsp r12, sp[0] /* r12 = dst */ ++ sub r11, 4*4 /* rewind block pointer to column 0 of row 0 */ ++ ldm r11++, r4-r7 /* fetch row 0 (register layout documented below) */ ++ mov lr, 8 ++ /* dst and stride remain on the stack at sp[0] and sp[4] */ ++ ++1: ++// for( i = 0; i < 8; i++ ) ++// { ++ ++ /* r7 = {src[i][0], src[i][1]} ++ r6 = {src[i][2], src[i][3]} ++ r5 = {src[i][4], src[i][5]} ++ r4 = {src[i][6], src[i][7]} */ ++ ++/* ++ const int a0 = src[i][0] + src[i][4]; ++ const int a2 = src[i][0] - src[i][4]; ++ const int a4 = (src[i][2]>>1) - src[i][6]; ++ const int a6 = (src[i][6]>>1) + src[i][2]; ++*/ ++ pasr.h r8, r6, 1 ++ pasr.h r9, r4, 1 ++ addhh.w r0, r7:t, r5:t /* r0 = a0 */ ++ subhh.w r1, r7:t, r5:t /* r1 = a2 */ ++ subhh.w r2, r8:t, r4:t /* r2 = a4 */ ++ addhh.w r3, r9:t, r6:t /* r3 = a6 */ ++ ++/* ++ const int b0 = a0 + a6; ++ const int b2 = a2 + a4; ++ const int b4 = a2 - a4; ++ const int b6 = a0 - a6; ++*/ ++ add r10, r0, r3 /* r10 = b0 */ ++ sub r0, r3 /* r0 = b6 */ ++ add r3, r1, r2 /* r3 = b2 */ ++ sub r1, r2 /*
r1 = b4 */ ++/* ++ ++ ++ const int a7 = src[i][5] + src[i][3] + src[i][1] + (src[i][1]>>1); ++ const int a1 = src[i][5] - src[i][3] - src[i][7] - (src[i][7]>>1); ++ const int a3 = src[i][7] + src[i][1] - src[i][3] - (src[i][3]>>1); ++ const int a5 = src[i][7] - src[i][1] + src[i][5] + (src[i][5]>>1); */ ++ addhh.w r8, r8:b, r6:b ++ addhh.w r2, r4:b, r7:b ++ sub r2, r8 /* r2 = a3 */ ++ ++ addhh.w r9, r9:b, r4:b ++ subhh.w r8, r5:b, r6:b ++ sub r8, r9 /* r8 = a1 */ ++ ++ pasr.h r9, r7, 1 ++ addhh.w r9, r9:b, r7:b ++ addhh.w r6, r5:b, r6:b ++ add r6, r9 /* r6 = a7 */ ++ ++ pasr.h r9, r5, 1 ++ addhh.w r9, r9:b, r5:b ++ subhh.w r5, r4:b, r7:b ++ add r5, r9 /* r5 = a5 */ ++ ++/* const int b1 = (a7>>2) + a1; ++ const int b3 = (a5>>2) + a3; ++ const int b5 = (a3>>2) - a5; ++ const int b7 = -(a1>>2) + a7 ; */ ++ asr r4, r6, 2 ++ add r4, r8 /* r4 = b1 */ ++ asr r8, 2 ++ rsub r8, r6 /* r8 = b7 */ ++ ++ asr r6, r5, 2 ++ add r6, r2 /* r6 = b3 */ ++ asr r2, 2 ++ sub r2, r5 /* r2 = b5 */ ++ ++/* ++ dst[i*stride + 0] = cm[ dst[i*stride + 0] + ((b0 + b7) >> 6) ]; ++ dst[i*stride + 1] = cm[ dst[i*stride + 1] + ((b2 + b5) >> 6) ]; ++ dst[i*stride + 2] = cm[ dst[i*stride + 2] + ((b4 + b3) >> 6) ]; ++ dst[i*stride + 3] = cm[ dst[i*stride + 3] + ((b6 + b1) >> 6) ]; ++ dst[i*stride + 4] = cm[ dst[i*stride + 4] + ((b6 - b1) >> 6) ]; ++ dst[i*stride + 5] = cm[ dst[i*stride + 5] + ((b4 - b3) >> 6) ]; ++ dst[i*stride + 6] = cm[ dst[i*stride + 6] + ((b2 - b5) >> 6) ]; ++ dst[i*stride + 7] = cm[ dst[i*stride + 7] + ((b0 - b7) >> 6) ]; ++*/ ++ add r5, r10, r8 ++ satrnds r5 >> 6, 0 /* r5 = (b0 + b7) >> 6 */ ++ sub r10, r8 ++ satrnds r10 >> 6, 0 /* r10 = (b0 - b7) >> 6 */ ++ add r8, r3, r2 ++ satrnds r8 >> 6, 0 /* r8 = (b2 + b5) >> 6 */ ++ sub r3, r2 ++ satrnds r3 >> 6, 0 /* r3 = (b2 - b5) >> 6 */ ++ ++ add r2, r1, r6 ++ satrnds r2 >> 6, 0 /* r2 = (b4 + b3) >> 6 */ ++ sub r1, r6 ++ satrnds r1 >> 6, 0 /* r1 = (b4 - b3) >> 6 */ ++ ++ add r6, r0, r4 ++ satrnds r6 >> 6, 0 /* r6 = (b6 + b1) >> 6 */ ++
sub r0, r4 ++ satrnds r0 >> 6, 0 /* r0 = (b6 - b1) >> 6 */ ++ ++ ld.w r4, r12[0] ++ ++ packw.sh r8, r5, r8 ++ packw.sh r7, r2, r6 ++ ld.w r9, r12[4] ++ packw.sh r6, r0, r1 ++ packw.sh r5, r3, r10 ++ ++ punpckub.h r10, r4:t ++ punpckub.h r4, r4:b ++ punpckub.h r3, r9:t ++ punpckub.h r9, r9:b ++ ++ padd.h r8, r8, r10 ++ padd.h r7, r7, r4 ++ padd.h r6, r6, r3 ++ padd.h r5, r5, r9 ++ ++ lddsp r10, sp[4] /* r10 = stride (r10 was used as scratch above) */ ++ packsh.ub r0, r8, r7 ++ packsh.ub r1, r6, r5 ++ ++ st.w r12[0], r0 ++ st.w r12[4], r1 ++ ++ ldm r11++, r4-r7 /* fetch the next row; on the last pass this reads 16 bytes past row 7 and the values are unused */ ++ add r12, r10 /* dst += stride */ ++ ++ sub lr, 1 ++ brne 1b ++ ++ sub sp, -8 /* drop the saved dst/stride words */ ++ ldm sp++,r0-r3,r4-r7, pc ++ ++ ++ ++// } ++//} +diff --git a/libavcodec/avr32/idct.S b/libavcodec/avr32/idct.S +new file mode 100644 +index 0000000..e7551ec +--- /dev/null ++++ b/libavcodec/avr32/idct.S +@@ -0,0 +1,829 @@ ++/* ++ * Copyright (c) 2007 Atmel Corporation. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. The name of ATMEL may not be used to endorse or promote products ++ * derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED.
IN NO EVENT SHALL ATMEL ++ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ++ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY ++ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE ++ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH ++ * DAMAGE. ++ */ ++ ++ .global idct_add_avr32 ++ .global idct_put_avr32 ++ .global idct_avr32 ++ ++ ++#define CONST_BITS 13 ++#define PASS1_BITS 2 ++ ++#define ONE ((INT32) 1) ++ ++#define CONST_SCALE (ONE << CONST_BITS) ++ ++#define LINE_SIZE 32 ++ ++#define FIX_0_298631336 (2446) /* FIX(0.298631336) */ ++#define FIX_0_390180644 (3196) /* FIX(0.390180644) */ ++#define FIX_0_541196100 (4433) /* FIX(0.541196100) */ ++#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ ++#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ ++#define FIX_1_175875602 (9633) /* FIX(1.175875602) */ ++#define FIX_1_501321110 (12299)/* FIX(1.501321110) */ ++#define FIX_1_847759065 (15137)/* FIX(1.847759065) */ ++#define FIX_1_961570560 (16069)/* FIX(1.961570560) */ ++#define FIX_2_053119869 (16819)/* FIX(2.053119869) */ ++#define FIX_2_562915447 (20995)/* FIX(2.562915447) */ ++#define FIX_3_072711026 (25172)/* FIX(3.072711026) */ ++ ++ ++#define loop_cnt r11 ++ ++ .text ++ ++idct_add_avr32: // in: r12 = rfp, r11 = iinc, r10 = DCT block; the result is added to the 8 bytes at rfp per output line ++ pushm r0-r3, r4-r7, lr //Free up registers to use for local variables ++ ++ // Give room for some variables on the stack ++ sub sp, 8 ++ stdsp SP[0], r12 // rfp, saved because r12 is reused for coefficient words below ++ stdsp SP[4], r11 // iinc, saved because r11 is reused as loop_cnt ++ ++ mov loop_cnt, 8 //Initialize loop counter ++ ++FOR_ROW: ++ ++ ldm r10, r0, r1, r2, r3 //Load 8 DCT-coefficients from the current row in the DCT-block ++ mov r6, 0 ++#ifdef USE_PREFETCH ++ pref r10[LINE_SIZE] //Prefetch next line ++#endif ++ or r4, r2, r3 << 16 ++ or r4, r1 //Check if all
DCT-coefficients except the DC are zero ++ or r4, r0 ++ brne AC_ROW //If there are non-zero AC coefficients perform row-transform ++ ++ paddsub.h r5, r3:t, r6:b //r6 is 0, so both halves of r5 become the DC coefficient held in r3:t ++ plsl.h r5, r5, PASS1_BITS ++ mov r4, r5 ++ st.d r10++, r4 ++ st.d r10++, r4 ++ ++ sub loop_cnt, 1 //Decrement loop counter ++ brne FOR_ROW //Perform loop one more time if loop_cnt is not zero ++ ++ bral COLOUMN_TRANSFORM //Perform column transform after row transform is computed ++ ++ ++AC_ROW: ++ ++ ++ ld.w r12, pc[coef_table - .] ++ ld.w r9, pc[coef_table - . + 4] ++ ++ padd.h r4, r2, r0 // r4:t = dataptr[2] + dataptr[6],r4:b = dataptr[3] + dataptr[7] ++ mulhh.w r5, r4:t, r12:t ++ mulhh.w r6, r0:t, r12:b ++ ld.w r12, pc[coef_table - . + 8] ++ mulhh.w r7, r2:t, r9:t ++ add r6, r5 // tmp2 ++ satrnds r6 >> (CONST_BITS - PASS1_BITS), 31 ++ add r7, r5 // tmp3 ++ satrnds r7 >> (CONST_BITS - PASS1_BITS), 31 ++ ++ paddsub.h r5, r3:t, r1:t ++ plsl.h r5, r5, PASS1_BITS // r5:t = tmp0, r5:b = tmp1 ++ ++ paddsub.h r7, r5:t, r7:b // r7:t = tmp10, r7:b = tmp13 ++ paddsub.h r6, r5:b, r6:b // r6:t = tmp11, r6:b = tmp12 ++ ++ ++ addhh.w lr, r3:b, r1:b // lr = z4 ++ addhh.w r5, r4:b, lr:b ++ mulhh.w r5, r5:b, r9:b // r5 = z5 ++ ++ ld.w r9, pc[coef_table - . + 12] ++ mulhh.w r4, r4:b, r12:t // r4 = z3 ++ mulhh.w lr, lr:b, r12:b // lr = z4 ++ ++ add r4, r5 ++ add lr, r5 ++ ++ addhh.w r5, r2:b, r1:b // r5 = z2 ++ addhh.w r8, r3:b, r0:b // r8 = z1 ++ ++ ++ mulhh.w r0, r0:b, r9:t // r0 = tmp0 ++ ld.w r12, pc[coef_table - . + 16] ++ mulhh.w r1, r1:b, r9:b // r1 = tmp1 ++ ld.w r9, pc[coef_table - .
+ 20] ++ mulhh.w r2, r2:b, r12:t // r2 = tmp2 ++ mulhh.w r3, r3:b, r12:b // r3 = tmp3 ++ mulhh.w r8, r8:b, r9:t // r8 = z1 ++ mulhh.w r5, r5:b, r9:b // r5 = z2 ++ ++ ++ add r0, r8 ++ add r0, r4 ++ add r1, r5 ++ add r1, lr ++ add r2, r5 ++ add r2, r4 ++ add r3, r8 ++ add r3, lr ++ ++ satrnds r0 >> (CONST_BITS - PASS1_BITS), 31 ++ satrnds r1 >> (CONST_BITS - PASS1_BITS), 31 ++ satrnds r2 >> (CONST_BITS - PASS1_BITS), 31 ++ satrnds r3 >> (CONST_BITS - PASS1_BITS), 31 ++ ++ paddsub.h r5, r6:t, r2:b // r5:t = dataptr[1], r5:b = dataptr[6] ++ paddsub.h r4, r7:t, r3:b // r4:t = dataptr[0], r4:b = dataptr[7] ++ paddsub.h r3, r6:b, r1:b // r3:t = dataptr[2], r3:b = dataptr[5] ++ paddsub.h r2, r7:b, r0:b // r2:t = dataptr[3], r2:b = dataptr[4] ++ ++ sthh.w r10[0], r4:t, r5:t ++ sthh.w r10[4], r3:t, r2:t ++ sthh.w r10[8], r2:b, r3:b ++ sthh.w r10[12], r5:b, r4:b ++ ++ ++ ++ sub r10, -16 ++ sub loop_cnt, 1 ++ brne FOR_ROW, e ++ ++COLOUMN_TRANSFORM: ++ ++ sub r10, 128 //Set pointer to start of DCT block ++ ++ ++ mov loop_cnt, 8 ++FOR_COLOUMN: ++ ldins.h r3:t,r10[0] // r3:t = dataptr[0] ++ ldins.h r1:t,r10[1*8*2]// r1:t = dataptr[1] ++ ldins.h r2:t,r10[2*8*2]// r2:t = dataptr[2] ++ ldins.h r0:t,r10[5*8*2]// r0:t = dataptr[5] ++ ldins.h r3:b,r10[4*8*2]// r3:b = dataptr[4] ++ ldins.h r1:b,r10[3*8*2]// r1:b = dataptr[3] ++ ldins.h r2:b,r10[6*8*2]// r2:b = dataptr[6] ++ ldins.h r0:b,r10[7*8*2]// r0:b = dataptr[7] ++ ++ or r4, r1, r3 << 16 ++ or r4, r2 ++ or r4, r0 ++ brne AC_COLOUMN //If there are non-zero AC coefficients perform column-transform ++ ++ lddsp r12, SP[0] // rfp ++ lddsp r9, SP[4] // iinc ++ satrnds r3 >> ( PASS1_BITS + 3 + 16 ), 9 ++ ld.d r0, r12[0] ++ sub r10, -2 // Increment the dataptr ++ bfins r3, r3, 16, 16 ++ punpckub.h r2, r1:t ++ padd.h r2, r2, r3 ++ punpckub.h r1, r1:b ++ padd.h r1, r1, r3 ++ packsh.ub r1, r2, r1 ++ punpckub.h r2, r0:t ++ padd.h r2, r2, r3 ++ punpckub.h r0, r0:b ++ padd.h r0, r0, r3 ++ packsh.ub r0, r2, r0 ++ st.d r12[0], r0 ++ add r12, r9 //
increment rfp ++ stdsp SP[0], r12 ++ ++ sub loop_cnt, 1//Decrement loop counter ++ brne FOR_COLOUMN//Perform loop one more time if loop_cnt is not zero ++ ++ sub sp, -8 ++ popm r0-r3, r4-r7, pc//Pop back registers and PC ++ ++AC_COLOUMN: ++ ++ ld.w r12, pc[coef_table - .] ++ ld.w r9, pc[coef_table - . + 4] ++ ++ addhh.w r4, r2:t, r2:b ++ mulhh.w r4, r4:b, r12:t // r4 = z1 ++ mulhh.w r5, r2:b, r12:b ++ ld.w r12, pc[coef_table - . + 8] ++ mulhh.w r6, r2:t, r9:t ++ add r5, r4 // r5 = tmp2 ++ add r6, r4 // r6 = tmp3 ++ ++ addhh.w r7, r3:t, r3:b ++ subhh.w r8, r3:t, r3:b ++ ++ lsl r7, CONST_BITS ++ lsl r8, CONST_BITS ++ ++ add r2, r7, r6 // r2 = tmp10 ++ sub r3, r7, r6 // r3 = tmp13 ++ add r4, r8, r5 // r4 = tmp11 ++ sub r5, r8, r5 // r5 = tmp12 ++ ++ padd.h r6, r0, r1 // r6:t = z4, r6:b = z3 ++ addhh.w r7, r6:t, r6:b ++ mulhh.w r7, r7:b, r9:b // r7 = z5 ++ ++ ld.w r9, pc[coef_table - . + 12] ++ mulhh.w r8, r6:b, r12:t // r8 = z3 ++ mulhh.w r6, r6:t, r12:b // r6 = z4 ++ ++ add r8, r7 ++ add r6, r7 ++ ++ paddx.h r7, r0, r1 // r7:t = z2, r7:b = z1 ++ ++ mulhh.w r12, r0:b, r9:t // r12 = tmp0 ++ mulhh.w r0, r0:t, r9:b // r0 = tmp1 ++ ld.w r9, pc[coef_table - . + 16] ++ add r12, r8 ++ add r0, r6 ++ ++ ld.w lr, pc[coef_table - .
+ 20] ++ machh.w r8, r1:b, r9:t // r8 = tmp2 ++ machh.w r6, r1:t, r9:b // r6 = tmp3 ++ mulhh.w r9, r7:b, lr:t // r9 = z1 ++ mulhh.w r7, r7:t, lr:b // r7 = z2 ++ ++ ++ add r12, r9 ++ add r0, r7 ++ add r8, r7 ++ add r6, r9 ++ ++ add r1, r2, r6 // r1 = dataptr[DCTSIZE*0] ++ sub r2, r2, r6 // r2 = dataptr[DCTSIZE*7] ++ add r6, r4, r8 // r6 = dataptr[DCTSIZE*1] ++ sub r4, r4, r8 // r4 = dataptr[DCTSIZE*6] ++ add r8, r5, r0 // r8 = dataptr[DCTSIZE*2] ++ sub r5, r5, r0 // r5 = dataptr[DCTSIZE*5] ++ add r0, r3, r12 // r0 = dataptr[DCTSIZE*3] ++ sub r3, r3, r12 // r3 = dataptr[DCTSIZE*4] ++ ++ satrnds r1 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r2 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r6 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r4 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r8 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r5 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r0 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r3 >> (CONST_BITS+PASS1_BITS+3), 9 ++ ++ packw.sh r1, r1, r6 ++ packw.sh r8, r8, r0 ++ packw.sh r3, r3, r5 ++ packw.sh r4, r4, r2 ++ ++ lddsp r12, SP[0] // rfp ++ lddsp r9, SP[4] // iinc ++ ld.d r6, r12[0] // fetch the 8 bytes at rfp that the results are added to ++ sub r10, -2 // Increment the dataptr ++ punpckub.h r0, r7:t ++ padd.h r1, r1, r0 ++ punpckub.h r0, r7:b ++ padd.h r8, r8, r0 ++ packsh.ub r7, r1, r8 ++ punpckub.h r0, r6:t ++ padd.h r3, r3, r0 ++ punpckub.h r0, r6:b ++ padd.h r4, r4, r0 ++ packsh.ub r6, r3, r4 ++ st.d r12[0], r6 ++ add r12, r9 // increment rfp ++ stdsp SP[0], r12 ++ ++ sub loop_cnt, 1 //Decrement loop counter ++ brne FOR_COLOUMN //Perform loop one more time if loop_cnt is not zero ++ ++ sub sp, -8 ++ popm r0-r3, r4-r7, pc //Pop back registers and PC ++ ++ ++ ++//Coefficient table: ++ .align 2 ++coef_table: ++ .short FIX_0_541196100, -FIX_1_847759065, FIX_0_765366865, FIX_1_175875602 ++ .short - FIX_1_961570560, - FIX_0_390180644, FIX_0_298631336, FIX_2_053119869 ++ .short FIX_3_072711026, FIX_1_501321110, - FIX_0_899976223, - FIX_2_562915447 ++ ++ ++idct_put_avr32: ++ pushm r0-r3, r4-r7,
lr //Free up registers to use for local variables ++ ++ //; Give room for some variables on the stack ++ sub sp, 8 ++ stdsp SP[0], r12 // rfp ++ stdsp SP[4], r11 // iinc ++ ++ mov loop_cnt, 8 //Initialize loop counter ++ ++0: ++ ++ ldm r10, r0, r1, r2, r3 //Load 8 DCT-coeffisients from the current row in the DCT-block ++ mov r6, 0 ++#ifdef USE_PREFETCH ++ pref r10[LINE_SIZE] //Prefetch next line ++#endif ++ or r4, r2, r3 << 16 ++ or r4, r1 //Check if all DCT-coeffisients except the DC is zero ++ or r4, r0 ++ brne 1f //If there are non-zero AC coeffisients perform row-transform ++ ++ paddsub.h r5, r3:t, r6:b //Extract the DC-coeff from r5 ++ plsl.h r5, r5, PASS1_BITS ++ mov r4, r5 ++ st.d r10++, r4 ++ st.d r10++, r4 ++ ++ sub loop_cnt, 1 //Decrement loop counter ++ brne 0b //Perform loop one more time if loop_cnt is not zero ++ ++ bral 2f //Perform coloumn transform after row transform is computed ++ ++1: ++ ++ ld.w r12, pc[coef_table_copy - .] ++ ld.w r9, pc[coef_table_copy - . + 4] ++ ++ padd.h r4, r2, r0 // r4:t = dataptr[2] + dataptr[6],r4:b = dataptr[3] + dataptr[7] ++ mulhh.w r5, r4:t, r12:t ++ mulhh.w r6, r0:t, r12:b ++ ld.w r12, pc[coef_table_copy - . + 8] ++ mulhh.w r7, r2:t, r9:t ++ add r6, r5 // tmp2 ++ satrnds r6 >> (CONST_BITS - PASS1_BITS), 31 ++ add r7, r5 // tmp3 ++ satrnds r7 >> (CONST_BITS - PASS1_BITS), 31 ++ ++ paddsub.h r5, r3:t, r1:t ++ plsl.h r5, r5, PASS1_BITS // r5:t = tmp0, r5:b = tmp1 ++ ++ paddsub.h r7, r5:t, r7:b // r7:t = tmp10, r7:b = tmp13 ++ paddsub.h r6, r5:b, r6:b // r6:t = tmp11, r6:b = tmp12 ++ ++ ++ ++ addhh.w lr, r3:b, r1:b // lr = z4 ++ addhh.w r5, r4:b, lr:b ++ mulhh.w r5, r5:b, r9:b // r5 = z5 ++ ++ ld.w r9, pc[coef_table_copy - . + 12] ++ mulhh.w r4, r4:b, r12:t // r4 = z3 ++ mulhh.w lr, lr:b, r12:b // lr = z4 ++ ++ add r4, r5 ++ add lr, r5 ++ ++ addhh.w r5, r2:b, r1:b // r5 = z2 ++ addhh.w r8, r3:b, r0:b // r8 = z1 ++ ++ ++ mulhh.w r0, r0:b, r9:t // r0 = tmp0 ++ ld.w r12, pc[coef_table_copy - . 
+ 16] ++ mulhh.w r1, r1:b, r9:b // r1 = tmp1 ++ ld.w r9, pc[coef_table_copy - . + 20] ++ mulhh.w r2, r2:b, r12:t // r2 = tmp2 ++ mulhh.w r3, r3:b, r12:b // r3 = tmp3 ++ mulhh.w r8, r8:b, r9:t // r8 = z1 ++ mulhh.w r5, r5:b, r9:b // r5 = z2 ++ ++ ++ add r0, r8 ++ add r0, r4 ++ add r1, r5 ++ add r1, lr ++ add r2, r5 ++ add r2, r4 ++ add r3, r8 ++ add r3, lr ++ ++ satrnds r0 >> (CONST_BITS - PASS1_BITS), 31 ++ satrnds r1 >> (CONST_BITS - PASS1_BITS), 31 ++ satrnds r2 >> (CONST_BITS - PASS1_BITS), 31 ++ satrnds r3 >> (CONST_BITS - PASS1_BITS), 31 ++ ++ paddsub.h r5, r6:t, r2:b // r5:t = dataptr[1], r5:b = dataptr[6] ++ paddsub.h r4, r7:t, r3:b // r4:t = dataptr[0], r4:b = dataptr[7] ++ paddsub.h r3, r6:b, r1:b // r3:t = dataptr[2], r3:b = dataptr[5] ++ paddsub.h r2, r7:b, r0:b // r2:t = dataptr[3], r2:b = dataptr[4] ++ ++ sthh.w r10[0], r4:t, r5:t ++ sthh.w r10[4], r3:t, r2:t ++ sthh.w r10[8], r2:b, r3:b ++ sthh.w r10[12], r5:b, r4:b ++ ++ ++ ++ sub r10, -16 ++ sub loop_cnt, 1 ++ brne 0b ++ ++2: ++ ++ sub r10, 128 //Set pointer to start of DCT block ++ ++ mov loop_cnt, 8 ++ ++0: ++ ldins.h r3:t,r10[0] // r3:t = dataptr[0] ++ ldins.h r1:t,r10[1*8*2]// r1:t = dataptr[1] ++ ldins.h r2:t,r10[2*8*2]// r2:t = dataptr[2] ++ ldins.h r0:t,r10[5*8*2]// r0:t = dataptr[5] ++ ldins.h r3:b,r10[4*8*2]// r3:b = dataptr[4] ++ ldins.h r1:b,r10[3*8*2]// r1:b = dataptr[3] ++ ldins.h r2:b,r10[6*8*2]// r2:b = dataptr[6] ++ ldins.h r0:b,r10[7*8*2]// r0:b = dataptr[7] ++ ++ or r4, r1, r3 << 16 ++ or r4, r2 ++ or r4, r0 ++ brne 1f //If there are non-zero AC coeffisients perform row-transform ++ ++ lddsp r12, SP[0] // rfp ++ lddsp r9, SP[4] // iinc ++ satrnds r3 >> ( PASS1_BITS + 3 + 16 ), 31 ++ packw.sh r3, r3, r3 ++ packsh.ub r3, r3, r3 ++ mov r2, r3 ++ st.d r12[0], r2 ++ add r12, r9 // increment rfp ++ sub r10, -2 // Increment the dataptr ++ stdsp SP[0], r12 ++ ++ sub loop_cnt, 1//Decrement loop counter ++ brne 0b //Perform loop one more time if loop_cnt is not zero ++ ++ sub sp, -8 ++ popm 
r0-r3, r4-r7, pc//Pop back registers and PC ++ ++1: ++ ++ ld.w r12, pc[coef_table_copy - .] ++ ld.w r9, pc[coef_table_copy - . + 4] ++ ++ addhh.w r4, r2:t, r2:b ++ mulhh.w r4, r4:b, r12:t // r4 = z1 ++ mulhh.w r5, r2:b, r12:b ++ ld.w r12, pc[coef_table_copy - . + 8] ++ mulhh.w r6, r2:t, r9:t ++ add r5, r4 // r5 = tmp2 ++ add r6, r4 // r6 = tmp3 ++ ++ addhh.w r7, r3:t, r3:b ++ subhh.w r8, r3:t, r3:b ++ ++ lsl r7, CONST_BITS ++ lsl r8, CONST_BITS ++ ++ add r2, r7, r6 // r2 = tmp10 ++ sub r3, r7, r6 // r3 = tmp13 ++ add r4, r8, r5 // r4 = tmp11 ++ sub r5, r8, r5 // r5 = tmp12 ++ ++ ++ padd.h r6, r0, r1 // r6:t = z4, r6:b = z3 ++ addhh.w r7, r6:t, r6:b ++ mulhh.w r7, r7:b, r9:b // r7 = z5 ++ ++ ld.w r9, pc[coef_table_copy - . + 12] ++ mulhh.w r8, r6:b, r12:t // r8 = z3 ++ mulhh.w r6, r6:t, r12:b // r6 = z4 ++ ++ add r8, r7 ++ add r6, r7 ++ ++ paddx.h r7, r0, r1 // r7:t = z2, r7:b = z1 ++ ++ mulhh.w r12, r0:b, r9:t // r12 = tmp0 ++ mulhh.w r0, r0:t, r9:b // r0 = tmp1 ++ ld.w r9, pc[coef_table_copy - . + 16] ++ add r12, r8 ++ add r0, r6 ++ ++ ld.w lr, pc[coef_table_copy - . 
+ 20] ++ machh.w r8, r1:b, r9:t // r8 = tmp2 ++ machh.w r6, r1:t, r9:b // r6 = tmp3 ++ mulhh.w r9, r7:b, lr:t // r9 = z1 ++ mulhh.w r7, r7:t, lr:b // r7 = z2 ++ ++ ++ add r12, r9 ++ add r0, r7 ++ add r8, r7 ++ add r6, r9 ++ ++ add r1, r2, r6 // r1 = dataptr[DCTSIZE*0] ++ sub r2, r2, r6 // r2 = dataptr[DCTSIZE*7] ++ add r6, r4, r8 // r6 = dataptr[DCTSIZE*1] ++ sub r4, r4, r8 // r4 = dataptr[DCTSIZE*6] ++ add r8, r5, r0 // r8 = dataptr[DCTSIZE*2] ++ sub r5, r5, r0 // r5 = dataptr[DCTSIZE*5] ++ add r0, r3, r12 // r0 = dataptr[DCTSIZE*3] ++ sub r3, r3, r12 // r3 = dataptr[DCTSIZE*4] ++ ++ satrnds r1 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r2 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r6 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r4 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r8 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r5 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r0 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r3 >> (CONST_BITS+PASS1_BITS+3), 9 ++ ++ packw.sh r1, r1, r6 ++ packw.sh r8, r8, r0 ++ packw.sh r3, r3, r5 ++ packw.sh r4, r4, r2 ++ ++ packsh.ub r1, r1, r8 ++ packsh.ub r0, r3, r4 ++ lddsp r12, SP[0] // rfp ++ lddsp r9, SP[4] // iinc ++ st.d r12[0], r0 ++ sub r10, -2 // Increment the dataptr ++ add r12, r9 // increment rfp ++ stdsp SP[0], r12 ++ ++ sub loop_cnt, 1 //Decrement loop counter ++ brne 0b //Perform loop one more time if loop_cnt is not zero ++ ++ sub sp, -8 ++ popm r0-r3, r4-r7, pc //Pop back registers and PC ++ ++ ++ ++ .align 2 ++coef_table_copy: ++ .short FIX_0_541196100, -FIX_1_847759065, FIX_0_765366865, FIX_1_175875602 ++ .short - FIX_1_961570560, - FIX_0_390180644, FIX_0_298631336, FIX_2_053119869 ++ .short FIX_3_072711026, FIX_1_501321110, - FIX_0_899976223, - FIX_2_562915447 ++ ++ ++idct_avr32: // in: r12 = DCT block; rows are transformed into a stack scratch block, columns are written back through r12 ++ pushm r0-r3, r4-r7, lr //Free up registers to use for local variables ++ ++ //; Give room for a temporary 8*8*2-byte block on the stack; sp itself is used as the moving data pointer below ++ sub sp, 8*8*2 ++ ++ mov loop_cnt, 8 //Initialize loop counter ++ ++0: ++ ++ ldm r12++, r0, r1, r2, r3 //Load 8
DCT-coefficients from the current row in the DCT-block ++ mov r6, 0 ++#ifdef USE_PREFETCH ++ pref r12[LINE_SIZE] //Prefetch next line ++#endif ++ or r4, r2, r3 << 16 ++ or r4, r1 //Check if all DCT-coefficients except the DC are zero ++ or r4, r0 ++ brne 1f //If there are non-zero AC coefficients perform row-transform ++ ++ paddsub.h r5, r3:t, r6:b //r6 is 0, so both halves of r5 become the DC coefficient held in r3:t ++ plsl.h r5, r5, PASS1_BITS ++ mov r4, r5 ++ st.d sp++, r4 ++ st.d sp++, r4 ++ ++ sub loop_cnt, 1 //Decrement loop counter ++ brne 0b //Perform loop one more time if loop_cnt is not zero ++ ++ bral 2f //Perform column transform after row transform is computed ++ ++1: ++ ++ ld.w r10, pc[coef_table_idct - .] ++ ld.w r9, pc[coef_table_idct - . + 4] ++ ++ padd.h r4, r2, r0 // r4:t = dataptr[2] + dataptr[6],r4:b = dataptr[3] + dataptr[7] ++ mulhh.w r5, r4:t, r10:t ++ mulhh.w r6, r0:t, r10:b ++ ld.w r10, pc[coef_table_idct - . + 8] ++ mulhh.w r7, r2:t, r9:t ++ add r6, r5 // tmp2 ++ satrnds r6 >> (CONST_BITS - PASS1_BITS), 31 ++ add r7, r5 // tmp3 ++ satrnds r7 >> (CONST_BITS - PASS1_BITS), 31 ++ ++ paddsub.h r5, r3:t, r1:t ++ plsl.h r5, r5, PASS1_BITS // r5:t = tmp0, r5:b = tmp1 ++ ++ paddsub.h r7, r5:t, r7:b // r7:t = tmp10, r7:b = tmp13 ++ paddsub.h r6, r5:b, r6:b // r6:t = tmp11, r6:b = tmp12 ++ ++ ++ ++ addhh.w lr, r3:b, r1:b // lr = z4 ++ addhh.w r5, r4:b, lr:b ++ mulhh.w r5, r5:b, r9:b // r5 = z5 ++ ++ ld.w r9, pc[coef_table_idct - . + 12] ++ mulhh.w r4, r4:b, r10:t // r4 = z3 ++ mulhh.w lr, lr:b, r10:b // lr = z4 ++ ++ add r4, r5 ++ add lr, r5 ++ ++ addhh.w r5, r2:b, r1:b // r5 = z2 ++ addhh.w r8, r3:b, r0:b // r8 = z1 ++ ++ ++ mulhh.w r0, r0:b, r9:t // r0 = tmp0 ++ ld.w r10, pc[coef_table_idct - . + 16] ++ mulhh.w r1, r1:b, r9:b // r1 = tmp1 ++ ld.w r9, pc[coef_table_idct - .
+ 20] ++ mulhh.w r2, r2:b, r10:t // r2 = tmp2 ++ mulhh.w r3, r3:b, r10:b // r3 = tmp3 ++ mulhh.w r8, r8:b, r9:t // r8 = z1 ++ mulhh.w r5, r5:b, r9:b // r5 = z2 ++ ++ ++ add r0, r8 ++ add r0, r4 ++ add r1, r5 ++ add r1, lr ++ add r2, r5 ++ add r2, r4 ++ add r3, r8 ++ add r3, lr ++ ++ satrnds r0 >> (CONST_BITS - PASS1_BITS), 31 ++ satrnds r1 >> (CONST_BITS - PASS1_BITS), 31 ++ satrnds r2 >> (CONST_BITS - PASS1_BITS), 31 ++ satrnds r3 >> (CONST_BITS - PASS1_BITS), 31 ++ ++ paddsub.h r5, r6:t, r2:b // r5:t = dataptr[1], r5:b = dataptr[6] ++ paddsub.h r4, r7:t, r3:b // r4:t = dataptr[0], r4:b = dataptr[7] ++ paddsub.h r3, r6:b, r1:b // r3:t = dataptr[2], r3:b = dataptr[5] ++ paddsub.h r2, r7:b, r0:b // r2:t = dataptr[3], r2:b = dataptr[4] ++ ++ sthh.w sp[0], r4:t, r5:t ++ sthh.w sp[4], r3:t, r2:t ++ sthh.w sp[8], r2:b, r3:b ++ sthh.w sp[12], r5:b, r4:b ++ ++ ++ ++ sub sp, -16 ++ sub loop_cnt, 1 ++ brne 0b ++ ++2: ++ ++ sub sp, 8*8*2 //Set sp back to the start of the temporary block ++ sub r12, 8*8*2 //Set r12 back to the start of the DCT block ++ ++ mov loop_cnt, 8 ++ ++0: ++ ldins.h r3:t,sp[0] // r3:t = dataptr[0] ++ ldins.h r1:t,sp[1*8*2]// r1:t = dataptr[1] ++ ldins.h r2:t,sp[2*8*2]// r2:t = dataptr[2] ++ ldins.h r0:t,sp[5*8*2]// r0:t = dataptr[5] ++ ldins.h r3:b,sp[4*8*2]// r3:b = dataptr[4] ++ ldins.h r1:b,sp[3*8*2]// r1:b = dataptr[3] ++ ldins.h r2:b,sp[6*8*2]// r2:b = dataptr[6] ++ ldins.h r0:b,sp[7*8*2]// r0:b = dataptr[7] ++ ++ or r4, r1, r3 << 16 ++ or r4, r2 ++ or r4, r0 ++ brne 1f //If there are non-zero AC coefficients perform column-transform ++ ++ satrnds r3 >> ( PASS1_BITS + 3 + 16 ), 31 ++ packw.sh r3, r3, r3 ++ mov r2, r3 ++ st.d r12++, r2 ++ st.d r12++, r2 ++ sub sp, -2 // Increment the dataptr ++ ++ sub loop_cnt, 1//Decrement loop counter ++ brne 0b //Perform loop one more time if loop_cnt is not zero ++ ++ sub sp, -(8*8*2 - 8*2) //Release the temporary block: 8*8*2 bytes minus the 8*2 the column loop already added to sp ++ popm r0-r3, r4-r7, pc//Pop back registers and PC ++ ++1: ++ ++ ld.w r10, pc[coef_table_idct - .] ++ ld.w r9, pc[coef_table_idct - .
+ 4] ++ ++ addhh.w r4, r2:t, r2:b ++ mulhh.w r4, r4:b, r10:t // r4 = z1 ++ mulhh.w r5, r2:b, r10:b ++ ld.w r10, pc[coef_table_idct - . + 8] ++ mulhh.w r6, r2:t, r9:t ++ add r5, r4 // r5 = tmp2 ++ add r6, r4 // r6 = tmp3 ++ ++ addhh.w r7, r3:t, r3:b ++ subhh.w r8, r3:t, r3:b ++ ++ lsl r7, CONST_BITS ++ lsl r8, CONST_BITS ++ ++ add r2, r7, r6 // r2 = tmp10 ++ sub r3, r7, r6 // r3 = tmp13 ++ add r4, r8, r5 // r4 = tmp11 ++ sub r5, r8, r5 // r5 = tmp12 ++ ++ ++ padd.h r6, r0, r1 // r6:t = z4, r6:b = z3 ++ addhh.w r7, r6:t, r6:b ++ mulhh.w r7, r7:b, r9:b // r7 = z5 ++ ++ ld.w r9, pc[coef_table_idct - . + 12] ++ mulhh.w r8, r6:b, r10:t // r8 = z3 ++ mulhh.w r6, r6:t, r10:b // r6 = z4 ++ ++ add r8, r7 ++ add r6, r7 ++ ++ paddx.h r7, r0, r1 // r7:t = z2, r7:b = z1 ++ ++ mulhh.w r10, r0:b, r9:t // r10 = tmp0 ++ mulhh.w r0, r0:t, r9:b // r0 = tmp1 ++ ld.w r9, pc[coef_table_idct - . + 16] ++ add r10, r8 ++ add r0, r6 ++ ++ ld.w lr, pc[coef_table_idct - . + 20] ++ machh.w r8, r1:b, r9:t // r8 = tmp2 ++ machh.w r6, r1:t, r9:b // r6 = tmp3 ++ mulhh.w r9, r7:b, lr:t // r9 = z1 ++ mulhh.w r7, r7:t, lr:b // r7 = z2 ++ ++ ++ add r10, r9 ++ add r0, r7 ++ add r8, r7 ++ add r6, r9 ++ ++ add r1, r2, r6 // r1 = dataptr[DCTSIZE*0] ++ sub r2, r2, r6 // r2 = dataptr[DCTSIZE*7] ++ add r6, r4, r8 // r6 = dataptr[DCTSIZE*1] ++ sub r4, r4, r8 // r4 = dataptr[DCTSIZE*6] ++ add r8, r5, r0 // r8 = dataptr[DCTSIZE*2] ++ sub r5, r5, r0 // r5 = dataptr[DCTSIZE*5] ++ add r0, r3, r10 // r0 = dataptr[DCTSIZE*3] ++ sub r3, r3, r10 // r3 = dataptr[DCTSIZE*4] ++ ++ satrnds r1 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r2 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r6 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r4 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r8 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r5 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r0 >> (CONST_BITS+PASS1_BITS+3), 9 ++ satrnds r3 >> (CONST_BITS+PASS1_BITS+3), 9 ++ ++ packw.sh r7, r1, r6 ++ packw.sh r6, r8, r0 ++ packw.sh r5, r3, r5 ++ packw.sh
r4, r4, r2 ++ ++ stm r12, r4-r7 // NOTE(review): the 8 results of one column are written as 16 contiguous bytes - confirm callers expect this layout ++ sub sp, -2 // Increment the dataptr ++ sub r12, -16 ++ ++ sub loop_cnt, 1 //Decrement loop counter ++ brne 0b //Perform loop one more time if loop_cnt is not zero ++ ++ sub sp, -(8*8*2 - 8*2) //Release the temporary block: 8*8*2 bytes minus the 8*2 the column loop already added to sp ++ popm r0-r3, r4-r7, pc //Pop back registers and PC ++ ++ ++ ++ .align 2 ++coef_table_idct: ++ .short FIX_0_541196100, -FIX_1_847759065, FIX_0_765366865, FIX_1_175875602 ++ .short - FIX_1_961570560, - FIX_0_390180644, FIX_0_298631336, FIX_2_053119869 ++ .short FIX_3_072711026, FIX_1_501321110, - FIX_0_899976223, - FIX_2_562915447 ++ +diff --git a/libavcodec/avr32/mc.S b/libavcodec/avr32/mc.S +new file mode 100644 +index 0000000..07a002d +--- /dev/null ++++ b/libavcodec/avr32/mc.S +@@ -0,0 +1,434 @@ ++/* ++ * Copyright (c) 2007 Atmel Corporation. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. The name of ATMEL may not be used to endorse or promote products ++ * derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED.
IN NO EVENT SHALL ATMEL ++ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ++ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY ++ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE ++ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH ++ * DAMAGE. ++ */ ++ ++ ++ /* Macro for masking the lowest bit of each byte in a ++ packed word */ ++ .macro packedmask1 reg, round ++ .if \round ++ and \reg, \reg, r8 >> 1 ++ .else ++ and \reg, r8 ++ .endif ++ .endm ++ ++ /* Macro for 8 pixel wide horizontal and vertical interpolation functions */ ++ .macro pixels8_hv round, put ++ ++ ++ pushm r0-r7, lr ++ ++ /* R12 = uint8_t *block, R11 = uint8_t pixels, R10 = int line_size, R9 = int h */ ++ ++ /* Rounding immediate */ ++ .if \round ++ mov r8, lo(0x02020202) ++ orh r8, hi(0x02020202) ++ .else ++ mov r8, lo(0x01010101) ++ orh r8, hi(0x01010101) ++ .endif ++ mov r7, 2 ++ ++ /* Pixel naming convention : ++ ++ |-----------------------------------------------------| ++ | s00 | s01 | s02 | s03 | s04 | s05 | s06 | s07 | s08 | ++ |----d00---d01---d02---d03---d04---d05---d06---d07----| ++ | s10 | s11 | s12 | s13 | s14 | s15 | s16 | s17 | s18 | ++ |-----------------------------------------------------| ++ */ ++1: ++ ld.w r0, r11[0] // r0 = { s00, s01, s02, s03 } ++ ld.w r1, r11[1] // r1 = { s01, s02, s03, s04 } ++ mov lr, r9 ++ eor r2, r0, r1 ++ packedmask1 r2, \round ++ add r2, r8 ++ ++ paddh.ub r0, r0, r1 // r0 = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2} ++ ++ add r11, r10 // pixels += line_size ++ ld.w r1, r11[0] // r1 = { s10, s11, s12, s13 } ++ ld.w r3, r11[1] // r3 = { s11, s12, s13, s14 } ++0: ++ eor r5, r1, r3 ++ packedmask1 r5, \round ++ add r2, r5 ++ ++ paddh.ub r1, r1, r3 // r1 = 
{(s10+s11)/2,(s11+s12)/2,(s12+s13)/2,(s13+s14)/2} ++ eor r6, r0, r1 ++ packedmask1 r6, \round ++ add r2, r2, r6 << 1 ++ ++ ld.w r3, r11[r10] // r3 = { s00, s01, s02, s03 } ++ add r11, r10 // pixels += line_size ++ ld.w r4, r11[1] // r4 = { s01, s02, s03, s04 } ++ ++ paddh.ub r0, r0, r1 ++ plsr.b r2, r2, 2 ++ padd.b r0, r0, r2 // r0 = { d00, d01, d02, d03 } ++ ++ /* Next row */ ++ .if \put ++ eor r2, r3, r4 ++ packedmask1 r2, \round ++ add r2, r8 ++ .else ++ ld.w r6, r12[0] ++ eor r2, r3, r4 ++ packedmask1 r2, \round ++ add r2, r8 ++ pavg.ub r0, r0, r6 ++ .endif ++ st.w r12[0], r0 // Put data into the block ++ ++ add r5, r2 ++ paddh.ub r0, r3, r4 // r0 = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2} ++ ++ eor r6, r0, r1 ++ packedmask1 r6, \round ++ add r5, r5, r6 << 1 ++ ++ .if \put ++ paddh.ub r1, r0, r1 ++ plsr.b r5, r5, 2 ++ padd.b r1, r1, r5 // r1 = { d10, d11, d12, d13 } ++ .else ++ ld.w r3, r12[r10] ++ paddh.ub r1, r0, r1 ++ plsr.b r5, r5, 2 ++ padd.b r1, r1, r5 // r1 = { d10, d11, d12, d13 } ++ pavg.ub r1, r1, r3 ++ .endif ++ ++ st.w r12[r10], r1 // Put data into the block ++ ++ ++ ld.w r1, r11[r10] // r1 = { s10, s11, s12, s13 } ++ add r11, r10 // pixels += line_size ++ ld.w r3, r11[1] // r3 = { s11, s12, s13, s14 } ++ add r12, r12, r10 << 1 // block += 2*line_size ++ sub lr, 2 ++ brne 0b ++ ++ mul r0, r10, r9 // r0 = line_size * h ++ rsub r0, r0, 4 // r0 = 4 - (line_size * h) ++ add r11, r0 ++ sub r11, r10 // pixels += 4 - (line_size * (h+1)) ++ add r12, r0 // pixels += 4 - (line_size * (h)) ++ sub r7, 1 ++ brne 1b ++ ++ popm r0-r7, pc ++ .endm ++ ++ ++ /* Macro for 8 pixel wide vertical interpolation functions */ ++ ++ .macro pixels8_v round, put ++ pushm r4-r7,lr ++ /* R12 = uint8_t *block, R11 = uint8_t pixels, R10 = int line_size, R9 = int h */ ++ ++ /* ++ Pixel Naming Convention : ++ |-----------------------------------------------| ++ | s00 | s01 | s02 | s03 | s04 | s05 | s06 | s07 | ++ |-d00---d01---d02---d03---d04---d05---d06---d07-| ++ | s10 | 
s11 | s12 | s13 | s14 | s15 | s16 | s17 | ++ |-----------------------------------------------| ++ */ ++ ld.w r8, r11[r10] // r8 = { s10, s11, s12, s13 } ++ ld.w lr, r11++ // lr = { s00, s01, s02, s03 }, src += 4 ++ ld.w r7, r11[0] // r7 = { s04, s05, s06, s07 } ++ ld.w r6, r11[r10] // r6 = { s14, s15, s16, s17 } ++ sub r10, 4 // stride -= 4 ++ add r11, r11, r10 << 1 // src += 2*stride ++ sub r11, -4 // src += 4 ++ ++0: ++ .if \round ++ pavg.ub r5, r8, lr // r5 = {(s10+s00)/2,(s11+s01)/2,(s12+s02)/2,(s13+s03)/2} ++ pavg.ub r4, r6, r7 // r4 = {(s14+s04)/2,(s15+s05)/2,(s16+s06)/2,(s17+s07)/2} ++ .else ++ paddh.ub r5, r8, lr // r5 = {(s10+s00)/2,(s11+s01)/2,(s12+s02)/2,(s13+s03)/2} ++ paddh.ub r4, r6, r7 // r4 = {(s14+s04)/2,(s15+s05)/2,(s16+s06)/2,(s17+s07)/2} ++ .endif ++ ++ .if \put ++ st.w r12++, r5 // *dst++ = { d00, d01, d02, d03 } ++ ld.w lr, r11++ // lr = { s10, s11, s12, s13 }, src += 4 ++ st.w r12[0], r4 // *dst = { d04, d05, d06, d07 } ++ ld.w r7, r11[0] // r7 = { s14, s15, s16, s17 } ++ .else ++ ld.w lr, r12[0] ++ ld.w r7, r12[4] ++ pavg.ub r5, r5, lr ++ pavg.ub r4, r4, r7 ++ st.w r12++, r5 // *dst++ = { d00, d01, d02, d03 } ++ ld.w lr, r11++ // lr = { s10, s11, s12, s13 }, src += 4 ++ st.w r12[0], r4 // *dst = { d04, d05, d06, d07 } ++ ld.w r7, r11[0] // r7 = { s14, s15, s16, s17 } ++ .endif ++ add r11, r10 // src += stride ++#ifdef USE_PREFETCH ++ pref r11[0] ++#endif ++ add r12, r10 // dst += stride ++ ++ .if \round ++ pavg.ub r5, r8, lr // r5 = {(s10+s00)/2,(s11+s01)/2,(s12+s02)/2,(s13+s03)/2} ++ pavg.ub r4, r6, r7 // r4 = {(s14+s04)/2,(s15+s05)/2,(s16+s06)/2,(s17+s07)/2} ++ .else ++ paddh.ub r5, r8, lr // r5 = {(s10+s00)/2,(s11+s01)/2,(s12+s02)/2,(s13+s03)/2} ++ paddh.ub r4, r6, r7 // r4 = {(s14+s04)/2,(s15+s05)/2,(s16+s06)/2,(s17+s07)/2} ++ .endif ++ .if \put ++ st.w r12++, r5 // *dst++ = { d00, d01, d02, d03 } ++ ld.w r8, r11++ // r8 = { s10, s11, s12, s13 }, src += 4 ++ st.w r12[0], r4 // *dst = { d04, d05, d06, d07 } ++ ld.w r6, r11[0] // r6 = { 
s14, s15, s16, s17 } ++ .else ++ ld.w r8, r12[0] ++ ld.w r6, r12[4] ++ pavg.ub r5, r5, r8 ++ pavg.ub r4, r4, r6 ++ st.w r12++, r5 // *dst++ = { d00, d01, d02, d03 } ++ ld.w r8, r11++ // r8 = { s10, s11, s12, s13 }, src += 4 ++ st.w r12[0], r4 // *dst = { d04, d05, d06, d07 } ++ ld.w r6, r11[0] // r6 = { s14, s15, s16, s17 } ++ .endif ++ ++ add r11, r10 // src += stride ++#ifdef USE_PREFETCH ++ pref r11[0] ++#endif ++ add r12, r10 // dst += stride ++ sub r9, 2 ++ brne 0b ++ ++ popm r4-r7,pc ++ .endm ++ ++ /* Macro for 8 pixel wide horizontal interpolation functions */ ++ ++ .macro pixels8_h round, put ++ pushm r4-r7, lr ++ ++ /* R12 = uint8_t *block, R11 = uint8_t pixels, R10 = int line_size, R9 = int h */ ++ /* ++ Pixel Naming Convention: ++ |--------------------------------------------------------------------| ++ | s00 d00 s01 d01 s02 d02 s03 d03 s04 d04 s05 d05 s06 d06 s07 d07 s08| ++ |------|-------|-------|-------|-------|-------|-------|-------|-----| ++ | s10 d10 s11 d11 s12 d12 s13 d13 s14 d14 s15 d15 s16 d16 s17 d17 s18| ++ |--------------------------------------------------------------------| ++ */ ++ ++ ld.w lr, r11[0] // lr = { s00, s01, s02, s03 } ++ ld.w r8, r11[1] // r8 = { s01, s02, s03, s04 } ++ ld.w r7, r11[4] // r7 = { s04, s05, s06, s07 } ++ ld.w r6, r11[5] // r6 = { s05, s06, s07, s08 } ++ add r11, r10 // src += stride ++ ++0: ++ .if \round ++ pavg.ub lr, r8, lr // lr = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2} ++ pavg.ub r7, r6, r7 // r7 = {(s04+s05)/2,(s05+s06)/2,(s06+s07)/2,(s07+s08)/2} ++ .else ++ paddh.ub lr, r8, lr // lr = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2} ++ paddh.ub r7, r6, r7 // r7 = {(s04+s05)/2,(s05+s06)/2,(s06+s07)/2,(s07+s08)/2} ++ .endif ++ .if \put ++ ld.w r5, r11[0] // r5 = { s00, s01, s02, s03 } ++ ld.w r4, r11[1] // r4 = { s01, s02, s03, s04 } ++ .else ++ ld.w r8, r12[0] ++ ld.w r6, r12[4] ++ ld.w r5, r11[0] // r5 = { s00, s01, s02, s03 } ++ ld.w r4, r11[1] // r4 = { s01, s02, s03, s04 } ++ pavg.ub lr, 
lr, r8 ++ pavg.ub r7, r7, r6 ++ .endif ++ st.w r12[0], lr // dst = { d00, d01, d02, d03 } ++ st.w r12[4], r7 // dst = { d04, d05, d06, d07 } ++ ld.w r8, r11[4] // r8 = { s04, s05, s06, s07 } ++ ld.w r6, r11[5] // r6 = { s05, s06, s07, s08 } ++ add r11, r10 // src += stride ++#ifdef USE_PREFETCH ++ pref r11[0] ++#endif ++ add r12, r10 // dst += stride ++ ++ .if \round ++ pavg.ub r5, r4, r5 // r5 = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2} ++ pavg.ub r4, r6, r8 // r4 = {(s04+s05)/2,(s05+s06)/2,(s06+s07)/2,(s07+s08)/2} ++ .else ++ paddh.ub r5, r4, r5 // r5 = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2} ++ paddh.ub r4, r6, r8 // r4 = {(s04+s05)/2,(s05+s06)/2,(s06+s07)/2,(s07+s08)/2} ++ .endif ++ .if \put ++ ld.w lr, r11[0] // lr = { s00, s01, s02, s03 } ++ ld.w r8, r11[1] // r8 = { s01, s02, s03, s04 } ++ .else ++ ld.w r7, r12[0] ++ ld.w r6, r12[4] ++ ld.w lr, r11[0] // lr = { s00, s01, s02, s03 } ++ ld.w r8, r11[1] // r8 = { s01, s02, s03, s04 } ++ pavg.ub r5, r5, r7 ++ pavg.ub r4, r4, r6 ++ .endif ++ st.w r12[0], r5 // dst = { d00, d01, d02, d03 } ++ st.w r12[4], r4 // dst = { d04, d05, d06, d07 } ++ ld.w r7, r11[4] // r7 = { s04, s05, s06, s07 } ++ ld.w r6, r11[5] // r6 = { s05, s06, s07, s08 } ++ add r11, r10 // src += stride ++#ifdef USE_PREFETCH ++ pref r11[0] ++#endif ++ add r12, r10 // dst += stride ++ sub r9, 2 ++ brne 0b ++ ++ popm r4-r7, pc ++ .endm ++ ++ /* Macro for 8 pixel wide copy functions */ ++ .macro pixels8 put ++ stm --sp, r3-r7,lr ++ /* R12 = uint8_t *block, R11 = uint8_t pixels, R10 = int line_size, R9 = int h */ ++ mov lr, r9 ++ sub r3, r10, 2 // stride2 = stride - 2 ++0: ++ .if \put ++ ld.w r9, r11[r10] // r9 = { s10, s11, s12, s13 } ++ ld.w r7, r11++ // r7 = { s00, s01, s02, s03 }, src += 4 ++ ld.w r6, r11[0] // r6 = { s04, s05, s06, s07 } ++ ld.w r8, r11[r10] // r8 = { s14, s15, s16, s17 } ++ .else ++ ld.w r9, r11[r10] // r9 = { s10, s11, s12, s13 } ++ ld.d r4, r12[0] ++ ld.w r7, r11++ // r7 = { s00, s01, s02, s03 }, src += 4 ++ 
ld.w r6, r11[0] // r6 = { s04, s05, s06, s07 } ++ ld.w r8, r11[r10] // r8 = { s14, s15, s16, s17 } ++ pavg.ub r6, r6, r4 ++ pavg.ub r7, r7, r5 ++ ld.d r4, r12[r10] ++ .endif ++ st.d r12, r6 // *dst = { s00, s01, s02, s03, s04, s05, s06, s07 } ++ add r11, r11, r3 << 1 // src += stride2 * 2 ++ .ifeq \put ++ pavg.ub r8, r8, r4 ++ pavg.ub r9, r9, r5 ++ .endif ++ st.d r12[r10 << 0], r8 // *(dst + stride) = { s10, s11, s12, s13, s14, s15, s16, s17 } ++ add r12, r12, r10 << 1 // dst += 2*stride ++ sub lr, 2 ++ brne 0b ++ ldm sp++, r3-r7,pc ++ ++ .endm ++ ++ .global put_no_rnd_pixels8_hv_avr32 ++ .text ++put_no_rnd_pixels8_hv_avr32: ++ pixels8_hv 0, 1 ++ ++ .global put_pixels8_hv_avr32 ++ .text ++put_pixels8_hv_avr32: ++ pixels8_hv 1, 1 ++ ++ .global avg_no_rnd_pixels8_hv_avr32 ++ .text ++avg_no_rnd_pixels8_hv_avr32: ++ pixels8_hv 0, 0 ++ ++ .global avg_pixels8_hv_avr32 ++ .text ++avg_pixels8_hv_avr32: ++ pixels8_hv 1, 0 ++ ++ .global put_no_rnd_pixels8_v_avr32 ++ .text ++put_no_rnd_pixels8_v_avr32: ++ pixels8_v 0, 1 ++ ++ .global put_pixels8_v_avr32 ++ .text ++put_pixels8_v_avr32: ++ pixels8_v 1, 1 ++ ++ .global avg_no_rnd_pixels8_v_avr32 ++ .text ++avg_no_rnd_pixels8_v_avr32: ++ pixels8_v 0, 0 ++ ++ .global avg_pixels8_v_avr32 ++ .text ++avg_pixels8_v_avr32: ++ pixels8_v 1, 0 ++ ++ .global put_no_rnd_pixels8_h_avr32 ++ .text ++put_no_rnd_pixels8_h_avr32: ++ pixels8_h 0, 1 ++ ++ .global put_pixels8_h_avr32 ++ .text ++put_pixels8_h_avr32: ++ pixels8_h 1, 1 ++ ++ .global avg_no_rnd_pixels8_h_avr32 ++ .text ++avg_no_rnd_pixels8_h_avr32: ++ pixels8_h 0, 0 ++ ++ .global avg_pixels8_h_avr32 ++ .text ++avg_pixels8_h_avr32: ++ pixels8_h 1, 0 ++ ++ .global put_pixels8_avr32 ++ .global put_no_rnd_pixels8_avr32 ++ .text ++put_pixels8_avr32: ++put_no_rnd_pixels8_avr32: ++ pixels8 1 ++ ++ .global avg_no_rnd_pixels8_avr32 ++ .global avg_pixels8_avr32 ++ .text ++avg_pixels8_avr32: ++avg_no_rnd_pixels8_avr32: ++ pixels8 0 +diff --git a/libavcodec/avr32/pico.h b/libavcodec/avr32/pico.h 
+new file mode 100644 +index 0000000..32201ba +--- /dev/null ++++ b/libavcodec/avr32/pico.h +@@ -0,0 +1,260 @@ ++/* ++ * Copyright (c) 2007 Atmel Corporation. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. The name of ATMEL may not be used to endorse or promote products ++ * derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL ++ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ++ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY ++ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE ++ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH ++ * DAMAGE. 
++ */ ++#ifndef __PICO_H__ ++#define __PICO_H__ ++ ++ ++ ++/* Coprocessor Number */ ++#define PICO_CPNO 1 ++ ++/* Pixel Coprocessor Register file */ ++#define PICO_REGVECT_INPIX2 cr0 ++#define PICO_REGVECT_INPIX1 cr1 ++#define PICO_REGVECT_INPIX0 cr2 ++#define PICO_REGVECT_OUTPIX2 cr3 ++#define PICO_REGVECT_OUTPIX1 cr4 ++#define PICO_REGVECT_OUTPIX0 cr5 ++#define PICO_REGVECT_COEFF0_A cr6 ++#define PICO_REGVECT_COEFF0_B cr7 ++#define PICO_REGVECT_COEFF1_A cr8 ++#define PICO_REGVECT_COEFF1_B cr9 ++#define PICO_REGVECT_COEFF2_A cr10 ++#define PICO_REGVECT_COEFF2_B cr11 ++#define PICO_REGVECT_VMU0_OUT cr12 ++#define PICO_REGVECT_VMU1_OUT cr13 ++#define PICO_REGVECT_VMU2_OUT cr14 ++#define PICO_REGVECT_CONFIG cr15 ++ ++#define PICO_INPIX2 0 ++#define PICO_INPIX1 1 ++#define PICO_INPIX0 2 ++#define PICO_OUTPIX2 3 ++#define PICO_OUTPIX1 4 ++#define PICO_OUTPIX0 5 ++#define PICO_COEFF0_A 6 ++#define PICO_COEFF0_B 7 ++#define PICO_COEFF1_A 8 ++#define PICO_COEFF1_B 9 ++#define PICO_COEFF2_A 10 ++#define PICO_COEFF2_B 11 ++#define PICO_VMU0_OUT 12 ++#define PICO_VMU1_OUT 13 ++#define PICO_VMU2_OUT 14 ++#define PICO_CONFIG 15 ++ ++/* Config Register */ ++#define PICO_COEFF_FRAC_BITS_OFFSET 0 ++#define PICO_COEFF_FRAC_BITS_SIZE 4 ++#define PICO_OFFSET_FRAC_BITS_OFFSET 4 ++#define PICO_OFFSET_FRAC_BITS_SIZE 4 ++#define PICO_INPUT_MODE_OFFSET 8 ++#define PICO_INPUT_MODE_SIZE 2 ++#define PICO_OUTPUT_MODE_OFFSET 10 ++#define PICO_OUTPUT_MODE_SIZE 1 ++ ++struct pico_config_t { ++ unsigned int : 32 - PICO_OUTPUT_MODE_OFFSET - PICO_OUTPUT_MODE_SIZE; ++ unsigned int output_mode : PICO_OUTPUT_MODE_SIZE; ++ unsigned int input_mode : PICO_INPUT_MODE_SIZE; ++ unsigned int offset_frac_bits : PICO_OFFSET_FRAC_BITS_SIZE; ++ unsigned int coeff_frac_bits : PICO_COEFF_FRAC_BITS_SIZE; ++ int vmu2_out; ++ int vmu1_out; ++ int vmu0_out; ++ short coeff2_2; ++ short coeff2_3; ++ short coeff2_0; ++ short coeff2_1; ++ short coeff1_2; ++ short coeff1_3; ++ short coeff1_0; ++ short coeff1_1; ++ short 
coeff0_2; ++ short coeff0_3; ++ short coeff0_0; ++ short coeff0_1; ++}; ++ ++ ++#define PICO_COEFF_FRAC_BITS(x) ((x) << PICO_COEFF_FRAC_BITS_OFFSET) /* field builders: shift a value to its CONFIG-register bit position; the argument is parenthesized so a compound expression is shifted as a whole */ ++#define PICO_OFFSET_FRAC_BITS(x) ((x) << PICO_OFFSET_FRAC_BITS_OFFSET) ++#define PICO_INPUT_MODE(x) ((x) << PICO_INPUT_MODE_OFFSET) ++#define PICO_OUTPUT_MODE(x) ((x) << PICO_OUTPUT_MODE_OFFSET) ++ ++#define GET_PICO_COEFF_FRAC_BITS(x) (((x) >> PICO_COEFF_FRAC_BITS_OFFSET)&((1 << PICO_COEFF_FRAC_BITS_SIZE)-1)) /* field extractors: shift right by the field offset, then mask to the field width */ ++#define GET_PICO_OFFSET_FRAC_BITS(x) (((x) >> PICO_OFFSET_FRAC_BITS_OFFSET)&((1 << PICO_OFFSET_FRAC_BITS_SIZE)-1)) ++#define GET_PICO_INPUT_MODE(x) (((x) >> PICO_INPUT_MODE_OFFSET)&((1 << PICO_INPUT_MODE_SIZE)-1)) ++#define GET_PICO_OUTPUT_MODE(x) (((x) >> PICO_OUTPUT_MODE_OFFSET)&((1 << PICO_OUTPUT_MODE_SIZE)-1)) ++ ++enum pico_input_mode { PICO_TRANSFORMATION_MODE, ++ PICO_HOR_FILTER_MODE, ++ PICO_VERT_FILTER_MODE }; ++ ++enum pico_output_mode { PICO_PACKED_MODE, ++ PICO_PLANAR_MODE }; ++ ++/* Bits in coefficients */ ++#define PICO_COEFF_BITS 12 ++ ++/* Operation bits */ ++#define PICO_MATRIX (0) ++#define PICO_USE_ACC (1 << 2) ++#define PICO_SINGLE_VECTOR (1 << 3) ++ ++ ++#define __str(x...) #x ++#define __xstr(x...) 
__str(x) ++ ++#define PICO_PUT_W(pico_reg, x) \ ++ __builtin_mvrc_w(PICO_CPNO, pico_reg, x); ++#define PICO_GET_W(pico_reg) \ ++ __builtin_mvcr_w(PICO_CPNO, pico_reg) ++ ++#define PICO_MVCR_W(x, pico_reg) /* read PICO register number pico_reg into x; volatile so repeated reads are neither merged nor dropped, matching PICO_MVCR_D */ \ ++ asm volatile ("mvcr.w\tcp" __xstr(PICO_CPNO) ", %0, cr" __xstr(pico_reg) : "=r"(x)); ++ ++#define PICO_MVRC_W(pico_reg, x) /* write x to PICO register number pico_reg; volatile made explicit, matching PICO_MVRC_D */ \ ++ asm volatile ("mvrc.w\tcp" __xstr(PICO_CPNO) ", cr" __xstr(pico_reg) ", %0" :: "r"(x)); ++ ++#define PICO_PUT_D(pico_reg, x) \ ++ __builtin_mvrc_d(PICO_CPNO, pico_reg, x); ++#define PICO_GET_D(pico_reg) \ ++ __builtin_mvcr_d(PICO_CPNO, pico_reg) ++ ++#define PICO_MVCR_D(x, pico_reg) \ ++ asm volatile ("mvcr.d\tcp" __xstr(PICO_CPNO) ", %0, cr" __xstr(pico_reg) : "=r"(x)); ++#define PICO_MVRC_D(pico_reg, x) \ ++ asm volatile ("mvrc.d\tcp" __xstr(PICO_CPNO) ", cr" __xstr(pico_reg) ", %0" :: "r"(x)); ++ ++#define PICO_STCM_W(ptr, pico_regs...) /* store the listed PICO registers to memory at ptr; the "memory" clobber tells the compiler that memory reached through the pointer operand is modified */ \ ++ asm volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory"); ++#define PICO_STCM_D(ptr, pico_regs...) \ ++ asm volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory"); ++ ++#define PICO_STCM_W_DEC(ptr, pico_regs...) /* as PICO_STCM_W, with ptr pre-decremented and updated ("+r") */ \ ++ asm volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory"); ++#define PICO_STCM_D_DEC(ptr, pico_regs...) \ ++ asm volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory"); ++ ++#define PICO_LDCM_W(ptr, pico_regs...) /* load the listed PICO registers from memory at ptr; the "memory" clobber forces pending stores to that memory to be completed before the asm */ \ ++ asm volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory"); ++#define PICO_LDCM_D(ptr, pico_regs...) \ ++ asm volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory"); ++ ++#define PICO_LDCM_W_INC(ptr, pico_regs...) /* as PICO_LDCM_W, with ptr post-incremented and updated ("+r") */ \ ++ asm volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr) :: "memory"); ++#define PICO_LDCM_D_INC(ptr, pico_regs...) 
\ ++ asm volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr) :: "memory"); ++ ++#define PICO_OP(op, dst_addr, addr0, addr1, addr2) /* issue a coprocessor operation; op and dst_addr are parenthesized so compound arguments are OR-ed as whole values */ \ ++ __builtin_cop(PICO_CPNO, addr0, addr1, addr2, (op) | (dst_addr)); ++ ++static inline void set_pico_config(struct pico_config_t *config){ /* load the COEFF0_A..CONFIG coprocessor registers from *config */ ++ PICO_LDCM_D(config, ++ PICO_REGVECT_COEFF0_A, PICO_REGVECT_COEFF0_B, ++ PICO_REGVECT_COEFF1_A, PICO_REGVECT_COEFF1_B, ++ PICO_REGVECT_COEFF2_A, PICO_REGVECT_COEFF2_B, ++ PICO_REGVECT_VMU0_OUT, PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT, PICO_REGVECT_CONFIG); ++} ++ ++static inline void get_pico_config(struct pico_config_t *config){ /* store the COEFF0_A..CONFIG coprocessor registers into *config */ ++ PICO_STCM_D(config, ++ PICO_REGVECT_COEFF0_A, PICO_REGVECT_COEFF0_B, ++ PICO_REGVECT_COEFF1_A, PICO_REGVECT_COEFF1_B, ++ PICO_REGVECT_COEFF2_A, PICO_REGVECT_COEFF2_B, ++ PICO_REGVECT_VMU0_OUT, PICO_REGVECT_VMU1_OUT, ++ PICO_REGVECT_VMU2_OUT, PICO_REGVECT_CONFIG); ++} ++ ++static inline void dump_pico_config(void){ /* read back the coprocessor configuration and print it with av_log at AV_LOG_INFO; (void) makes this a real prototype */ ++ struct pico_config_t pico_config; ++ const char *input_mode, *output_mode; /* only ever point at string literals */ ++ get_pico_config(&pico_config); ++ ++ ++ av_log(NULL, AV_LOG_INFO, "Dumping pico configuration:\n\n"); ++ av_log(NULL, AV_LOG_INFO, "\tcoeff_frac_bits = %d\n", pico_config.coeff_frac_bits); ++ av_log(NULL, AV_LOG_INFO, "\toffset_frac_bits = %d\n", pico_config.offset_frac_bits); ++ ++ switch ( pico_config.input_mode ){ ++ case PICO_TRANSFORMATION_MODE: ++ input_mode = "Transformation Mode"; ++ break; ++ case PICO_HOR_FILTER_MODE: ++ input_mode = "Horisontal Filter Mode"; ++ break; ++ case PICO_VERT_FILTER_MODE: ++ input_mode = "Vertical Filter Mode"; ++ break; ++ default: ++ input_mode = "Unknown Mode!!"; ++ break; ++ } ++ av_log(NULL, AV_LOG_INFO, "\tinput_mode = %s\n", input_mode); ++ ++ switch ( pico_config.output_mode ){ ++ case PICO_PLANAR_MODE: ++ output_mode = "Planar Mode"; ++ break; ++ case PICO_PACKED_MODE: ++ output_mode = "Packed Mode"; ++ break; ++ default: ++ output_mode = "Unknown Mode!!"; ++ break; ++ } ++ ++ av_log(NULL, AV_LOG_INFO, "\toutput_mode = 
%s\n", output_mode); ++ ++ av_log(NULL, AV_LOG_INFO, "\tCoeff0_0 = %f\n", (float)pico_config.coeff0_0/(float)(1 << pico_config.coeff_frac_bits)); ++ av_log(NULL, AV_LOG_INFO, "\tCoeff0_1 = %f\n", (float)pico_config.coeff0_1/(float)(1 << pico_config.coeff_frac_bits)); ++ av_log(NULL, AV_LOG_INFO, "\tCoeff0_2 = %f\n", (float)pico_config.coeff0_2/(float)(1 << pico_config.coeff_frac_bits)); ++ av_log(NULL, AV_LOG_INFO, "\tCoeff0_3 = %f\n", (float)pico_config.coeff0_3/(float)(1 << pico_config.offset_frac_bits)); ++ ++ av_log(NULL, AV_LOG_INFO, "\tCoeff1_0 = %f\n", (float)pico_config.coeff1_0/(float)(1 << pico_config.coeff_frac_bits)); ++ av_log(NULL, AV_LOG_INFO, "\tCoeff1_1 = %f\n", (float)pico_config.coeff1_1/(float)(1 << pico_config.coeff_frac_bits)); ++ av_log(NULL, AV_LOG_INFO, "\tCoeff1_2 = %f\n", (float)pico_config.coeff1_2/(float)(1 << pico_config.coeff_frac_bits)); ++ av_log(NULL, AV_LOG_INFO, "\tCoeff1_3 = %f\n", (float)pico_config.coeff1_3/(float)(1 << pico_config.offset_frac_bits)); ++ ++ av_log(NULL, AV_LOG_INFO, "\tCoeff2_0 = %f\n", (float)pico_config.coeff2_0/(float)(1 << pico_config.coeff_frac_bits)); ++ av_log(NULL, AV_LOG_INFO, "\tCoeff2_1 = %f\n", (float)pico_config.coeff2_1/(float)(1 << pico_config.coeff_frac_bits)); ++ av_log(NULL, AV_LOG_INFO, "\tCoeff2_2 = %f\n", (float)pico_config.coeff2_2/(float)(1 << pico_config.coeff_frac_bits)); ++ av_log(NULL, AV_LOG_INFO, "\tCoeff2_3 = %f\n", (float)pico_config.coeff2_3/(float)(1 << pico_config.offset_frac_bits)); ++} ++ ++ ++ ++#endif ++ +diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h +index 26b4f8d..1f8fabf 100644 +--- a/libavcodec/bitstream.h ++++ b/libavcodec/bitstream.h +@@ -171,7 +171,7 @@ typedef struct RL_VLC_ELEM { + #endif + + /* used to avoid missaligned exceptions on some archs (alpha, ...) 
*/ +-#if defined(ARCH_X86) || defined(ARCH_X86_64) ++#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_AVR32) + # define unaligned16(a) (*(const uint16_t*)(a)) + # define unaligned32(a) (*(const uint32_t*)(a)) + # define unaligned64(a) (*(const uint64_t*)(a)) +@@ -813,6 +813,44 @@ void free_vlc(VLC *vlc); + * if the vlc code is invalid and max_depth>1 than the number of bits removed + * is undefined + */ ++ ++#if defined(ARCH_AVR32) ++#define GET_VLC(code, name, gb, table, bits, max_depth)\ ++{\ ++ int n, index, nb_bits;\ ++ union { VLC_TYPE vlc[2];\ ++ uint32_t u32; } table_elem;\ ++\ ++ index= SHOW_UBITS(name, gb, bits);\ ++ table_elem.u32 = unaligned32(&table[index]); \ ++ code = table_elem.vlc[0];\ ++ n = table_elem.vlc[1];\ ++\ ++ if(max_depth > 1 && n < 0 ){\ ++ LAST_SKIP_BITS(name, gb, bits)\ ++ UPDATE_CACHE(name, gb)\ ++\ ++ nb_bits = -n;\ ++\ ++ index= SHOW_UBITS(name, gb, nb_bits) + code;\ ++ table_elem.u32 = unaligned32(&table[index]); \ ++ code = table_elem.vlc[0];\ ++ n = table_elem.vlc[1];\ ++ if(max_depth > 2 && n < 0){\ ++ LAST_SKIP_BITS(name, gb, nb_bits)\ ++ UPDATE_CACHE(name, gb)\ ++\ ++ nb_bits = -n;\ ++\ ++ index= SHOW_UBITS(name, gb, nb_bits) + code;\ ++ code = table[index][0];\ ++ n = table[index][1];\ ++ }\ ++ }\ ++ SKIP_BITS(name, gb, n)\ ++} ++ ++#else + #define GET_VLC(code, name, gb, table, bits, max_depth)\ + {\ + int n, index, nb_bits;\ +@@ -821,7 +859,7 @@ void free_vlc(VLC *vlc); + code = table[index][0];\ + n = table[index][1];\ + \ +- if(max_depth > 1 && n < 0){\ ++ if(max_depth > 1 && n < 0 ){\ + LAST_SKIP_BITS(name, gb, bits)\ + UPDATE_CACHE(name, gb)\ + \ +@@ -843,7 +881,38 @@ void free_vlc(VLC *vlc); + }\ + SKIP_BITS(name, gb, n)\ + } ++#endif + ++#if defined(ARCH_AVR32) ++#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\ ++{\ ++ int n, index, nb_bits;\ ++ union { RL_VLC_ELEM vlc;\ ++ uint32_t u32; } table_elem;\ ++\ ++ index= SHOW_UBITS(name, gb, bits);\ ++ table_elem.u32 = 
unaligned32(&table[index]); \ ++ level = table_elem.vlc.level;\ ++ n = table_elem.vlc.len;\ ++\ ++ if(max_depth > 1 && n < 0 ){\ ++ SKIP_BITS(name, gb, bits)\ ++ if(need_update){\ ++ UPDATE_CACHE(name, gb)\ ++ }\ ++\ ++ nb_bits = -n;\ ++\ ++ index= SHOW_UBITS(name, gb, nb_bits) + level;\ ++ table_elem.u32 = unaligned32(&table[index]); \ ++ level = table_elem.vlc.level;\ ++ n = table_elem.vlc.len;\ ++ }\ ++ run= table_elem.vlc.run;\ ++ SKIP_BITS(name, gb, n)\ ++} ++ ++#else + #define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\ + {\ + int n, index, nb_bits;\ +@@ -852,7 +921,7 @@ void free_vlc(VLC *vlc); + level = table[index].level;\ + n = table[index].len;\ + \ +- if(max_depth > 1 && n < 0){\ ++ if(max_depth > 1 && n < 0 ){\ + SKIP_BITS(name, gb, bits)\ + if(need_update){\ + UPDATE_CACHE(name, gb)\ +@@ -867,7 +936,7 @@ void free_vlc(VLC *vlc); + run= table[index].run;\ + SKIP_BITS(name, gb, n)\ + } +- ++#endif + + /** + * parses a vlc code, faster then get_vlc() +diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c +index 56c42b9..8fc10c6 100644 +--- a/libavcodec/dsputil.c ++++ b/libavcodec/dsputil.c +@@ -4197,6 +4197,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) + #ifdef ARCH_BFIN + dsputil_init_bfin(c,avctx); + #endif ++#ifdef ARCH_AVR32 ++ dsputil_init_avr32(c,avctx); ++#endif + + for(i=0; i<64; i++){ + if(!c->put_2tap_qpel_pixels_tab[0][i]) +diff --git a/libavcodec/h264.c b/libavcodec/h264.c +index 865e80a..8f7c3f1 100644 +--- a/libavcodec/h264.c ++++ b/libavcodec/h264.c +@@ -3258,7 +3258,12 @@ static void free_tables(H264Context *h){ + + static void init_dequant8_coeff_table(H264Context *h){ + int i,q,x; ++#ifdef ARCH_AVR32 ++ const int transpose = 0; ++#else + const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly ++#endif ++ + h->dequant8_coeff[0] = h->dequant8_buffer[0]; + h->dequant8_coeff[1] = h->dequant8_buffer[1]; + +@@ -3281,7 +3286,13 @@ static void 
init_dequant8_coeff_table(H264Context *h){ + + static void init_dequant4_coeff_table(H264Context *h){ + int i,j,q,x; ++ // Yes this is ugly as hell.... ++#ifdef ARCH_AVR32 ++ const int transpose = 0; ++#else + const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly ++#endif ++ + for(i=0; i<6; i++ ){ + h->dequant4_coeff[i] = h->dequant4_buffer[i]; + for(j=0; j<i; j++){ +@@ -4663,7 +4674,11 @@ static int decode_slice_header(H264Context *h){ + if (MPV_common_init(s) < 0) + return -1; + ++#ifdef ARCH_AVR32 ++ if ( 1 ){ ++#else + if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly ++#endif + memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t)); + memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t)); + }else{ +diff --git a/libavutil/common.h b/libavutil/common.h +index 3ae5971..7e52b90 100644 +--- a/libavutil/common.h ++++ b/libavutil/common.h +@@ -283,23 +283,39 @@ static inline int mid_pred(int a, int b, int c) + * @param amax maximum value of the clip range + * @return cliped value + */ ++#if defined(ARCH_AVR32) ++#define clip(a, amin, amax) \ ++ ({ int __tmp__; \ ++ asm ("min\t%0, %1, %2\n" \ ++ "max\t%0, %0, %3\n" \ ++ : "=&r"(__tmp__) : "r"(a), "r"(amax), "r"(amin)); \ ++ __tmp__; }) ++#else + static inline int clip(int a, int amin, int amax) + { + if (a < amin) return amin; + else if (a > amax) return amax; + else return a; + } ++#endif + + /** + * clip a signed integer value into the 0-255 range + * @param a value to clip + * @return cliped value + */ ++#if defined(ARCH_AVR32) ++#define clip_uint8(a) \ ++ ({ int __tmp__ = a; \ ++ asm ("satu\t%0 >> 0, 8" : "+r"(__tmp__)); \ ++ __tmp__; }) ++#else + static inline uint8_t clip_uint8(int a) + { + if (a&(~255)) return (-a)>>31; + else return a; + } ++#endif + + /* math */ + int64_t ff_gcd(int64_t a, int64_t b); +diff --git a/libavutil/internal.h b/libavutil/internal.h +index 285d304..a8b0718 100644 +--- a/libavutil/internal.h ++++ b/libavutil/internal.h +@@ -210,6 
+210,15 @@ if((y)<(x)){\ + }\ + } + ++/* XXX: Hack for uclibc which declares lrintf but does not implement it... */ ++#ifdef ARCH_AVR32 ++#undef HAVE_LRINTF ++#define HAVE_LRINTF 1 ++#define lrintf(x) rint(x) ++#define llrint(x) (long long)rint(x) ++#endif ++ ++ + #ifndef HAVE_LRINTF + /* XXX: add ISOC specific test to avoid specific BSD testing. */ + /* better than nothing implementation. */ +diff --git a/libfaad2/common.h b/libfaad2/common.h +index f809042..6c5fb21 100644 +--- a/libfaad2/common.h ++++ b/libfaad2/common.h +@@ -67,7 +67,7 @@ extern "C" { + /* Use if target platform has address generators with autoincrement */ + //#define PREFER_POINTERS + +-#if defined(_WIN32_WCE) || defined(__arm__) ++#if defined(_WIN32_WCE) || defined(__arm__) || defined(__avr32__) + #define FIXED_POINT + #endif + +diff --git a/libmpcodecs/ad_libmad.c b/libmpcodecs/ad_libmad.c +index 076359a..51b77fe 100644 +--- a/libmpcodecs/ad_libmad.c ++++ b/libmpcodecs/ad_libmad.c +@@ -86,6 +86,11 @@ static int init(sh_audio_t *sh){ + sh->channels=(this->frame.header.mode == MAD_MODE_SINGLE_CHANNEL) ? 1 : 2; + sh->samplerate=this->frame.header.samplerate; + sh->i_bps=this->frame.header.bitrate/8; ++#ifdef WORDS_BIGENDIAN ++ sh->sample_format = AF_FORMAT_S16_BE; ++#else ++ sh->sample_format = AF_FORMAT_S16_LE; ++#endif + sh->samplesize=2; + + return 1; +diff --git a/libswscale/pico-avr32.h b/libswscale/pico-avr32.h +new file mode 100644 +index 0000000..7ac6200 +--- /dev/null ++++ b/libswscale/pico-avr32.h +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (c) 2007 Atmel Corporation. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. The name of ATMEL may not be used to endorse or promote products ++ * derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL ++ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ++ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY ++ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE ++ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH ++ * DAMAGE. 
++ */ ++#ifndef __PICO_H__ /* NOTE(review): libavcodec/avr32/pico.h uses this same guard name, so a file including both headers silently skips whichever comes second - confirm no file does */ ++#define __PICO_H__ ++ ++/* Coprocessor Number */ ++#define PICO_CPNO 1 ++ ++/* Pixel Coprocessor Register file */ ++#define PICO_REGVECT_INPIX2 cr0 ++#define PICO_REGVECT_INPIX1 cr1 ++#define PICO_REGVECT_INPIX0 cr2 ++#define PICO_REGVECT_OUTPIX2 cr3 ++#define PICO_REGVECT_OUTPIX1 cr4 ++#define PICO_REGVECT_OUTPIX0 cr5 ++#define PICO_REGVECT_COEFF0_A cr6 ++#define PICO_REGVECT_COEFF0_B cr7 ++#define PICO_REGVECT_COEFF1_A cr8 ++#define PICO_REGVECT_COEFF1_B cr9 ++#define PICO_REGVECT_COEFF2_A cr10 ++#define PICO_REGVECT_COEFF2_B cr11 ++#define PICO_REGVECT_VMU0_OUT cr12 ++#define PICO_REGVECT_VMU1_OUT cr13 ++#define PICO_REGVECT_VMU2_OUT cr14 ++#define PICO_REGVECT_CONFIG cr15 ++ ++#define PICO_INPIX2 0 ++#define PICO_INPIX1 1 ++#define PICO_INPIX0 2 ++#define PICO_OUTPIX2 3 ++#define PICO_OUTPIX1 4 ++#define PICO_OUTPIX0 5 ++#define PICO_COEFF0_A 6 ++#define PICO_COEFF0_B 7 ++#define PICO_COEFF1_A 8 ++#define PICO_COEFF1_B 9 ++#define PICO_COEFF2_A 10 ++#define PICO_COEFF2_B 11 ++#define PICO_VMU0_OUT 12 ++#define PICO_VMU1_OUT 13 ++#define PICO_VMU2_OUT 14 ++#define PICO_CONFIG 15 ++ ++/* Config Register: the unsuffixed values equal the *_OFFSET macros and the *_WIDTH values equal the *_SIZE macros of libavcodec/avr32/pico.h */ ++#define PICO_COEFF_FRAC_BITS 0 ++#define PICO_COEFF_FRAC_BITS_WIDTH 4 ++#define PICO_OFFSET_FRAC_BITS 4 ++#define PICO_OFFSET_FRAC_BITS_WIDTH 4 ++#define PICO_INPUT_MODE 8 ++#define PICO_INPUT_MODE_WIDTH 2 ++#define PICO_OUTPUT_MODE 10 ++ ++#define PICO_TRANSFORMATION_MODE 0 ++#define PICO_HOR_FILTER_MODE 1 ++#define PICO_VERT_FILTER_MODE 2 ++ ++#define PICO_PLANAR_MODE 1 ++#define PICO_PACKED_MODE 0 ++ ++/* Bits in coefficients */ ++#define PICO_COEFF_BITS 12 ++ ++/* Operation bits */ ++#define PICO_USE_ACC (1 << 2) ++#define PICO_SINGLE_VECTOR (1 << 3) ++ ++ ++#define __str(x...) #x ++#define __xstr(x...) 
__str(x) ++ ++#define PICO_PUT_W(pico_reg, x) \ ++ __builtin_mvrc_w(PICO_CPNO, pico_reg, x); ++#define PICO_GET_W(pico_reg) \ ++ __builtin_mvcr_w(PICO_CPNO, pico_reg) ++ ++#define PICO_PUT_D(pico_reg, x) \ ++ __builtin_mvrc_d(PICO_CPNO, pico_reg, x); ++#define PICO_GET_D(pico_reg) \ ++ __builtin_mvcr_d(PICO_CPNO, pico_reg) ++ ++ ++#define PICO_STCM_W(ptr, pico_regs...) /* store the listed PICO registers to memory at ptr; the "memory" clobber tells the compiler that memory reached through the pointer operand is modified */ \ ++ asm volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory"); ++#define PICO_STCM_D(ptr, pico_regs...) \ ++ asm volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory"); ++ ++#define PICO_STCM_W_DEC(ptr, pico_regs...) /* as PICO_STCM_W, with ptr pre-decremented and updated ("+r") */ \ ++ asm volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory"); ++#define PICO_STCM_D_DEC(ptr, pico_regs...) \ ++ asm volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory"); ++ ++#define PICO_LDCM_W(ptr, pico_regs...) /* load the listed PICO registers from memory at ptr; the "memory" clobber forces pending stores to that memory to be completed before the asm */ \ ++ asm volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory"); ++#define PICO_LDCM_D(ptr, pico_regs...) \ ++ asm volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory"); ++ ++#define PICO_LDCM_W_INC(ptr, pico_regs...) /* as PICO_LDCM_W, with ptr post-incremented and updated ("+r") */ \ ++ asm volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr) :: "memory"); ++#define PICO_LDCM_D_INC(ptr, pico_regs...) 
\ ++ asm volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr)); ++ ++#define PICO_OP(op, dst_addr, addr0, addr1, addr2) \ ++ __builtin_cop(PICO_CPNO, addr0, addr1, addr2, op | dst_addr); ++ ++ ++#endif ++ +diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h +index ecd28f5..3221d0c 100644 +--- a/libswscale/swscale_internal.h ++++ b/libswscale/swscale_internal.h +@@ -173,7 +173,7 @@ typedef struct SwsContext{ + SwsFunc yuv2rgb_get_func_ptr (SwsContext *c); + int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation); + +-char *sws_format_name(int format); ++char *sws_format_name(enum PixelFormat format); + + //FIXME replace this with something faster + #define isPlanarYUV(x) ((x)==PIX_FMT_YUV410P || (x)==PIX_FMT_YUV420P \ +diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c +index 71759bc..fa83985 100644 +--- a/libswscale/yuv2rgb.c ++++ b/libswscale/yuv2rgb.c +@@ -44,6 +44,10 @@ + #include "yuv2rgb_mlib.c" + #endif + ++#ifdef ARCH_AVR32 ++#include "yuv2rgb_avr32.c" ++#endif ++ + #define DITHER1XBPP // only for mmx + + const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={ +@@ -601,6 +605,12 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c) + if(t) return t; + } + #endif ++#ifdef ARCH_AVR32 ++ { ++ SwsFunc t= yuv2rgb_init_avr32(c); ++ if(t) return t; ++ } ++#endif + #ifdef HAVE_ALTIVEC + if (c->flags & SWS_CPU_CAPS_ALTIVEC) + { +@@ -678,6 +688,10 @@ int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, + //printf("%lld %lld %lld %lld %lld\n", cy, crv, cbu, cgu, cgv); + oy -= 256*brightness; + ++#ifdef ARCH_AVR32 ++ yuv2rgb_c_init_tables_avr32 (c, inv_table, fullRange, brightness, contrast, saturation); ++#endif ++ + for (i = 0; i < 1024; i++) { + int j; + +diff --git a/libswscale/yuv2rgb_avr32.c b/libswscale/yuv2rgb_avr32.c +new file mode 100644 +index 0000000..4a8341e +--- /dev/null ++++ 
b/libswscale/yuv2rgb_avr32.c +@@ -0,0 +1,416 @@ ++/* ++ * Copyright (c) 2007 Atmel Corporation. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. The name of ATMEL may not be used to endorse or promote products ++ * derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL ++ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ++ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY ++ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE ++ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH ++ * DAMAGE. 
++ */ ++#include "pico-avr32.h" ++ ++ ++#define RGB(uv_part) \ ++ __asm__ volatile ( \ ++ "ld.w\t%0, %3[%7:" uv_part " << 2]\n\t" /* tmp = c->table_gV[V] */ \ ++ "ld.w\t%1, %4[%8:" uv_part " << 2]\n\t" /* g = c->table_gU[U] */ \ ++ "ld.w\t%2, %5[%8:" uv_part " << 2]\n\t" /* b = c->table_bU[U] */ \ ++ "add\t%1, %0\n\t" /* g += tmp */\ ++ "ld.w\t%0, %6[%7:" uv_part " << 2]" /* r = c->table_rV[V] */ \ ++ : "=&r" (r), "=&r" (g), "=&r" (b) \ ++ : "r" (&c->table_gV[0]), "r" (&c->table_gU[0]),"r" (&c->table_bU[0]), \ ++ "r" (&c->table_rV[0]), "r" (V), "r" (U)); ++ ++ ++#undef YUV2RGB1 ++#define YUV2RGB1(dst, src, y, idx) \ ++ { int tmp2; __asm__ volatile ( \ ++ "ld.ub\t%0, %3[2*%8]\n\t" /* Y = ((uint32_t*)py_1)[0] */ \ ++ "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[(Y >> 24) & 0xFF] */ \ ++ "ld.ub\t%2, %5[%0]\n\t" /* tmp = g[(Y >> 24) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 0], %1\n\t" /* dst_1[2] = tmp; */ \ ++ "st.b\t%7[6*%8 + 1], %2\n\t" /* dst_1[1] = tmp; */ \ ++ "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[(Y >> 24) & 0xFF] */ \ ++ "ld.ub\t%0, %3[2*%8 + 1]\n\t" /* Y = ((uint32_t*)py_1)[0] */ \ ++ "st.b\t%7[6*%8 + 2], %1\n\t" /* dst_1[0] = tmp; */ \ ++ "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[(Y >> 16) & 0xFF] */ \ ++ "ld.ub\t%2, %5[%0]\n\t" /* tmp = g[(Y >> 16) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 3], %1\n\t" /* dst_1[5] = tmp; */ \ ++ "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[(Y >> 16) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 4], %2\n\t" /* dst_1[4] = tmp; */ \ ++ "st.b\t%7[6*%8 + 5], %1" /* dst_1[3] = tmp; */ \ ++ : "=&r" (y), "=&r" (tmp), "=&r" (tmp2) \ ++ : "r" (src), "r" (r), "r" (g), "r" (b), "r" (dst), "i" (idx)); } ++ ++#undef YUV2RGB2 ++#define YUV2RGB2(dst, src, y, idx) \ ++ { int tmp2; __asm__ volatile ( \ ++ "ld.ub\t%0, %3[2*%8]\n\t" /* Y = ((uint32_t*)py_1)[0] */ \ ++ "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[(Y >> 24) & 0xFF] */ \ ++ "ld.ub\t%2, %5[%0]\n\t" /* tmp = g[(Y >> 24) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 0], %1\n\t" /* dst_1[2] = tmp; */ \ ++ "st.b\t%7[6*%8 + 1], %2\n\t" /* dst_1[1] = tmp; */ \ ++ 
"ld.ub\t%1, %6[%0]\n\t" /* tmp = b[(Y >> 24) & 0xFF] */ \ ++ "ld.ub\t%0, %3[2*%8 + 1]\n\t" /* Y = ((uint32_t*)py_1)[0] */ \ ++ "st.b\t%7[6*%8 + 2], %1\n\t" /* dst_1[0] = tmp; */ \ ++ "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[(Y >> 16) & 0xFF] */ \ ++ "ld.ub\t%2, %5[%0]\n\t" /* tmp = g[(Y >> 16) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 3], %1\n\t" /* dst_1[5] = tmp; */ \ ++ "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[(Y >> 16) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 4], %2\n\t" /* dst_1[4] = tmp; */ \ ++ "st.b\t%7[6*%8 + 5], %1" /* dst_1[3] = tmp; */ \ ++ : "=&r" (y), "=&r" (tmp), "=&r" (tmp2) \ ++ : "r" (src), "r" (r), "r" (g), "r" (b), "r" (dst), "i" (idx)); } ++ ++ ++#undef YUV2BGR1 ++#define YUV2BGR1(dst, src, y, idx) \ ++ { int tmp2; __asm__ volatile ( \ ++ "ld.ub\t%0, %3[2*%8]\n\t" /* Y = ((uint32_t*)py_1)[0] */ \ ++ "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[(Y >> 24) & 0xFF] */ \ ++ "ld.ub\t%2, %5[%0]\n\t" /* tmp = g[(Y >> 24) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 2], %1\n\t" /* dst_1[2] = tmp; */ \ ++ "st.b\t%7[6*%8 + 1], %2\n\t" /* dst_1[1] = tmp; */ \ ++ "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[(Y >> 24) & 0xFF] */ \ ++ "ld.ub\t%0, %3[2*%8 + 1]\n\t" /* Y = ((uint32_t*)py_1)[0] */ \ ++ "st.b\t%7[6*%8 + 0], %1\n\t" /* dst_1[0] = tmp; */ \ ++ "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[(Y >> 16) & 0xFF] */ \ ++ "ld.ub\t%2, %5[%0]\n\t" /* tmp = g[(Y >> 16) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 5], %1\n\t" /* dst_1[5] = tmp; */ \ ++ "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[(Y >> 16) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 4], %2\n\t" /* dst_1[4] = tmp; */ \ ++ "st.b\t%7[6*%8 + 3], %1" /* dst_1[3] = tmp; */ \ ++ : "=&r" (y), "=&r" (tmp), "=&r" (tmp2) \ ++ : "r" (src), "r" (r), "r" (g), "r" (b), "r" (dst), "i" (idx)); } ++ ++#undef YUV2BGR2 ++#define YUV2BGR2(dst, src, y, idx) \ ++ { int tmp2; __asm__ volatile ( \ ++ "ld.ub\t%0, %3[2*%8]\n\t" /* Y = ((uint32_t*)py_1)[0] */ \ ++ "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[(Y >> 24) & 0xFF] */ \ ++ "ld.ub\t%2, %5[%0]\n\t" /* tmp = g[(Y >> 24) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 2], %1\n\t" /* dst_1[2] 
= tmp; */ \ ++ "st.b\t%7[6*%8 + 1], %2\n\t" /* dst_1[1] = tmp; */ \ ++ "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[(Y >> 24) & 0xFF] */ \ ++ "ld.ub\t%0, %3[2*%8 + 1]\n\t" /* Y = ((uint32_t*)py_1)[0] */ \ ++ "st.b\t%7[6*%8 + 0], %1\n\t" /* dst_1[0] = tmp; */ \ ++ "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[(Y >> 16) & 0xFF] */ \ ++ "ld.ub\t%2, %5[%0]\n\t" /* tmp = g[(Y >> 16) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 5], %1\n\t" /* dst_1[5] = tmp; */ \ ++ "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[(Y >> 16) & 0xFF] */ \ ++ "st.b\t%7[6*%8 + 4], %2\n\t" /* dst_1[4] = tmp; */ \ ++ "st.b\t%7[6*%8 + 3], %1" /* dst_1[3] = tmp; */ \ ++ : "=&r" (y), "=&r" (tmp), "=&r" (tmp2) \ ++ : "r" (src), "r" (r), "r" (g), "r" (b), "r" (dst), "i" (idx)); } ++ ++ ++ ++int yuv2bgr24_avr32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, ++ int srcSliceH, uint8_t* dst[], int dstStride[]){ ++ int y; ++ ++ if(c->srcFormat == PIX_FMT_YUV422P){ ++ srcStride[1] *= 2; ++ srcStride[2] *= 2; ++ } ++ ++ ++ for(y=0; y<srcSliceH; y+=2){ ++ uint8_t *dst_1= (uint8_t*)(dst[0] + (y+srcSliceY )*dstStride[0]); ++ uint8_t *dst_2= (uint8_t*)(dst[0] + (y+srcSliceY+1)*dstStride[0]); ++ uint32_t *r, *g, *b; ++ uint8_t *py_1= src[0] + y*srcStride[0]; ++ uint8_t *py_2= py_1 + srcStride[0]; ++ uint8_t *pu= src[1] + (y>>1)*srcStride[1]; ++ uint8_t *pv= src[2] + (y>>1)*srcStride[2]; ++ unsigned int h_size= c->dstW>>3; ++ while (h_size--) { ++ uint32_t U, V, Y1, Y2, tmp; ++ U = ((uint32_t*)pu)[0]; ++ V = ((uint32_t*)pv)[0]; ++ ++ RGB("t") ++ YUV2BGR1(dst_1, py_1, Y1, 0) ++ YUV2BGR1(dst_2, py_2, Y2, 0) ++ ++ RGB("u") ++ YUV2BGR2(dst_1, py_1, Y1, 1) ++ YUV2BGR2(dst_2, py_2, Y2, 1) ++ ++ RGB("l") ++ YUV2BGR1(dst_1, py_1, Y1, 2) ++ YUV2BGR1(dst_2, py_2, Y2, 2) ++ ++ RGB("b") ++ YUV2BGR2(dst_1, py_1, Y1, 3) ++ YUV2BGR2(dst_2, py_2, Y2, 3) ++ ++ ++ ++ pu += 4; ++ pv += 4; ++ py_1 += 8; ++ py_2 += 8; ++ dst_1 += 24; ++ dst_2 += 24; ++ } ++ } ++ return srcSliceH; ++} ++ ++ ++ ++static int yuv2rgb24_avr32(SwsContext *c, uint8_t* src[], int 
srcStride[], int srcSliceY, ++ int srcSliceH, uint8_t* dst[], int dstStride[]){ ++ int y; ++ ++ if(c->srcFormat == PIX_FMT_YUV422P){ ++ srcStride[1] *= 2; ++ srcStride[2] *= 2; ++ } ++ for(y=0; y<srcSliceH; y+=2){ ++ uint8_t *dst_1= (uint8_t*)(dst[0] + (y+srcSliceY )*dstStride[0]); ++ uint8_t *dst_2= (uint8_t*)(dst[0] + (y+srcSliceY+1)*dstStride[0]); ++ uint8_t *r, *g, *b; ++ uint8_t *py_1= src[0] + y*srcStride[0]; ++ uint8_t *py_2= py_1 + srcStride[0]; ++ uint8_t *pu= src[1] + (y>>1)*srcStride[1]; ++ uint8_t *pv= src[2] + (y>>1)*srcStride[2]; ++ unsigned int h_size= c->dstW>>3; ++ while (h_size--) { ++ uint32_t U, V, Y1, Y2, tmp; ++ U = ((uint32_t*)pu)[0]; ++ V = ((uint32_t*)pv)[0]; ++ ++ RGB("t") ++ YUV2RGB1(dst_1, py_1, Y1, 0) ++ YUV2RGB1(dst_2, py_2, Y2, 0) ++ ++ RGB("u") ++ YUV2RGB2(dst_1, py_1, Y1, 1) ++ YUV2RGB2(dst_2, py_2, Y2, 1) ++ ++ RGB("l") ++ YUV2RGB1(dst_1, py_1, Y1, 2) ++ YUV2RGB1(dst_2, py_2, Y2, 2) ++ ++ RGB("b") ++ YUV2RGB2(dst_1, py_1, Y1, 3) ++ YUV2RGB2(dst_2, py_2, Y2, 3) ++ ++ pu += 4; ++ pv += 4; ++ py_1 += 8; ++ py_2 += 8; ++ dst_1 += 24; ++ dst_2 += 24; ++ } ++ } ++ return srcSliceH; ++} ++ ++#define SCALE(x, bits) (((x) + ( 1 << (bits - 1))) >> bits) ++#define COEFF_FRAC_BITS 9 ++#define OFFSET_FRAC_BITS 2 ++ ++/* Coefficients used in the pico */ ++static struct { ++ short coeff2_2; ++ short coeff2_3; ++ short coeff2_0; ++ short coeff2_1; ++ short coeff1_2; ++ short coeff1_3; ++ short coeff1_0; ++ short coeff1_1; ++ short coeff0_2; ++ short coeff0_3; ++ short coeff0_0; ++ short coeff0_1; ++} pico_coeff; ++ ++ ++static int yuv2bgr24_avr32_pico(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, ++ int srcSliceH, uint8_t* dst[], int dstStride[]){ ++ int y; ++ static int first_time = 1; ++ ++ /* Initialize pico */ ++ PICO_LDCM_D(&pico_coeff, ++ PICO_REGVECT_COEFF0_A, PICO_REGVECT_COEFF0_B, ++ PICO_REGVECT_COEFF1_A, PICO_REGVECT_COEFF1_B, ++ PICO_REGVECT_COEFF2_A, PICO_REGVECT_COEFF2_B); ++ ++ PICO_PUT_W(PICO_CONFIG, ++ 
(PICO_PACKED_MODE << PICO_OUTPUT_MODE ++ | PICO_TRANSFORMATION_MODE << PICO_INPUT_MODE ++ | OFFSET_FRAC_BITS << PICO_OFFSET_FRAC_BITS ++ | COEFF_FRAC_BITS << PICO_COEFF_FRAC_BITS)); ++ ++ ++ if(c->srcFormat == PIX_FMT_YUV422P){ ++ srcStride[1] *= 2; ++ srcStride[2] *= 2; ++ } ++ ++ for(y=0; y<srcSliceH; y+=2){ ++ uint8_t *dst_1= (uint8_t*)(dst[0] + (y+srcSliceY )*dstStride[0]); ++ uint8_t *dst_2= (uint8_t*)(dst[0] + (y+srcSliceY+1)*dstStride[0]); ++ uint8_t *r, *g, *b; ++ uint8_t *py_1= src[0] + y*srcStride[0]; ++ uint8_t *py_2= py_1 + srcStride[0]; ++ uint8_t *pu= src[1] + (y>>1)*srcStride[1]; ++ uint8_t *pv= src[2] + (y>>1)*srcStride[2]; ++ unsigned int h_size= c->dstW>>3; ++ int *py_1_int = (int *)py_1; ++ int *py_2_int = (int *)py_2; ++ int *pu_int = (int *)pu; ++ int *pv_int = (int *)pv; ++ while (h_size--) { ++ PICO_PUT_W(PICO_INPIX0, *py_1_int++); ++ PICO_PUT_W(PICO_INPIX1, *pu_int++); ++ PICO_PUT_W(PICO_INPIX2, *pv_int++); ++ PICO_OP(0, 0, 0, 4, 8); ++ PICO_OP(0, 1, 1, 4, 8); ++ PICO_OP(0, 2, 2, 5, 9); ++ PICO_OP(0, 3, 3, 5, 9); ++ PICO_PUT_W(PICO_INPIX0, *py_1_int++); ++ PICO_STCM_W(dst_1, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0); ++ PICO_OP(0, 0, 0, 6, 10); ++ PICO_OP(0, 1, 1, 6, 10); ++ PICO_OP(0, 2, 2, 7, 11); ++ PICO_OP(0, 3, 3, 7, 11); ++ PICO_PUT_W(PICO_INPIX0, *py_2_int++); ++ PICO_STCM_W(dst_1 + 12, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0); ++ ++ PICO_OP(0, 0, 0, 4, 8); ++ PICO_OP(0, 1, 1, 4, 8); ++ PICO_OP(0, 2, 2, 5, 9); ++ PICO_OP(0, 3, 3, 5, 9); ++ PICO_PUT_W(PICO_INPIX0, *py_2_int++); ++ PICO_STCM_W(dst_2, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0); ++ PICO_OP(0, 0, 0, 6, 10); ++ PICO_OP(0, 1, 1, 6, 10); ++ PICO_OP(0, 2, 2, 7, 11); ++ PICO_OP(0, 3, 3, 7, 11); ++ PICO_STCM_W(dst_2 + 12, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0); ++ ++ dst_1 += 24; ++ dst_2 += 24; ++ } ++ } ++ return srcSliceH; ++} ++ ++extern int avr32_use_pico; ++ ++SwsFunc 
yuv2rgb_init_avr32 (SwsContext *c){ ++ switch(c->dstFormat){ ++ case PIX_FMT_BGR24: ++ { ++ if ( avr32_use_pico ){ ++ MSG_ERR("AVR32 BGR24: Using PICO for color space conversion\n"); ++ return yuv2bgr24_avr32_pico; ++ } else { ++ MSG_ERR("AVR32 BGR24: Using optimized color space conversion\n"); ++ return yuv2bgr24_avr32; ++ } ++ } ++ break; ++ case PIX_FMT_RGB24: ++ { ++ if ( avr32_use_pico ){ ++ MSG_ERR("AVR32 RGB24: Using PICO for color space conversion\n"); ++ return yuv2bgr24_avr32_pico; ++ } else { ++ MSG_ERR("AVR32 RGB24: Using optimized color space conversion\n"); ++ return yuv2rgb24_avr32; ++ } ++ } ++ } ++ return NULL; ++} ++ ++ ++int yuv2rgb_c_init_tables_avr32 (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation){ ++ const int isRgb = (c->dstFormat == PIX_FMT_RGB24); ++ ++ int64_t crv = inv_table[0]; ++ int64_t cbu = inv_table[1]; ++ int64_t cgu = -inv_table[2]; ++ int64_t cgv = -inv_table[3]; ++ int64_t cy = 1<<16; ++ int64_t oy = 0; ++ ++ if(!fullRange){ ++ cy= (cy*255) / 219; ++ oy= 16<<16; ++ } ++ ++ cy = (cy *contrast )>>16; ++ crv= (crv*contrast * saturation)>>32; ++ cbu= (cbu*contrast * saturation)>>32; ++ cgu= (cgu*contrast * saturation)>>32; ++ cgv= (cgv*contrast * saturation)>>32; ++ ++ oy -= 256*brightness; ++ ++ pico_coeff.coeff1_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* G <- Y */ ++ pico_coeff.coeff1_1 = SCALE(cgu, 16 - COEFF_FRAC_BITS); /* G <- U */ ++ pico_coeff.coeff1_2 = SCALE(cgv, 16 - COEFF_FRAC_BITS); /* G <- V */ ++ pico_coeff.coeff1_3 = (SCALE(-128*cgu - 128*cgv - 16*cy, 16 - OFFSET_FRAC_BITS) ++ + /*0.5*/(1 << (OFFSET_FRAC_BITS-1))); /* G offset */ ++ ++ if ( isRgb ){ ++ pico_coeff.coeff0_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* R <- Y */ ++ pico_coeff.coeff0_1 = 0; /* R <- U */ ++ pico_coeff.coeff0_2 = SCALE(crv, 16 - COEFF_FRAC_BITS); /* R <- V */ ++ pico_coeff.coeff0_3 = (SCALE(-128*crv - 16*cy, 16 - OFFSET_FRAC_BITS) ++ + /*0.5*/(1 << (OFFSET_FRAC_BITS-1))); /* R offset */ ++ ++ 
pico_coeff.coeff2_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* B <- Y */ ++ pico_coeff.coeff2_1 = SCALE(cbu, 16 - COEFF_FRAC_BITS); /* B <- U */ ++ pico_coeff.coeff2_2 = 0; /* B <- V */ ++ pico_coeff.coeff2_3 = (SCALE(-128*cbu - 16*cy, 16 - OFFSET_FRAC_BITS) ++ + /*0.5*/(1 << (OFFSET_FRAC_BITS-1)));/* B offset */ ++ } else { ++ pico_coeff.coeff2_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* R <- Y */ ++ pico_coeff.coeff2_1 = 0; /* R <- U */ ++ pico_coeff.coeff2_2 = SCALE(crv, 16 - COEFF_FRAC_BITS); /* R <- V */ ++ pico_coeff.coeff2_3 = (SCALE(-128*crv - 16*cy, 16 - OFFSET_FRAC_BITS) ++ + /*0.5*/(1 << (OFFSET_FRAC_BITS-1))); /* R offset */ ++ ++ pico_coeff.coeff0_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* B <- Y */ ++ pico_coeff.coeff0_1 = SCALE(cbu, 16 - COEFF_FRAC_BITS); /* B <- U */ ++ pico_coeff.coeff0_2 = 0; /* B <- V */ ++ pico_coeff.coeff0_3 = (SCALE(-128*cbu - 16*cy, 16 - OFFSET_FRAC_BITS) ++ + /*0.5*/(1 << (OFFSET_FRAC_BITS-1))); /* B offset */ ++ } ++ ++} ++ ++ ++#undef RGB +diff --git a/libvo/vo_fbdev2.c b/libvo/vo_fbdev2.c +index 053c193..7017770 100644 +--- a/libvo/vo_fbdev2.c ++++ b/libvo/vo_fbdev2.c +@@ -22,6 +22,9 @@ + #include "sub.h" + #include "mp_msg.h" + ++/* Draw directly to framebuffer */ ++#define USE_CONVERT2FB ++ + static vo_info_t info = { + "Framebuffer Device", + "fbdev2", +@@ -178,6 +181,15 @@ static int fb_preinit(int reset) + } + fb_orig_vinfo = fb_vinfo; + ++ /* Reset panning offset */ ++ fb_vinfo.yoffset = 0; ++ if (ioctl(fb_dev_fd, FBIOPAN_DISPLAY, &fb_vinfo)) { ++ mp_msg(MSGT_VO, MSGL_ERR, ++ "[fbdev2] FBIOPAN_DISPLAY failed: %s\n", ++ strerror(errno)); ++ return 0; ++ } ++ + fb_bpp = fb_vinfo.bits_per_pixel; + + /* 16 and 15 bpp is reported as 16 bpp */ +@@ -289,6 +301,10 @@ static int config(uint32_t width, uint32_t height, uint32_t d_width, + mp_msg(MSGT_VO, MSGL_ERR, "[fbdev2] Can't malloc next_frame: %s\n", strerror(errno)); + return 1; + } ++#else ++ if ((fb_line_len * fb_vinfo.yres) <= (fb_finfo.smem_len / 2) ++ && fb_vinfo.yoffset == 0) 
++ center += fb_line_len * fb_vinfo.yres; + #endif + if (fs) memset(frame_buffer, '\0', fb_line_len * fb_vinfo.yres); + +@@ -299,14 +315,22 @@ static int query_format(uint32_t format) + { + // open the device, etc. + if (fb_preinit(0)) return 0; +- if ((format & IMGFMT_BGR_MASK) == IMGFMT_BGR) { ++ if ((format & IMGFMT_RGB_MASK) == IMGFMT_RGB) { + int fb_target_bpp = format & 0xff; + set_bpp(&fb_vinfo, fb_target_bpp); + fb_vinfo.xres_virtual = fb_vinfo.xres; +- fb_vinfo.yres_virtual = fb_vinfo.yres; ++ fb_vinfo.yres_virtual = fb_vinfo.yres * 2; + if (ioctl(fb_dev_fd, FBIOPUT_VSCREENINFO, &fb_vinfo)) { +- mp_msg(MSGT_VO, MSGL_ERR, "[fbdev2] Can't put VSCREENINFO: %s\n", strerror(errno)); +- return 0; ++ mp_msg(MSGT_VO, MSGL_WARN, ++ "[fbdev2] Can't double virtual y resolution: %s\n", ++ strerror(errno)); ++ fb_vinfo.yres_virtual = fb_vinfo.yres; ++ if (ioctl(fb_dev_fd, FBIOPUT_VSCREENINFO, &fb_vinfo)) { ++ mp_msg(MSGT_VO, MSGL_ERR, ++ "[fbdev2] Can't put VSCREENINFO: %s\n", ++ strerror(errno)); ++ return -1; ++ } + } + fb_pixel_size = fb_vinfo.bits_per_pixel / 8; + fb_bpp = fb_vinfo.red.length + fb_vinfo.green.length + +@@ -367,16 +391,67 @@ static void check_events(void) + + static void flip_page(void) + { +-#ifndef USE_CONVERT2FB + int i, out_offset = 0, in_offset = 0; + +- for (i = 0; i < in_height; i++) { +- memcpy(center + out_offset, next_frame + in_offset, +- in_width * fb_pixel_size); +- out_offset += fb_line_len; +- in_offset += in_width * fb_pixel_size; +- } ++#ifndef USE_CONVERT2FB ++ if (1) { ++#else ++ if (fb_vinfo.yres_virtual == fb_vinfo.yres) { + #endif ++ for (i = 0; i < in_height; i++) { ++ memcpy(center + out_offset, next_frame + in_offset, ++ in_width * fb_pixel_size); ++ out_offset += fb_line_len; ++ in_offset += in_width * fb_pixel_size; ++ } ++ } else { ++ if (fb_vinfo.yoffset == 0) { ++ fb_vinfo.yoffset += fb_vinfo.yres; ++ center -= fb_line_len * fb_vinfo.yres; ++ } else { ++ fb_vinfo.yoffset = 0; ++ center += fb_line_len * fb_vinfo.yres; ++ 
} ++ ++ if (ioctl(fb_dev_fd, FBIOPAN_DISPLAY, &fb_vinfo)) { ++ mp_msg(MSGT_VO, MSGL_ERR, ++ "[fbdev2] Can't FBIOPAN_DISPLAY: %s\n", ++ strerror(errno)); ++ } ++ } ++} ++ ++static uint32_t get_image(mp_image_t *mpi) ++{ ++ if(mpi->flags&MP_IMGFLAG_READABLE) ++ return VO_FALSE; // slow video ram ++ if(mpi->type==MP_IMGTYPE_STATIC) ++ return VO_FALSE; // it is not static ++ ++ if (mpi->flags & (MP_IMGFLAG_ACCEPT_STRIDE | MP_IMGFLAG_ACCEPT_WIDTH)) { ++ // we're lucky or codec accepts stride => ok, let's go! ++ ++ //YUY2 and RGB formats ++ mpi->planes[0] = center; ++ mpi->width = in_width; ++ mpi->stride[0] = fb_line_len; ++ ++ // center image ++ ++ mpi->flags |= MP_IMGFLAG_DIRECT; ++ ++ return VO_TRUE; ++ } ++ ++ return VO_FALSE; ++} ++ ++static uint32_t put_image(mp_image_t *mpi) ++{ ++ // already out? ++ if ((mpi->flags & (MP_IMGFLAG_DIRECT | MP_IMGFLAG_DRAW_CALLBACK))) ++ return VO_TRUE; ++ return VO_FALSE; + } + + static void uninit(void) +@@ -403,6 +478,10 @@ static int control(uint32_t request, void *data, ...) 
+ switch (request) { + case VOCTRL_QUERY_FORMAT: + return query_format(*((uint32_t*)data)); ++ case VOCTRL_GET_IMAGE: ++ return get_image(data); ++ case VOCTRL_DRAW_IMAGE: ++ return put_image(data); + } + return VO_NOTIMPL; + } +diff --git a/version.sh b/version.sh +index 44b5c5d..cf22a68 100755 +--- a/version.sh ++++ b/version.sh +@@ -1,2 +1,2 @@ + #!/bin/sh +-echo "#define VERSION \"1.0rc1-$1\"" > version.h ++echo "#define VERSION \"1.0rc1.atmel.2-$1\"" > version.h diff --git a/recipes/mplayer/files/mplayer-imageon-svn.patch b/recipes/mplayer/files/mplayer-imageon-svn.patch new file mode 100644 index 0000000000..744a520b13 --- /dev/null +++ b/recipes/mplayer/files/mplayer-imageon-svn.patch @@ -0,0 +1,367 @@ + +# +# Patch managed by http://www.holgerschurig.de/patcher.html +# + +Index: trunk/configure +=================================================================== +--- trunk.orig/configure 2007-10-07 20:31:56.000000000 +0100 ++++ trunk/configure 2007-10-07 20:34:38.000000000 +0100 +@@ -545,6 +545,7 @@ + _vesa=auto + _fbdev=auto + _w100=no ++_imageon=no + _dvb=auto + _dvbhead=auto + _dxr2=auto +@@ -860,6 +861,8 @@ + --disable-fbdev) _fbdev=no ;; + --enable-w100) _w100=yes ;; + --disable-w100) _w100=no ;; ++ --enable-imageon) _imageon=yes ;; ++ --disable-imageon) _imageon=no ;; + --enable-dvb) _dvb=yes ;; + --disable-dvb) _dvb=no ;; + --enable-dvbhead) _dvbhead=yes ;; +@@ -4447,6 +4450,19 @@ + fi + echores "$_w100" + ++echocheck "ATI Imageon 100 (imageon)" ++if test "$_imageon" = yes ; then ++ _def_imageon='#define HAVE_IMAGEON 1' ++ _ld_imageon='-lw100' ++ _libs_mplayer="$_libs_mplayer $_ld_imageon" ++ _vosrc="$_vosrc vo_imageon.c" ++ _vomodules="imageon $_vomodules" ++else ++ _def_imageon='#undef HAVE_IMAGEON' ++ _novomodules="imageon $_novomodules" ++fi ++echores "$_imageon" ++ + + echocheck "DVB" + if test "$_dvb" = auto ; then +@@ -8453,6 +8469,7 @@ + $_def_xmga + $_def_fbdev + $_def_w100 ++$_def_imageon + $_def_dxr2 + $_def_dxr3 + $_def_ivtv +Index: 
trunk/libvo/vo_imageon.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ trunk/libvo/vo_imageon.c 2007-10-07 20:31:57.000000000 +0100 +@@ -0,0 +1,308 @@ ++#include <stdio.h> ++#include <stdlib.h> ++#include <inttypes.h> ++#include <string.h> ++ ++#include <mp_msg.h> ++#include <video_out.h> ++#include <video_out_internal.h> ++ ++static vo_info_t info = ++{ ++ "ATI IMAGEON 100 driver", ++ "imageon", ++ "Manuel Teira", ++ "C760-Openzaurus Testing version" ++}; ++ ++LIBVO_EXTERN(imageon) ++ ++#include <acapi.h> ++ ++static struct w100privdata_t { ++ uint8_t config; ++ ac_device_t *dev; ++ uint16_t xres; ++ uint16_t yres; ++ uint16_t ovwidth; ++ uint16_t ovheight; ++ ac_surface_t insurface; ++ ac_surface_t ovsurface; ++ uint16_t srcwidth; ++ uint16_t srcheight; ++ uint8_t rotate; ++ uint8_t scale; ++ ac_point_t ovdst; ++ ac_point_t dstpos; ++ ac_overlayprops_t ovprops; ++ uint32_t format; ++} w100_privdata; ++ ++static int preinit(const char *arg) ++{ ++ //Perhaps libw100 should include some code to query the framebuffer ++ struct w100privdata_t *pdata = &w100_privdata; ++ ++ pdata->config = 0; ++ pdata->xres = 640; ++ pdata->yres = 480; ++ pdata->dev = ac_init(pdata->xres, pdata->yres, AC_ROT90); ++ if (pdata->dev) { ++ return 0; ++ } else { ++ //Put a log message here ++ return 1; ++ } ++} ++ ++ ++static void draw_osd(void) ++{ ++} ++ ++void check_events(void) ++{ ++ mp_msg(MSGT_VO, MSGL_V, "check_events got called\n"); ++} ++ ++static int config(uint32_t srcwidth, uint32_t srcheight, ++ uint32_t dstwidth, uint32_t dstheight, ++ uint32_t flags, char *title, uint32_t format) ++{ ++ struct w100privdata_t *pdata = &w100_privdata; ++ uint8_t xscale, yscale; ++ uint16_t scaledwidth, scaledheight; ++ ++ mp_msg(MSGT_VO, MSGL_V, ++ "vo_imageon: srcwidth:%d, srcheight:%d, " ++ "dstwidth:%d, dstheight:%d\n", ++ srcwidth, srcheight, dstwidth, dstheight); ++ ++ if (pdata->config) { ++ 
ac_overlay_disable(pdata->dev); ++ ac_free_surface(pdata->dev, &pdata->insurface); ++ ac_free_surface(pdata->dev, &pdata->ovsurface); ++ } ++ ++ pdata->srcwidth = srcwidth; ++ pdata->srcheight = srcheight; ++ ++ //By the moment, only YUV420 supported ++ pdata->ovprops.format = OVLFORMAT_YUV420; ++ pdata->ovprops.portrait_mode = 0; ++ pdata->ovprops.inv_hor = 0; ++ pdata->ovprops.inv_ver = 0; ++ pdata->ovprops.yuv2rgb = 0; ++ ++ pdata->rotate = AC_ROT90; ++ ++ if (flags & VOFLAG_FULLSCREEN) { ++ pdata->ovwidth = 240; ++ pdata->ovheight = 320; ++ xscale = ac_get_scaler(pdata->ovheight, srcwidth); ++ yscale = ac_get_scaler(pdata->ovwidth, srcheight); ++ pdata->scale = (xscale > yscale) ? xscale : yscale; ++ pdata->ovdst.x = 0; ++ pdata->ovdst.y = 0; ++ pdata->ovprops.video_hor_exp = 1; ++ pdata->ovprops.video_ver_exp = 1; ++ } else { ++ pdata->ovwidth = (dstheight + 0xf) & ~0xf; ++ pdata->ovheight = (dstwidth + 0xf) & ~0xf; ++ xscale = ac_get_scaler(dstwidth, srcwidth); ++ yscale = ac_get_scaler(dstheight, srcheight); ++ pdata->ovdst.x = (pdata->xres - pdata->ovheight) / 2; ++ pdata->ovdst.y = (pdata->yres - pdata->ovwidth) / 2; ++ pdata->ovprops.video_hor_exp = 0; ++ pdata->ovprops.video_ver_exp = 0; ++ } ++ ++ pdata->scale = (xscale > yscale) ? 
xscale : yscale; ++ scaledwidth = ac_apply_scaler(srcwidth, pdata->scale); ++ scaledheight = ac_apply_scaler(srcheight, pdata->scale); ++ pdata->dstpos.x = (pdata->ovwidth - scaledheight) / 2; ++ pdata->dstpos.y = (pdata->ovheight - scaledwidth) / 2; ++ ++ if (ac_alloc_surface(pdata->dev, &pdata->ovsurface, ++ FMT_YUV420, ++ pdata->ovwidth, ++ pdata->ovheight, ++ AC_MEM_INTERNAL) == NULL) { ++ mp_msg(MSGT_VO, MSGL_FATAL, "Unable to allocate ov surface\n"); ++ return -1; ++ } ++ ++ if (ac_alloc_surface(pdata->dev, &pdata->insurface, ++ FMT_YUV420, srcwidth, srcheight, ++ AC_MEM_INTERNAL) == NULL) { ++ mp_msg(MSGT_VO, MSGL_WARN, ++ "No room in internal memory for insurface\n"); ++ if (ac_alloc_surface(pdata->dev, &pdata->insurface, ++ FMT_YUV420, srcwidth, srcheight, ++ AC_MEM_EXTERNAL) == NULL) { ++ mp_msg(MSGT_VO, MSGL_FATAL, ++ "Unable to allocate surface\n"); ++ ac_free_surface(pdata->dev, &pdata->ovsurface); ++ return -1; ++ } ++ } ++ ++ ac_clear_surface(pdata->dev, &pdata->ovsurface); ++ ac_clear_surface(pdata->dev, &pdata->insurface); ++ ++ ++ mp_msg(MSGT_VO, MSGL_V, ++ "vo_imageon: rotate:%d scale:%d ovwidth:%d, ovheight:%d, " ++ "ovdst(x:%d, y:%d) dstpos(x:%d,y:%d)\n", ++ pdata->rotate, ++ pdata->scale, ++ pdata->ovwidth, ++ pdata->ovheight, ++ pdata->ovdst.x, ++ pdata->ovdst.y, ++ pdata->dstpos.x, ++ pdata->dstpos.y); ++ ++ ac_overlay_setup(pdata->dev, &pdata->ovsurface, &pdata->ovsurface.rect, ++ &pdata->ovprops, 0); ++ ac_overlay_setpos(pdata->dev, &pdata->ovdst); ++ ac_overlay_enable(pdata->dev); ++ ++ pdata->config = 1; ++ return 0; ++} ++ ++static int draw_slice(uint8_t *image[], int stride[], ++ int w, int h, int x, int y) ++{ ++ struct w100privdata_t *pdata = &w100_privdata; ++ ac_rect_t dstrect; ++ ac_surface_t *dstsurface = &pdata->insurface; ++ int plane; ++ mp_msg(MSGT_VO, MSGL_V, ++ "vo_imageon: draw_slice(w:%d,h:%d,x:%d,y:%d)\n", ++ w, h, x, y); ++ ++ ac_reset_ctx(pdata->dev); ++ for (plane = 0; plane <= V_PLANE; plane++) { ++ mp_msg(MSGT_VO, 
MSGL_V, "Plane: %d, Stride: %d\n", ++ plane, stride[plane]); ++ dstrect.x = x; ++ dstrect.y = y; ++ dstrect.w = w; ++ dstrect.h = h; ++ ac_host2planerect(pdata->dev, ++ image[plane], ++ &dstrect, ++ &pdata->insurface, ++ plane); ++ } ++ return 0; ++} ++ ++static int draw_frame(uint8_t *frame[]) ++{ ++ struct w100privdata_t *pdata = &w100_privdata; ++ mp_msg(MSGT_VO, MSGL_V, "vo_imageon: draw_frame() not implemented!\n"); ++ ++} ++ ++static void flip_page(void) ++{ ++ struct w100privdata_t *pdata = &w100_privdata; ++ int plane; ++ ac_rect_t srcrect; ++ ac_point_t dstpoint; ++ ac_surface_t *insurface = &pdata->insurface; ++ ac_surface_t *ovsurface = &pdata->ovsurface; ++ ac_surfspec_t *surfspec = &ac_surfspecs[ovsurface->format]; ++ ++ mp_msg(MSGT_VO, MSGL_V, "vo_imageon: flip_page\n"); ++ ++ srcrect.x = 0; ++ srcrect.y = 0; ++ ++ ac_reset_ctx(pdata->dev); ++ if (pdata->rotate != AC_ROT0) { ++ ac_set_xform(pdata->dev, pdata->rotate, AC_NOMIRROR); ++ } ++ ac_disable_dbuf_update(pdata->dev); ++ ac_waitidle(pdata->dev); ++ for (plane = Y_PLANE; plane < surfspec->nplanes; plane++) { ++ ac_setsrcplane(pdata->dev, insurface, plane); ++ ac_setdstplane(pdata->dev, ovsurface, plane); ++ ac_prepare_scaleblt(pdata->dev, DP_DST_8BPP); ++ srcrect.w = pdata->srcwidth / surfspec->planes[plane].xsubsampling; ++ srcrect.h = pdata->srcheight / surfspec->planes[plane].ysubsampling; ++ dstpoint.x = pdata->dstpos.x / surfspec->planes[plane].xsubsampling; ++ dstpoint.y = pdata->dstpos.y / surfspec->planes[plane].ysubsampling; ++ mp_msg(MSGT_VO, MSGL_V, ++ "vo_imageon: scaleblt src(x:%d,y:%d,w:%d,h:%d)" ++ "dst(%d,%d)" ++ "scale(%d)\n", ++ srcrect.x, srcrect.y, ++ srcrect.w, srcrect.h, ++ dstpoint.x, dstpoint.y, ++ pdata->scale); ++ ac_scaleblt(pdata->dev, &srcrect, &dstpoint, ++ pdata->scale, pdata->scale); ++ } ++ ac_enable_dbuf_update(pdata->dev); ++} ++ ++static void uninit(void) ++{ ++ struct w100privdata_t *pdata = &w100_privdata; ++ ac_overlay_disable(pdata->dev); ++ 
ac_finish(pdata->dev); ++} ++ ++static int control(uint32_t request, void *data, ...) ++{ ++ struct w100privdata_t *pdata = &w100_privdata; ++ switch (request) { ++ case VOCTRL_QUERY_FORMAT: ++ return query_format(*((uint32_t *)data)); ++ case VOCTRL_FULLSCREEN: ++ mp_msg(MSGT_VO, MSGL_V, "vo_imageon: Asked for fullscreen\n"); ++ } ++ return VO_NOTIMPL; ++} ++ ++static int query_format(uint32_t format) ++{ ++ mp_msg(MSGT_VO, MSGL_V, ++ "vo_imageon: query_format was called: %x (%s)\n", ++ format, vo_format_name(format)); ++ ++ if (IMGFMT_IS_RGB(format)) { ++ return 0; ++ ++ switch (IMGFMT_RGB_DEPTH(format)) { ++ case 16: ++ return VFCAP_CSP_SUPPORTED | VFCAP_CSP_SUPPORTED_BY_HW | ++ VFCAP_HWSCALE_UP | VFCAP_HWSCALE_DOWN | ++ VFCAP_OSD | VFCAP_ACCEPT_STRIDE; ++ break; ++ } ++ } else { ++ /* Planar YUV Formats */ ++ switch (format) { ++ case IMGFMT_YV12: ++ case IMGFMT_IYUV: ++ case IMGFMT_I420: ++ case IMGFMT_YVU9: ++ case IMGFMT_IF09: ++ case IMGFMT_Y8: ++ case IMGFMT_Y800: ++ return VFCAP_CSP_SUPPORTED | VFCAP_CSP_SUPPORTED_BY_HW | ++ VFCAP_HWSCALE_UP | VFCAP_HWSCALE_DOWN | ++ VFCAP_OSD | VFCAP_ACCEPT_STRIDE; ++ break; ++ } ++ } ++ ++ return 0; ++} ++ ++ diff --git a/recipes/mplayer/files/omapfb.patch b/recipes/mplayer/files/omapfb.patch new file mode 100644 index 0000000000..860cf070f4 --- /dev/null +++ b/recipes/mplayer/files/omapfb.patch @@ -0,0 +1,10 @@ +--- /tmp/video_out.c 2009-01-14 16:39:38.000000000 +0100 ++++ trunk/libvo/video_out.c 2009-01-14 16:40:11.000000000 +0100 +@@ -86,6 +86,7 @@ + extern vo_functions_t video_out_bl; + extern vo_functions_t video_out_fbdev; + extern vo_functions_t video_out_fbdev2; ++extern vo_functions_t video_out_omapfb; + extern vo_functions_t video_out_svga; + extern vo_functions_t video_out_png; + extern vo_functions_t video_out_ggi; diff --git a/recipes/mplayer/files/pld-onlyarm5-svn.patch b/recipes/mplayer/files/pld-onlyarm5-svn.patch new file mode 100644 index 0000000000..0924060c6c --- /dev/null +++ 
b/recipes/mplayer/files/pld-onlyarm5-svn.patch @@ -0,0 +1,405 @@ +--- MPlayer-1.0pre8/libavcodec/arm/dsputil_arm_s.S.orig 2006-07-03 09:53:33.000000000 +0100 ++++ MPlayer-1.0pre8/libavcodec/arm/dsputil_arm_s.S 2006-07-03 10:06:58.000000000 +0100 +@@ -16,6 +16,13 @@ + @ License along with this library; if not, write to the Free Software + @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + @ ++#if defined(__ARM_ARCH_5__) || \ ++ defined(__ARM_ARCH_5T__) || \ ++ defined(__ARM_ARCH_5TE__) ++#define PLD(code...) code ++#else ++#define PLD(code...) ++#endif + + .macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 + mov \Rd0, \Rn0, lsr #(\shift * 8) +@@ -74,7 +81,7 @@ + put_pixels16_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -85,7 +92,7 @@ + ldmia r1, {r4-r7} + add r1, r1, r2 + stmia r0, {r4-r7} +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + add r0, r0, r2 + bne 1b +@@ -95,7 +102,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 +@@ -106,7 +113,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 +@@ -117,7 +124,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 +@@ -136,7 +143,7 @@ + put_pixels8_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r5,lr} @ R14 is also called LR 
+ adr r5, 5f + ands r4, r1, #3 +@@ -147,7 +154,7 @@ + ldmia r1, {r4-r5} + add r1, r1, r2 + subs r3, r3, #1 +- pld [r1] ++ PLD ( pld [r1] ) + stmia r0, {r4-r5} + add r0, r0, r2 + bne 1b +@@ -157,7 +164,7 @@ + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 +@@ -168,7 +175,7 @@ + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 +@@ -179,7 +186,7 @@ + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 +@@ -198,7 +205,7 @@ + put_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -210,7 +217,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -223,7 +230,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -236,7 +243,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -248,7 +255,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -267,7 +274,7 @@ + 
put_no_rnd_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -279,7 +286,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -292,7 +299,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -305,7 +312,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -317,7 +324,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -338,7 +345,7 @@ + put_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -352,13 +359,13 @@ + add r1, r1, r2 + 6: ldmia r1, {r6-r7} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -369,18 +376,18 @@ + 2: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] 
++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -392,18 +399,18 @@ + 3: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -415,18 +422,18 @@ + 4: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -447,7 +454,7 @@ + put_no_rnd_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -461,13 +468,13 @@ + add r1, r1, r2 + 6: ldmia r1, {r6-r7} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -478,18 +485,18 @@ + 2: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld 
[r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -501,18 +508,18 @@ + 3: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -524,18 +531,18 @@ + 4: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -562,7 +569,7 @@ + ldmia r1, {r8-r10} + .endif + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + .if \align == 0 + ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 + .elseif \align == 1 +@@ -624,7 +631,7 @@ + put_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 +@@ -661,7 +668,7 @@ + put_no_rnd_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block 
= word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 diff --git a/recipes/mplayer/files/pld-onlyarm5.patch b/recipes/mplayer/files/pld-onlyarm5.patch new file mode 100644 index 0000000000..3b8c576439 --- /dev/null +++ b/recipes/mplayer/files/pld-onlyarm5.patch @@ -0,0 +1,405 @@ +--- MPlayer-1.0pre8/libavcodec/armv4l/dsputil_arm_s.S.orig 2006-07-03 09:53:33.000000000 +0100 ++++ MPlayer-1.0pre8/libavcodec/armv4l/dsputil_arm_s.S 2006-07-03 10:06:58.000000000 +0100 +@@ -16,6 +16,13 @@ + @ License along with this library; if not, write to the Free Software + @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + @ ++#if defined(__ARM_ARCH_5__) || \ ++ defined(__ARM_ARCH_5T__) || \ ++ defined(__ARM_ARCH_5TE__) ++#define PLD(code...) code ++#else ++#define PLD(code...) ++#endif + + .macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 + mov \Rd0, \Rn0, lsr #(\shift * 8) +@@ -74,7 +81,7 @@ + put_pixels16_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -85,7 +92,7 @@ + ldmia r1, {r4-r7} + add r1, r1, r2 + stmia r0, {r4-r7} +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + add r0, r0, r2 + bne 1b +@@ -95,7 +102,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 +@@ -106,7 +113,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 +@@ -117,7 +124,7 @@ + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 +- pld [r1] 
++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 +@@ -136,7 +143,7 @@ + put_pixels8_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r5,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -147,7 +154,7 @@ + ldmia r1, {r4-r5} + add r1, r1, r2 + subs r3, r3, #1 +- pld [r1] ++ PLD ( pld [r1] ) + stmia r0, {r4-r5} + add r0, r0, r2 + bne 1b +@@ -157,7 +164,7 @@ + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 +@@ -168,7 +175,7 @@ + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 +@@ -179,7 +186,7 @@ + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 +- pld [r1] ++ PLD ( pld [r1] ) + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 +@@ -198,7 +205,7 @@ + put_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -210,7 +217,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -223,7 +230,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -236,7 +243,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r4, r5, r6, r7, 
r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -248,7 +255,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -267,7 +274,7 @@ + put_no_rnd_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -279,7 +286,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -292,7 +299,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -305,7 +312,7 @@ + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} +@@ -317,7 +324,7 @@ + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -338,7 +345,7 @@ + put_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -352,13 +359,13 @@ + add r1, r1, r2 + 6: ldmia r1, {r6-r7} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 
+- pld [r1] ++ PLD ( pld [r1] ) + RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -369,18 +376,18 @@ + 2: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -392,18 +399,18 @@ + 3: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -415,18 +422,18 @@ + 4: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -447,7 +454,7 @@ + put_no_rnd_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 +@@ -461,13 +468,13 @@ + add r1, r1, r2 + 6: ldmia r1, {r6-r7} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, 
r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 +- pld [r1] ++ PLD ( pld [r1] ) + NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} +@@ -478,18 +485,18 @@ + 2: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -501,18 +508,18 @@ + 3: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -524,18 +531,18 @@ + 4: + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + 6: ldmia r1, {r7-r9} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 +@@ -562,7 +569,7 @@ + ldmia r1, {r8-r10} + .endif + add r1, r1, r2 +- pld [r1] ++ PLD ( pld [r1] ) + .if \align == 0 + ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 + .elseif \align == 1 +@@ -624,7 +631,7 @@ + put_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int 
h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 +@@ -661,7 +668,7 @@ + put_no_rnd_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned +- pld [r1] ++ PLD ( pld [r1] ) + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 diff --git a/recipes/mplayer/files/powerpc-is-ppc.diff b/recipes/mplayer/files/powerpc-is-ppc.diff new file mode 100644 index 0000000000..f8143c460e --- /dev/null +++ b/recipes/mplayer/files/powerpc-is-ppc.diff @@ -0,0 +1,11 @@ +--- /tmp/configure 2007-03-30 19:40:34.000000000 +0200 ++++ MPlayer-1.0rc1/configure 2007-03-30 19:40:58.795251000 +0200 +@@ -1213,7 +1213,7 @@ + _optimizing='' + ;; + +- ppc) ++ ppc|powerpc) + _def_arch='#define ARCH_POWERPC 1' + _def_dcbzl='#define NO_DCBZL 1' + _target_arch='TARGET_ARCH_POWERPC = yes' diff --git a/recipes/mplayer/files/pxa-video_out.patch b/recipes/mplayer/files/pxa-video_out.patch new file mode 100644 index 0000000000..0c4c4feb8c --- /dev/null +++ b/recipes/mplayer/files/pxa-video_out.patch @@ -0,0 +1,22 @@ +Index: MPlayer-1.0rc1/libvo/video_out.c +=================================================================== +--- MPlayer-1.0rc1.orig/libvo/video_out.c ++++ MPlayer-1.0rc1/libvo/video_out.c +@@ -87,6 +87,7 @@ extern vo_functions_t video_out_fbdev; + extern vo_functions_t video_out_fbdev2; + extern vo_functions_t video_out_w100; + extern vo_functions_t video_out_imageon; ++extern vo_functions_t video_out_pxa; + extern vo_functions_t video_out_svga; + extern vo_functions_t video_out_png; + extern vo_functions_t video_out_ggi; +@@ -206,6 +207,9 @@ vo_functions_t* video_out_drivers[] = + #ifdef HAVE_IMAGEON + &video_out_imageon, + #endif ++#ifdef HAVE_PXA ++ &video_out_pxa, ++#endif + #ifdef HAVE_SVGALIB + &video_out_svga, + #endif diff --git 
a/recipes/mplayer/files/pxa_configure.patch b/recipes/mplayer/files/pxa_configure.patch new file mode 100644 index 0000000000..079d3086de --- /dev/null +++ b/recipes/mplayer/files/pxa_configure.patch @@ -0,0 +1,47 @@ +Index: MPlayer-1.0rc1/configure +=================================================================== +--- MPlayer-1.0rc1.orig/configure ++++ MPlayer-1.0rc1/configure +@@ -1600,6 +1600,7 @@ _vesa=auto + _fbdev=auto + _w100=no + _imageon=no ++_pxa=no + _dvb=auto + _dvbhead=auto + _dxr2=auto +@@ -1803,6 +1804,8 @@ for ac_option do + --disable-w100) _w100=no ;; + --enable-imageon) _imageon=yes ;; + --disable-imageon) _imageon=no ;; ++ --enable-pxa) _pxa=yes ;; ++ --disable-pxa) _pxa=no ;; + --enable-dvb) _dvb=yes ;; + --disable-dvb) _dvb=no ;; + --enable-dvbhead) _dvbhead=yes ;; +@@ -4296,6 +4299,17 @@ else + fi + echores "$_imageon" + ++echocheck "PXA27x Overlay Support" ++if test "$_pxa" = yes ; then ++ _def_pxa='#define HAVE_PXA 1' ++ _vosrc="$_vosrc vo_pxa.c" ++ _vomodules="pxa $_vomodules" ++else ++ _def_pxa='#undef HAVE_PXA' ++ _novomodules="pxa $_novomodules" ++fi ++echores "$_pxa" ++ + + echocheck "DVB" + if test "$_dvb" = auto ; then +@@ -8398,6 +8412,7 @@ $_def_syncfb + $_def_fbdev + $_def_w100 + $_def_imageon ++$_def_pxa + $_def_dxr2 + $_def_dxr3 + $_def_ivtv diff --git a/recipes/mplayer/files/simple_idct_armv5te.S b/recipes/mplayer/files/simple_idct_armv5te.S new file mode 100644 index 0000000000..3706f3a4ea --- /dev/null +++ b/recipes/mplayer/files/simple_idct_armv5te.S @@ -0,0 +1,715 @@ +/* + * Simple IDCT + * + * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> + * Copyright (c) 2006 Mans Rullgard <mans@mansr.com> + * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net> + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +.arch armv5te + +/* IMPORTANT: this value should be the same as defined in dsputil.h */ +#define MAX_NEG_CROP 1024 + +/* + * ARM EABI guarantees 8 byte stack alignment, so we can use LDRD instructions + * for accessing stack and load two registers per cycle to improve performance + * on ARM11 and XScale + */ +#ifdef __ARM_EABI__ +#define DWORD_ALIGNED_STACK 1 +#endif + +#define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ +#define ROW_SHIFT 11 +#define COL_SHIFT 20 + +#define W13 (W1 | (W3 << 16)) +#define W26 (W2 | (W6 << 16)) +#define W57 (W5 | (W7 << 16)) + +#define W22 ((-W2 & 0xFFFF) | (W2 << 16)) +#define W44 ((-W4 & 0xFFFF) | (W4 << 16)) +#define W66 ((-W6 & 0xFFFF) | (W6 << 16)) + +#define M51 ((-W5 & 0xFFFF) | ((-W1 & 0xFFFF) << 16)) + + .text + +/* + * a local pool with 64-bit constants for 'idct_rows_armv5te' function, + * we align it at 16 byte boundary in 
order to ensure that it does not cross + * cache line boundary (occupies only a single cache line) + */ + .balign 16 +w2266idct_rows_armv5te: + .long W22 + .long W66 +w1357idct_rows_armv5te: + .long W13 + .long W57 + +/* + * A rows processing function. Benchmarks on a few video files show that + * about 80-90% of uses of this function have all rows empty except for + * the row[0]. + * + * On entry: + * a1 - row address + * lr - return address + * + * On exit: + * a1 - row address + * + * Registers usage within this function: + * a1 - row address + * a2 - temporary register + * v5, v6, v7, v8 - row data + * v1, v2, v3, v4 - A0, A1, A2 and A3 variables + * a3, a4 - used for loading constants + * ip - temporary register + * lr - temporary register, also holds initial row address value + * to check end of loop condition + */ + .balign 32 + .type idct_rows_armv5te, %function + .func idct_rows_armv5te +idct_rows_armv5te: + str a1, [sp, #-4]! + str lr, [sp, #-4]! + mov lr, a1 + ldrd v7, [a1, #(8 * 16 - 8)]! /* v7 = row[5:4], v8 = row[7:6] */ +1: + ldrd v5, [a1, #-8]! /* v5 = row[1:0], v6 = row[3:2] */ + orrs v1, v7, v8 + cmpeq v1, v6 + cmpeq v1, v5, lsr #16 + bne 2f /* jump to process full row */ + /* only row[0] is not empty here */ + mov v5, v5, lsl #19 + cmp a1, lr + orr v5, v5, v5, lsr #16 + str v5, [a1] + str v5, [a1, #4] + str v5, [a1, #8] + str v5, [a1, #12] + ldrned v7, [a1, #-8]! 
/* v7 = row[5:4], v8 = row[7:6] */ + bne 1b + ldr pc, [sp], #8 + +2: /* process full row */ + /* the next code fragment calculates A variables */ + + ldr a2, w44 /* a2 = -W4 | (W4 << 16) */ + ldrd a3, w2266idct_rows_armv5te /* a3 = -W2 | (W2 << 16) */ + /* a4 = -W6 | (W6 << 16) */ + mov v1, #(1<<(ROW_SHIFT-1)) + smlatb v1, a2, v5, v1 /* v1 = W4*row[0]+(1<<(ROW_SHIFT-1)) */ + + cmp a1, lr /* last row reached? flags are consumed by ldrned/bne at the end of this row */ + + smlabb v2, a2, v7, v1 /* v2 = v1 - W4*row[4] */ + smlatb v1, a2, v7, v1 /* v1 = v1 + W4*row[4] */ + + smlabb v3, a4, v6, v2 /* v3 = v2 - W6*row[2] */ + smlabb v4, a3, v6, v1 /* v4 = v1 - W2*row[2] */ + + smlatb v3, a3, v8, v3 /* v3 += W2*row[6] */ + smlabb v4, a4, v8, v4 /* v4 -= W6*row[6] */ + + ldrd a3, w1357idct_rows_armv5te /* a3 = W1 | (W3 << 16) */ + /* a4 = W5 | (W7 << 16) */ + + rsb v2, v3, v2, lsl #1 /* v2 = 2*v2 - v3 */ + rsb v1, v4, v1, lsl #1 /* v1 = 2*v1 - v4 */ + + /* all A variables are now calculated (and stored in v1, v2, v3, v4 registers) */ + + smulbt a2, a3, v5 /* b0 = W1*row[1] */ + smultt ip, a3, v5 /* tmp = W3*row[1] */ + smultt lr, a4, v6 /* -b1 = W7*row[3] */ + smlatt a2, a3, v6, a2 /* b0 += W3*row[3] */ + smlabt lr, a3, v7, lr /* -b1 += W1*row[5] */ + smlabt a2, a4, v7, a2 /* b0 += W5*row[5] */ + smlabt lr, a4, v8, lr /* -b1 += W5*row[7] */ + smlatt a2, a4, v8, a2 /* b0 += W7*row[7] */ + sub lr, ip, lr /* b1 = tmp - (-b1) */ + + /* B0 is now calculated (a2), B1 is now calculated (lr) */ + + add ip, v1, a2 /* ip = (A0 + B0) */ + sub a2, v1, a2 /* a2 = (A0 - B0) */ + mov ip, ip, asr #ROW_SHIFT + mov a2, a2, asr #ROW_SHIFT + strh ip, [a1, #0] /* row[0] = (A0 + B0) >> ROW_SHIFT */ + strh a2, [a1, #14] /* row[7] = (A0 - B0) >> ROW_SHIFT */ + + ldr v1, m51 /* v1 = ((-W5 & 0xFFFF) | ((-W1 & 0xFFFF) << 16)) */ + + add ip, v2, lr /* ip = (A1 + B1) */ + sub a2, v2, lr /* a2 = (A1 - B1) */ + mov ip, ip, asr #ROW_SHIFT + mov a2, a2, asr #ROW_SHIFT + strh ip, [a1, #2] /* row[1] = (A1 + B1) >> ROW_SHIFT */ + strh a2, [a1, #12] /* row[6] = (A1 - B1) >> ROW_SHIFT */ + +
smulbt a2, a4, v5 /* b2 = W5*row[1] */ + smultt v2, a4, v5 /* b3 = W7*row[1] */ + smlatt a2, v1, v6, a2 /* b2 -= W1*row[3] */ + smlatt v2, a3, v7, v2 /* b3 += W3*row[5] */ + smlatt a2, a4, v7, a2 /* b2 += W7*row[5] */ + smlatt v2, v1, v8, v2 /* b3 -= W1*row[7] */ + smlatt a2, a3, v8, a2 /* b2 += W3*row[7] */ + smlabt v2, v1, v6, v2 /* b3 -= W5*row[3] */ + + /* B2 is now calculated (a2), B3 is now calculated (v2) */ + + ldr lr, [sp, #4] + + add ip, v3, a2 /* ip = (A2 + B2) */ + sub a2, v3, a2 /* a2 = (A2 - B2) */ + mov ip, ip, asr #ROW_SHIFT + mov a2, a2, asr #ROW_SHIFT + strh ip, [a1, #4] /* row[2] = (A2 + B2) >> ROW_SHIFT */ + strh a2, [a1, #10] /* row[5] = (A2 - B2) >> ROW_SHIFT */ + + add ip, v4, v2 /* ip = (A3 + B3) */ + sub a2, v4, v2 /* a2 = (A3 - B3) */ + mov ip, ip, asr #ROW_SHIFT + mov a2, a2, asr #ROW_SHIFT + strh ip, [a1, #6] /* row[3] = (A3 + B3) >> ROW_SHIFT */ + strh a2, [a1, #8] /* row[4] = (A3 - B3) >> ROW_SHIFT */ + + ldrned v7, [a1, #-8]! /* v7 = row[5:4], v8 = row[7:6] */ + bne 1b + ldr pc, [sp], #8 + .endfunc + +/******************************************************************************/ + +/* + * a global pool with 32-bit constants (used from all the functions in this module), + * we align it at 32 byte boundary in order to ensure that it does not cross cache + * line boundary (occupies only a single cache line) + */ + .balign 32 +simple_idct_croptbl_armv5te: + .long (ff_cropTbl + MAX_NEG_CROP) +m51: .long M51 +w44: .long W44 +xxx: .long (((1<<(COL_SHIFT-1))/W4)*W4) +m7: .long (-W7) + +/* + * Enforce 8 byte stack alignment if it is not provided by ABI. Used at the beginning + * of global functions. If stack is not properly aligned, real return address is + * pushed to stack (thus fixing stack alignment) and lr register is set to a thunk + * function 'unaligned_return_thunk_armv5te' which is responsible for providing + * correct return from the function in this case. 
+ */ + .macro idct_stackalign_armv5te +#ifndef DWORD_ALIGNED_STACK + tst sp, #4 + strne lr, [sp, #-4]! + adrne lr, unaligned_return_thunk_armv5te +#endif + .endm + +/* + * Process two columns at once. + * + * Registers usage within this macro: + * a1 - column address + * a2 - temporary register + * A0b (v1), A0t (v2), A1b (v3), A1t (v4), A2b (v5), A2t (v6), A3b (v7), A3t (v8) + * B0b (v1), B0t (v2), B1b (v3), B1t (v4), B2b (v5), B2t (v6), B3b (v7), B3t (v8) + * a3, a4 - used for loading constants + * ip - temporary register + * lr - temporary register + * + * Data on exit ('b' suffix - first column (also bottom 16-bits of a register), + * 't' suffix - second column (also top 16-bits of a register)): + * A0b, A0t, A1b, A1t, A2b, A2t, A3b, A3t - are returned in stack + * B0b, B0t, B1b, B1t, B2b, B2t, B3b, B3t - are returned in v1, v2, v3, v4, v5, v6, v7, v8 registers + * a1 - address of the next pair of columns + */ + .macro idct_two_col_armv5te DWORD_CONST_SUFFIX + ldr v4, [a1], #4 /* v4 = col_t[0]:col_b[0] */ + ldr a2, w44 /* a2 = -W4 | (W4 << 16) */ + ldr v1, xxx /* v1 = (((1<<(COL_SHIFT-1))/W4)*W4) */ + ldr ip, [a1, #(16*4 - 4)] /* ip = col_t[4]:col_b[4] */ + ldrd a3, w2266\DWORD_CONST_SUFFIX /* a3 = -W2 | (W2 << 16) */ + /* a4 = -W6 | (W6 << 16) */ + smlatt v2, a2, v4, v1 /* A0t = W4 * (col_t[0] + ((1<<(COL_SHIFT-1))/W4)) */ + smlatb v1, a2, v4, v1 /* A0b = W4 * (col_b[0] + ((1<<(COL_SHIFT-1))/W4)) */ + + ldr lr, [a1, #(16*2 - 4)] /* lr = col_t[2]:col_b[2] */ + + smlabb v3, a2, ip, v1 /* A1b = A0b - W4*col_b[4] */ + smlatb v1, a2, ip, v1 /* A0b = A0b + W4*col_b[4] */ + smlabt v4, a2, ip, v2 /* A1t = A0t - W4*col_t[4] */ + smlatt v2, a2, ip, v2 /* A0t = A0t + W4*col_t[4] */ + + ldr ip, [a1, #(16*6 - 4)] /* ip = col_t[6]:col_b[6] */ + + smlabb v5, a4, lr, v3 /* A2b = A1b - W6*col_b[2] */ + smlabb v7, a3, lr, v1 /* A3b = A0b - W2*col_b[2] */ + smlabt v6, a4, lr, v4 /* A2t = A1t - W6*col_t[2] */ + smlabt v8, a3, lr, v2 /* A3t = A0t - W2*col_t[2] */ + + ldr lr, [a1, 
#(16*1 - 4)] /* lr = col_t[1]:col_b[1] */ + + smlatb v5, a3, ip, v5 /* A2b += W2*col_b[6] */ + smlabb v7, a4, ip, v7 /* A3b -= W6*col_b[6] */ + smlatt v6, a3, ip, v6 /* A2t += W2*col_t[6] */ + smlabt v8, a4, ip, v8 /* A3t -= W6*col_t[6] */ + + ldrd a3, w1357\DWORD_CONST_SUFFIX /* a3 = W1 | (W3 << 16) */ + /* a4 = W5 | (W7 << 16) */ + + rsb v3, v5, v3, lsl #1 /* A1b = 2*A1b - A2b */ + rsb v1, v7, v1, lsl #1 /* A0b = 2*A0b - A3b */ + rsb v4, v6, v4, lsl #1 /* A1t = 2*A1t - A2t */ + rsb v2, v8, v2, lsl #1 /* A0t = 2*A0t - A3t */ + + ldr ip, [a1, #(16*5 - 4)] /* ip = col_t[5]:col_b[5] */ + ldr a2, m51 /* a2 = ((-W5 & 0xFFFF) | ((-W1 & 0xFFFF) << 16)) */ + + stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, v8} + + smulbb v1, a3, lr /* B0b = W1*col_b[1] */ + smulbt v2, a3, lr /* B0t = W1*col_t[1] */ + smultb v3, a3, lr /* B1b = W3*col_b[1] */ + smultt v4, a3, lr /* B1t = W3*col_t[1] */ + smulbb v5, a4, lr /* B2b = W5*col_b[1] */ + smulbt v6, a4, lr /* B2t = W5*col_t[1] */ + smultb v7, a4, lr /* B3b = W7*col_b[1] */ + smultt v8, a4, lr /* B3t = W7*col_t[1] */ + + ldr lr, [a1, #(16*7 - 4)] /* lr = col_t[7]:col_b[7] */ + + cmp ip, #0 + beq 2f /* jump probability is typically more than 75% */ + + smlabt v2, a4, ip, v2 /* B0t += W5*col_t[5] */ + smlatt v4, a2, ip, v4 /* B1t -= W1*col_t[5] */ + smlatt v6, a4, ip, v6 /* B2t += W7*col_t[5] */ + smlatt v8, a3, ip, v8 /* B3t += W3*col_t[5] */ + smlabb v1, a4, ip, v1 /* B0b += W5*col_b[5] */ + smlatb v3, a2, ip, v3 /* B1b -= W1*col_b[5] */ + smlatb v5, a4, ip, v5 /* B2b += W7*col_b[5] */ + smlatb v7, a3, ip, v7 /* B3b += W3*col_b[5] */ +2: + ldr ip, [a1, #(16*3 - 4)] /* ip = col_t[3]:col_b[3] */ + + cmp lr, #0 + beq 3f /* jump probability is typically more than 90% */ + + smlatt v2, a4, lr, v2 /* B0t += W7*col_t[7] */ + smlabt v4, a2, lr, v4 /* B1t -= W5*col_t[7] */ + smlatt v6, a3, lr, v6 /* B2t += W3*col_t[7] */ + smlatt v8, a2, lr, v8 /* B3t -= W1*col_t[7] */ + + smlatb v1, a4, lr, v1 /* B0b += W7*col_b[7] */ + smlabb v3, a2, lr, v3 /* 
B1b -= W5*col_b[7] */ + smlatb v5, a3, lr, v5 /* B2b += W3*col_b[7] */ + smlatb v7, a2, lr, v7 /* B3b -= W1*col_b[7] */ +3: + cmp ip, #0 + beq 4f /* jump probability is typically more than 65% */ + + ldr a4, m7 + + smlatt v2, a3, ip, v2 /* B0t += W3*col_t[3] */ + smlatt v6, a2, ip, v6 /* B2t -= W1*col_t[3] */ + smlabt v8, a2, ip, v8 /* B3t -= W5*col_t[3] */ + smlabt v4, a4, ip, v4 /* B1t -= W7*col_t[3] */ + + smlatb v1, a3, ip, v1 /* B0b += W3*col_b[3] */ + smlatb v5, a2, ip, v5 /* B2b -= W1*col_b[3] */ + smlabb v7, a2, ip, v7 /* B3b -= W5*col_b[3] */ + smlabb v3, a4, ip, v3 /* B1b -= W7*col_b[3] */ +4: + .endm + +/******************************************************************************/ + +/* + * a local pool with 64-bit constants for 'simple_idct_put_armv5te' function, + * we align it at 16 byte boundary in order to ensure that it does not cross + * cache line boundary (occupies only a single cache line) + */ + .balign 16 +w2266simple_idct_put_armv5te: + .long W22 + .long W66 +w1357simple_idct_put_armv5te: + .long W13 + .long W57 + + .balign 32 + .global simple_idct_put_armv5te + .type simple_idct_put_armv5te, %function + .func simple_idct_put_armv5te +simple_idct_put_armv5te: + + idct_stackalign_armv5te + + stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, v8, lr} + strd a1, [sp, #-12]! + + mov a1, a3 + bl idct_rows_armv5te + + add a2, a1, #16 + strd a1, [sp, #-8]! 
+1: + idct_two_col_armv5te simple_idct_put_armv5te + str a1, [sp, #(0 + 32)] + ldrd a3, [sp, #(8 + 32)] + ldr lr, simple_idct_croptbl_armv5te + + ldrd a1, [sp], #8 + add ip, a3, #2 + str ip, [sp, #(8 + 32 - 8)] + + add ip, a1, v1 + sub v1, a1, v1 + add a1, a2, v2 + sub v2, a2, v2 + ldrb a1, [lr, a1, asr #COL_SHIFT] + ldrb ip, [lr, ip, asr #COL_SHIFT] + ldrb v2, [lr, v2, asr #COL_SHIFT] + ldrb v1, [lr, v1, asr #COL_SHIFT] + orr ip, ip, a1, asl #8 + ldrd a1, [sp], #8 + orr v1, v1, v2, asl #8 + strh ip, [a3], a4 + + add ip, a1, v3 + sub v3, a1, v3 + add a1, a2, v4 + sub v4, a2, v4 + ldrb a1, [lr, a1, asr #COL_SHIFT] + ldrb ip, [lr, ip, asr #COL_SHIFT] + ldrb v4, [lr, v4, asr #COL_SHIFT] + ldrb v3, [lr, v3, asr #COL_SHIFT] + orr ip, ip, a1, asl #8 + ldrd a1, [sp], #8 + orr v3, v3, v4, asl #8 + strh ip, [a3], a4 + + add ip, a1, v5 + sub v5, a1, v5 + add a1, a2, v6 + sub v6, a2, v6 + ldrb a1, [lr, a1, asr #COL_SHIFT] + ldrb ip, [lr, ip, asr #COL_SHIFT] + ldrb v6, [lr, v6, asr #COL_SHIFT] + ldrb v5, [lr, v5, asr #COL_SHIFT] + orr ip, ip, a1, asl #8 + ldrd a1, [sp], #8 + orr v5, v5, v6, asl #8 + strh ip, [a3], a4 + + add ip, a1, v7 + sub v7, a1, v7 + add a1, a2, v8 + sub v8, a2, v8 + ldrb a1, [lr, a1, asr #COL_SHIFT] + ldrb ip, [lr, ip, asr #COL_SHIFT] + ldrb v8, [lr, v8, asr #COL_SHIFT] + ldrb v7, [lr, v7, asr #COL_SHIFT] + orr ip, ip, a1, asl #8 + strh ip, [a3], a4 + + ldrd a1, [sp, #0] + orr v7, v7, v8, asl #8 + + strh v7, [a3], a4 + strh v5, [a3], a4 + cmp a1, a2 + strh v3, [a3], a4 + strh v1, [a3], a4 + + bne 1b + + add sp, sp, #20 + ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, v8, pc} + .endfunc + +/******************************************************************************/ + +/* + * a local pool with 64-bit constants for 'simple_idct_add_armv5te' function, we + * align it at 16 byte boundary in order to ensure that it does not cross + * cache line boundary (occupies only a single cache line) + */ + .balign 16 +w2266simple_idct_add_armv5te: + .long W22 + .long W66 
+w1357simple_idct_add_armv5te: + .long W13 + .long W57 + + .balign 32 + .global simple_idct_add_armv5te + .type simple_idct_add_armv5te, %function + .func simple_idct_add_armv5te +simple_idct_add_armv5te: + + idct_stackalign_armv5te + + stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, v8, lr} + strd a1, [sp, #-12]! + + mov a1, a3 + bl idct_rows_armv5te + + add a2, a1, #16 + strd a1, [sp, #-8]! + + sub sp, sp, #8 +1: + idct_two_col_armv5te simple_idct_add_armv5te + ldrd a3, [sp, #(8 + 40)] + str a1, [sp, #(0 + 40)] + + ldrd a1, [sp], #8 + add ip, a3, #2 + str ip, [sp, #(8 + 40 - 8)] + + add ip, a1, v1 + sub v1, a1, v1 + add a1, a2, v2 + sub v2, a2, v2 + strd v1, [sp, #(32 - 8)] /* save v1 and v2 to stack in order to use them as temporary registers */ + ldrb v1, [a3, #1] + ldrb v2, [a3] + ldr lr, simple_idct_croptbl_armv5te + add v1, v1, a1, asr #COL_SHIFT + ldrd a1, [sp], #8 + add ip, v2, ip, asr #COL_SHIFT + ldrb v2, [lr, v1] + ldrb ip, [lr, ip] + + add v1, a1, v3 + sub v3, a1, v3 + + ldrb a1, [a3, a4] + orr ip, ip, v2, asl #8 + strh ip, [a3], a4 + + ldrb v2, [a3, #1] + + add ip, a2, v4 + sub v4, a2, v4 + add ip, v2, ip, asr #COL_SHIFT + add v1, a1, v1, asr #COL_SHIFT + ldrb v2, [lr, ip] + ldrb ip, [lr, v1] + ldrb v1, [a3, a4] + ldrd a1, [sp], #8 + orr ip, ip, v2, asl #8 + strh ip, [a3], a4 + + ldrb v2, [a3, #1] + add ip, a1, v5 + sub v5, a1, v5 + add a1, a2, v6 + sub v6, a2, v6 + add a1, v2, a1, asr #COL_SHIFT + add ip, v1, ip, asr #COL_SHIFT + ldrb v2, [lr, a1] + ldrb ip, [lr, ip] + ldrb v1, [a3, a4] + ldrd a1, [sp], #8 + orr ip, ip, v2, asl #8 + strh ip, [a3], a4 + + ldrb v2, [a3, #1] + add ip, a1, v7 + sub v7, a1, v7 + add a1, a2, v8 + sub v8, a2, v8 + add a1, v2, a1, asr #COL_SHIFT + add ip, v1, ip, asr #COL_SHIFT + ldrb v2, [lr, a1] + ldrb ip, [lr, ip] + ldrb v1, [a3, a4] + add a2, lr, v7, asr #COL_SHIFT + orr ip, ip, v2, asl #8 + strh ip, [a3], a4 + + ldrb v2, [a3, #1] + add v8, lr, v8, asr #COL_SHIFT + mov v7, a3 /* a good news, now we have two more spare registers 
v7 and v8 */ + ldrb ip, [a2, v1] + ldrb v8, [v8, v2] + ldrb v1, [v7, a4]! + ldrb v2, [v7, #1] + orr ip, ip, v8, asl #8 + strh ip, [a3], a4 + + ldrb a1, [v7, a4]! + ldrb a2, [v7, #1] + + add v6, v2, v6, asr #COL_SHIFT + add v5, v1, v5, asr #COL_SHIFT + ldrb v6, [lr, v6] + ldrb v5, [lr, v5] + ldrd v1, [sp, #0] /* restore v1 and v2 that were saved earlier */ + orr v5, v5, v6, asl #8 + strh v5, [a3], a4 + ldrb v5, [v7, a4]! + ldrb v6, [v7, #1] + + add v4, a2, v4, asr #COL_SHIFT + add v3, a1, v3, asr #COL_SHIFT + ldrb v4, [lr, v4] + ldrb v3, [lr, v3] + + ldrd a1, [sp, #8] + add v2, v6, v2, asr #COL_SHIFT + add v1, v5, v1, asr #COL_SHIFT + ldrb v2, [lr, v2] + ldrb v1, [lr, v1] + cmp a1, a2 + orr v3, v3, v4, asl #8 + strh v3, [a3], a4 + orr v1, v1, v2, asl #8 + strh v1, [a3], a4 + + bne 1b + + add sp, sp, #28 + ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, v8, pc} + .endfunc + +/******************************************************************************/ + +/* + * a local pool with 64-bit constants for 'simple_idct_armv5te' function, we + * align it at 16 byte boundary in order to ensure that it does not cross + * cache line boundary (occupies only a single cache line) + */ + .balign 16 +w2266simple_idct_armv5te: + .long W22 + .long W66 +w1357simple_idct_armv5te: + .long W13 + .long W57 + + .balign 32 + .global simple_idct_armv5te + .type simple_idct_armv5te, %function + .func simple_idct_armv5te +simple_idct_armv5te: + + idct_stackalign_armv5te + + stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, v8, lr} + strd a1, [sp, #-12]! + + bl idct_rows_armv5te + + add a2, a1, #16 + str a2, [sp, #-8]! 
+1: + idct_two_col_armv5te simple_idct_armv5te + + ldr lr, [sp, #32] + + ldrd a3, [sp], #8 + + cmp lr, a1 + + add a2, a3, v1 + add ip, a4, v2 + mov a2, a2, asr #COL_SHIFT + mov ip, ip, asr #COL_SHIFT + strh a2, [a1, #(16*0 - 4)] + strh ip, [a1, #(16*0 + 2 - 4)] + sub a2, a3, v1 + sub ip, a4, v2 + ldrd a3, [sp], #8 + mov a2, a2, asr #COL_SHIFT + mov ip, ip, asr #COL_SHIFT + strh a2, [a1, #(16*7 - 4)] + strh ip, [a1, #(16*7 + 2 - 4)] + + add a2, a3, v3 + add ip, a4, v4 + mov a2, a2, asr #COL_SHIFT + mov ip, ip, asr #COL_SHIFT + strh a2, [a1, #(16*1 - 4)] + strh ip, [a1, #(16*1 + 2 - 4)] + sub a2, a3, v3 + sub ip, a4, v4 + ldrd a3, [sp], #8 + mov a2, a2, asr #COL_SHIFT + mov ip, ip, asr #COL_SHIFT + strh a2, [a1, #(16*6 - 4)] + strh ip, [a1, #(16*6 + 2 - 4)] + + add a2, a3, v5 + add ip, a4, v6 + mov a2, a2, asr #COL_SHIFT + mov ip, ip, asr #COL_SHIFT + strh a2, [a1, #(16*2 - 4)] + strh ip, [a1, #(16*2 + 2 - 4)] + sub a2, a3, v5 + sub ip, a4, v6 + ldrd a3, [sp], #8 + mov a2, a2, asr #COL_SHIFT + mov ip, ip, asr #COL_SHIFT + strh a2, [a1, #(16*5 - 4)] + strh ip, [a1, #(16*5 + 2 - 4)] + + add a2, a3, v7 + add ip, a4, v8 + mov a2, a2, asr #COL_SHIFT + mov ip, ip, asr #COL_SHIFT + strh a2, [a1, #(16*3 - 4)] + strh ip, [a1, #(16*3 + 2 - 4)] + sub a2, a3, v7 + sub ip, a4, v8 + mov a2, a2, asr #COL_SHIFT + mov ip, ip, asr #COL_SHIFT + strh a2, [a1, #(16*4 - 4)] + strh ip, [a1, #(16*4 + 2 - 4)] + + bne 1b + + add sp, sp, #20 + ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, v8, pc} + .endfunc + +/******************************************************************************/ + +unaligned_return_thunk_armv5te: + ldr pc, [sp], #4 diff --git a/recipes/mplayer/files/vo_omapfb.c b/recipes/mplayer/files/vo_omapfb.c new file mode 100644 index 0000000000..5a43404300 --- /dev/null +++ b/recipes/mplayer/files/vo_omapfb.c @@ -0,0 +1,586 @@ +/* + +Copyright (C) 2008 Gregoire Gentil <gregoire@gentil.com> +This file adds an optimized vo output to mplayer for the OMAP platform. 
This is a first pass and an attempt to help improve +media playback on the OMAP platform. The usual disclaimer applies here: this code is provided without any warranty. +Many bugs and issues still exist. Feedback is welcome. + +This output uses the yuv420_to_yuv422 conversion from Mans Rullgard, and is heavily inspired by the work of Siarhei Siamashka. +I would like to thank those two people here; without them this code would certainly not exist. + +Two options of the output are available: +fb_overlay_only (disabled by default): only the overlay is drawn. X11 stuff is ignored. +dbl_buffer (disabled by default): add double buffering. Some tearsync flags are probably missing in the code. + +Syntax is the following: +mplayer -ao alsa -vo omapfb /test.avi +mplayer -nosound -vo omapfb:fb_overlay_only:dbl_buffer /test.avi + +You need to have two planes on your system. On beagleboard, it means something like: video=omapfb:vram:2M,vram:4M + +Known issues: +1) A green line or some vertical lines (if mplayer decides to draw bands instead of full frames) may appear. +It's an interpolation bug in the color conversion that needs to be fixed. + +2) The color conversion accepts only multiples of 16 pixels for width and height. + +3) Scaling down is disabled because the scaling-down kernel patch for the OMAP3 platform doesn't seem to work yet. + + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> + +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <linux/fb.h> + +#include "config.h" +#include "video_out.h" +#include "video_out_internal.h" +#include "fastmemcpy.h" +#include "sub.h" +#include "mp_msg.h" + +#include "omapfb.h" + +#include "libswscale/swscale.h" +#include "libmpcodecs/vf_scale.h" +#include "libavcodec/avcodec.h" + +#include "aspect.h" + +#include "subopt-helper.h" + +#include <X11/Xlib.h> +#include <X11/Xutil.h> +#include <X11/Xatom.h> +#include "wskeys.h" + +static vo_info_t info = { + "omapfb video driver", + "omapfb", + "", + "" +}; + +LIBVO_EXTERN(omapfb) + +static int fb_overlay_only = 0; // if set, we need only framebuffer overlay, but do not need any x11 code +static int dbl_buffer = 0; +static int fullscreen_flag = 0; +static int plane_ready = 0; + +extern void yuv420_to_yuv422(uint8_t *yuv, uint8_t *y, uint8_t *u, uint8_t *v, int w, int h, int yw, int cw, int dw); +static struct fb_var_screeninfo sinfo_p0; +static struct fb_var_screeninfo sinfo; +static struct omapfb_mem_info minfo; +static struct omapfb_plane_info pinfo; +static struct { + unsigned x; + unsigned y; + uint8_t *buf; +} fb_pages[2]; +static int dev_fd = -1; +static int fb_page_flip = 0; +static int page = 0; +static void omapfb_update(int x, int y, int out_w, int out_h, int show); + +extern void mplayer_put_key( int code ); +#include "osdep/keycodes.h" + +#define TRANSPARENT_COLOR_KEY 0xff0 + +static Display *display = NULL; // pointer to X Display structure. +static int screen_num; // number of screen to place the window on. +static Window win = 0; +static Window parent = 0; // pointer to the newly created window. 
+ +/* This is used to intercept window closing requests. */ +static Atom wm_delete_window; + +/** + * Function to get the offset to be used when in windowed mode + * or when using -wid option + */ +static void x11_get_window_abs_position(Display *display, Window window, + int *wx, int *wy, int *ww, int *wh) +{ + Window root, parent; + Window *child; + unsigned int n_children; + XWindowAttributes attribs; + + /* Get window attributes */ + XGetWindowAttributes(display, window, &attribs); + + /* Get relative position of given window */ + *wx = attribs.x; + *wy = attribs.y; + if (ww) + *ww = attribs.width; + if (wh) + *wh = attribs.height; + + /* Query window tree information */ + XQueryTree(display, window, &root, &parent, &child, &n_children); + if (parent) + { + int x, y; + /* If we have a parent we must go there and discover his position*/ + x11_get_window_abs_position(display, parent, &x, &y, NULL, NULL); + *wx += x; + *wy += y; + } + + /* If we had children, free it */ + if(n_children) + XFree(child); +} + + +/** + * Function that controls fullscreen state for x11 window + * action = 1 (set fullscreen) + * action = 0 (set windowed mode) + */ +static void x11_set_fullscreen_state(Display *display, Window window, int action) +{ + XEvent xev; + + /* init X event structure for _NET_WM_FULLSCREEN client msg */ + xev.xclient.type = ClientMessage; + xev.xclient.serial = 0; + xev.xclient.send_event = True; + xev.xclient.message_type = XInternAtom(display, "_NET_WM_STATE", False); + xev.xclient.window = window; + xev.xclient.format = 32; + xev.xclient.data.l[0] = action; + xev.xclient.data.l[1] = XInternAtom(display, "_NET_WM_STATE_FULLSCREEN", False); + xev.xclient.data.l[2] = 0; + xev.xclient.data.l[3] = 0; + xev.xclient.data.l[4] = 0; + + /* finally send that damn thing */ + if (!XSendEvent(display, DefaultRootWindow(display), False, SubstructureRedirectMask | SubstructureNotifyMask, &xev)) { + mp_msg(MSGT_VO, MSGL_ERR, "[omapfb] failure in 
x11_set_fullscreen_state\n"); + exit(1); + } + XSync(display, False); +} + + +XClassHint classhint = {"mediaplayer-ui", "mediaplayer-ui"}; + + +/** + * Initialize x11 window (it is used to allocate some screen area for framebuffer overlay) + */ +static void x11_init() +{ + display = XOpenDisplay(getenv("DISPLAY")); + if (display == NULL) { + mp_msg(MSGT_VO, MSGL_ERR, "[omapfb] failure in x11_init, can't open display\n"); + exit(1); + } + + screen_num = DefaultScreen(display); + + if (WinID > 0) + { + Window root; + Window *child; + unsigned int n_children; + + win = WinID; + + /* Query window tree information */ + XQueryTree(display, win, &root, &parent, &child, &n_children); + if (n_children) + XFree(child); + + XUnmapWindow(display, win); + if (parent) + XSelectInput(display, parent, StructureNotifyMask); + XMapWindow(display, win); + + wm_delete_window = XInternAtom(display, "WM_DELETE_WINDOW", False); + XSetWMProtocols(display, win, &wm_delete_window, 1); + } else { + win = XCreateSimpleWindow(display, RootWindow(display, screen_num), + sinfo_p0.xres / 2 - sinfo.xres / 2, sinfo_p0.yres / 2 - sinfo.yres / 2, sinfo.xres, sinfo.yres, 0, + WhitePixel(display, screen_num), + TRANSPARENT_COLOR_KEY); + + XSetClassHint(display, win, &classhint); + + XStoreName(display, win, "MPlayer"); + XMapWindow(display, win); + + /* Set WM_DELETE_WINDOW atom in WM_PROTOCOLS property (to get window_delete requests). 
*/ + wm_delete_window = XInternAtom(display, "WM_DELETE_WINDOW", False); + XSetWMProtocols(display, win, &wm_delete_window, 1); + XSelectInput(display, win, StructureNotifyMask | KeyPressMask); + } +} + + +void print_properties(Window win2) +{ + Atom *p; + int num, j; + char *aname; + Atom type; + int format; + unsigned long nitems, bytes_after; + unsigned char *ret = NULL; + + p = XListProperties(display, win2, &num); + printf("found %d properties for window %d\n", num, (int)win2); + for (j = 0; j < num; j++) { + aname = XGetAtomName(display, p[j]); + if (aname) { + if(Success == XGetWindowProperty(display, win2, XInternAtom(display, aname, False), + 0L, ~0L, False, XA_STRING, + &type, &format, &nitems, + &bytes_after, &ret)) + { +/* printf("format = %d, nitems = %d, bytes_after = %d\n", format, nitems, bytes_after);*/ + printf("%s = %s\n", aname, ret); + XFree(ret); + } + XFree(aname); + } else printf("NULL\n"); + } + XFree(p); +} + + +static int x11_check_events() +{ + if (!display) { + mp_msg(MSGT_VO, MSGL_ERR, "[omapfb] 'x11_check_events' called out of sequence\n"); + exit(1); + } + + int ret = 0; + XEvent Event; + while (XPending(display)) { + XNextEvent(display, &Event); + if (Event.type == UnmapNotify) + omapfb_update(0, 0, 0, 0, 0); + else if ((Event.type == MapNotify) || (Event.type == ConfigureNotify)) + omapfb_update(0, 0, 0, 0, 1); + else if (Event.type == KeyPress) { + int key; + KeySym keySym = XKeycodeToKeysym(display, Event.xkey.keycode, 0); + key = ((keySym & 0xff00) != 0 ? 
((keySym & 0x00ff) + 256) : (keySym)); + ret |= VO_EVENT_KEYPRESS; + vo_x11_putkey(key); + } else if (Event.type == ClientMessage) { + if ((Atom)Event.xclient.data.l[0] == wm_delete_window) { + mplayer_put_key(KEY_ESC); + } + } + } + return ret; +} + + +static void x11_uninit() +{ + if (display) { + XCloseDisplay(display); + display = NULL; + } +} + + +/** + * Initialize framebuffer + */ +static int preinit(const char *arg) +{ + + opt_t subopts[] = { + {"fb_overlay_only", OPT_ARG_BOOL, &fb_overlay_only, NULL}, + {"dbl_buffer", OPT_ARG_BOOL, &dbl_buffer, NULL}, + {NULL} + }; + + if (subopt_parse(arg, subopts) != 0) { + mp_msg(MSGT_VO, MSGL_FATAL, "[omapfb] unknown suboptions: %s\n", arg); + return -1; + } + + dev_fd = open("/dev/fb0", O_RDWR); + + if (dev_fd == -1) { + mp_msg(MSGT_VO, MSGL_FATAL, "[omapfb] Error /dev/fb0\n"); + return -1; + } + + ioctl(dev_fd, FBIOGET_VSCREENINFO, &sinfo_p0); + close(dev_fd); + + dev_fd = open("/dev/fb1", O_RDWR); + + if (dev_fd == -1) { + mp_msg(MSGT_VO, MSGL_FATAL, "[omapfb] Error /dev/fb1\n"); + return -1; + } + + ioctl(dev_fd, FBIOGET_VSCREENINFO, &sinfo); + ioctl(dev_fd, OMAPFB_QUERY_PLANE, &pinfo); + ioctl(dev_fd, OMAPFB_QUERY_MEM, &minfo); + + if (!fb_overlay_only) + x11_init(); + + return 0; +} + + +static void omapfb_update(int x, int y, int out_w, int out_h, int show) +{ + if (!fb_overlay_only) + x11_get_window_abs_position(display, win, &x, &y, &out_w, &out_h); + + if ((x < 0) || (y < 0) + +// If you develop the right scaling-down patch in kernel, uncomment the line below and comment the next one +// || (out_w < sinfo.xres / 4) || (out_h < sinfo.yres / 4) + || (out_w < sinfo.xres) || (out_h < sinfo.yres) + +// If you don't have the right scaling-up patch in kernel, comment the line below and uncomment the next one +/* Kernel patch to enable scaling up on the omap3 +====================================================== +--- a/drivers/video/omap/dispc.c 2008-11-01 20:08:04.000000000 -0700 ++++ b/drivers/video/omap/dispc.c 
2008-11-01 20:09:02.000000000 -0700 +@@ -523,9 +523,6 @@ + if ((unsigned)plane > OMAPFB_PLANE_NUM) + return -ENODEV; + +- if (out_width != orig_width || out_height != orig_height) +- return -EINVAL; +- + enable_lcd_clocks(1); + if (orig_width < out_width) { + /* +====================================================== +*/ + || (out_w > sinfo.xres * 8) || (out_h > sinfo.yres * 8) +// || (out_w > sinfo.xres) || (out_h > sinfo.yres) + + || (x + out_w > sinfo_p0.xres) || (y + out_h > sinfo_p0.yres)) { + pinfo.enabled = 0; + pinfo.pos_x = 0; + pinfo.pos_y = 0; + ioctl(dev_fd, OMAPFB_SETUP_PLANE, &pinfo); + return; + } + + pinfo.enabled = show; + pinfo.pos_x = x; + pinfo.pos_y = y; + pinfo.out_width = out_w; + pinfo.out_height = out_h; + ioctl(dev_fd, OMAPFB_SETUP_PLANE, &pinfo); +} + + +static int config(uint32_t width, uint32_t height, uint32_t d_width, + uint32_t d_height, uint32_t flags, char *title, + uint32_t format) +{ + uint8_t *fbmem; + int i; + struct omapfb_color_key color_key; + + fullscreen_flag = flags & VOFLAG_FULLSCREEN; + + fbmem = mmap(NULL, minfo.size, PROT_READ|PROT_WRITE, MAP_SHARED, dev_fd, 0); + if (fbmem == MAP_FAILED) { + mp_msg(MSGT_VO, MSGL_FATAL, "[omapfb] Error mmap\n"); + return -1; + } + + for (i = 0; i < minfo.size / 4; i++) + ((uint32_t*)fbmem)[i] = 0x80008000; + + sinfo.xres = FFMIN(sinfo_p0.xres, width) & ~15; + sinfo.yres = FFMIN(sinfo_p0.yres, height) & ~15; + sinfo.xoffset = 0; + sinfo.yoffset = 0; + sinfo.nonstd = OMAPFB_COLOR_YUY422; + + fb_pages[0].x = 0; + fb_pages[0].y = 0; + fb_pages[0].buf = fbmem; + + if (dbl_buffer && minfo.size >= sinfo.xres * sinfo.yres * 2) { + sinfo.xres_virtual = sinfo.xres; + sinfo.yres_virtual = sinfo.yres * 2; + fb_pages[1].x = 0; + fb_pages[1].y = sinfo.yres; + fb_pages[1].buf = fbmem + sinfo.xres * sinfo.yres * 2; + fb_page_flip = 1; + } else { + sinfo.xres_virtual = sinfo.xres; + sinfo.yres_virtual = sinfo.yres; + fb_page_flip = 0; + } + + ioctl(dev_fd, FBIOPUT_VSCREENINFO, &sinfo); + + if (WinID 
<= 0) { + if (fullscreen_flag) { + if (!fb_overlay_only) + x11_set_fullscreen_state(display, win, 1); + omapfb_update(0, 0, sinfo_p0.xres, sinfo_p0.yres, 1); + } else { + if (!fb_overlay_only) + x11_set_fullscreen_state(display, win, 0); + omapfb_update(sinfo_p0.xres / 2 - sinfo.xres / 2, sinfo_p0.yres / 2 - sinfo.yres / 2, sinfo.xres, sinfo.yres, 1); + } + } + + color_key.channel_out = OMAPFB_CHANNEL_OUT_LCD; + color_key.background = 0x0; + color_key.trans_key = TRANSPARENT_COLOR_KEY; + if (fb_overlay_only) + color_key.key_type = OMAPFB_COLOR_KEY_DISABLED; + else + color_key.key_type = OMAPFB_COLOR_KEY_GFX_DST; + ioctl(dev_fd, OMAPFB_SET_COLOR_KEY, &color_key); + + plane_ready = 1; + return 0; +} + + +static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src, unsigned char *srca, int stride) +{ + vo_draw_alpha_yuy2(w, h, src, srca, stride, fb_pages[page].buf + sinfo.xres * y0 * 2 + x0 * 2, sinfo.xres); +} + + +static void draw_osd(void) +{ + vo_draw_text(sinfo.xres, sinfo.yres, draw_alpha); +} + + +static int draw_frame(uint8_t *src[]) +{ + return 1; +} + + +static int draw_slice(uint8_t *src[], int stride[], int w, int h, int x, int y) +{ + if (x!=0) + return 0; + + if (!plane_ready) + return 0; + + ioctl(dev_fd, OMAPFB_SYNC_GFX); + + yuv420_to_yuv422(fb_pages[page].buf + 2 * sinfo.xres * y, src[0], src[1], src[2], w & ~15, h, stride[0], stride[1], 2 * sinfo.xres_virtual); + + return 0; +} + + +static void flip_page(void) +{ + if (fb_page_flip) { + sinfo.xoffset = fb_pages[page].x; + sinfo.yoffset = fb_pages[page].y; + ioctl(dev_fd, FBIOPAN_DISPLAY, &sinfo); + page ^= fb_page_flip; + } +} + + +static int query_format(uint32_t format) +{ + // For simplicity pretend that we can only do YV12, support for + // other formats can be added quite easily if/when needed + if (format != IMGFMT_YV12) + return 0; + + return VFCAP_CSP_SUPPORTED | VFCAP_CSP_SUPPORTED_BY_HW | VFCAP_OSD | VFCAP_SWSCALE | VFCAP_ACCEPT_STRIDE; +} + + +/** + * Uninitialize framebuffer 
+ */ +static void uninit() +{ + pinfo.enabled = 0; + ioctl(dev_fd, OMAPFB_SETUP_PLANE, &pinfo); + + if (!fb_overlay_only) { + struct omapfb_color_key color_key; + color_key.channel_out = OMAPFB_CHANNEL_OUT_LCD; + color_key.key_type = OMAPFB_COLOR_KEY_DISABLED; + ioctl(dev_fd, OMAPFB_SET_COLOR_KEY, &color_key); + } + + close(dev_fd); + + if (!fb_overlay_only) + x11_uninit(); +} + + +static int control(uint32_t request, void *data, ...) +{ + switch (request) { + case VOCTRL_QUERY_FORMAT: + return query_format(*((uint32_t*)data)); + case VOCTRL_FULLSCREEN: { + if (WinID > 0) return VO_FALSE; + if (fullscreen_flag) { + if (!fb_overlay_only) + x11_set_fullscreen_state(display, win, 0); + fullscreen_flag = 0; + omapfb_update(sinfo_p0.xres / 2 - sinfo.xres / 2, sinfo_p0.yres / 2 - sinfo.yres / 2, sinfo.xres, sinfo.yres, 1); + } else { + if (!fb_overlay_only) + x11_set_fullscreen_state(display, win, 1); + fullscreen_flag = 1; + omapfb_update(0, 0, sinfo_p0.xres, sinfo_p0.yres, 1); + } + return VO_TRUE; + } + } + return VO_NOTIMPL; +} + + +static void check_events(void) +{ + if (!fb_overlay_only) + x11_check_events(); +} diff --git a/recipes/mplayer/files/vo_pxa.c b/recipes/mplayer/files/vo_pxa.c new file mode 100644 index 0000000000..1488d14064 --- /dev/null +++ b/recipes/mplayer/files/vo_pxa.c @@ -0,0 +1,980 @@ +/* + * Video driver for PXA 27x Overlay 2, in conjunction with kernel driver + * by Tim Chick <tim (DOT) chick (AT) csr (DOT) com> + * (C) 2007 + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "config.h" +#include "video_out.h" +#include "video_out_internal.h" +#include "sub.h" +#include "aspect.h" +#include "mp_msg.h" +#include "subopt-helper.h" + +#include "vo_pxa.h" + +static vo_info_t info = { + "PXA 27x Framebuffer", + "pxa", + "Tim Chick <tim (DOT) chick (AT) csr (DOT) com>", + "For Sharp Zaurus SL-C1000 etc" +}; + +LIBVO_EXTERN(pxa); + +static pxa_priv_t st_pxa_priv; + 
+/***************************************************************************** + * preinit + * + * Preinitializes driver + * arg - currently it's vo_subdevice + * returns: zero on successful initialization, non-zero on error. + * + ****************************************************************************/ +static int preinit(const char *vo_subdevice) +{ + pxa_priv_t *priv = &st_pxa_priv; + int rc; + + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: preinit() was called\n"); + + /* fill pxa_priv_t information */ + memset(priv, 0, sizeof(*priv)); + priv->fd = -1; + + /* We need to open the base framebuffer device, to change and restore modes */ + priv->base_fd = open( "/dev/fb0", O_RDWR ); + + if( priv->base_fd < 0 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: Could not open base framebuffer device\n"); + return -1; + } + + /* Get the base fb var data, so we can restore if we change video modes */ + rc = ioctl( priv->base_fd, FBIOGET_VSCREENINFO, &(priv->base_orig_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: FBIOGET_VSCREENINFO preinit base_fd failed %d\n", + errno ); + + /* If this failed, close down the FD so we don't try to set this again */ + close( priv->base_fd ); + priv->base_fd = -1; + + return -1; + } + + return 0; +} + + +/***************************************************************************** + * config + * + * Config the display driver. + * params: + * src_width,srcheight: image source size + * dst_width,dst_height: size of the requested window size, just a hint + * fullscreen: flag, 0=windowd 1=fullscreen, just a hint + * title: window title, if available + * format: fourcc of pixel format + * returns : zero on successful initialization, non-zero on error. 
+ * + ****************************************************************************/ +static int config(uint32_t src_width, uint32_t src_height, + uint32_t dst_width, uint32_t dst_height, uint32_t flags, + char *title, uint32_t format) +{ + pxa_priv_t *priv = &st_pxa_priv; + int rc; + int i; + + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: config() src_width:%d, src_height:%d, dst_width:%d, dst_height:%d\n", + src_width, src_height, dst_width, dst_height); + + /* Check format */ + if( !vo_pxa_query_format(format) ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: unsupported fourcc for this driver: %x (%s)\n", + format, vo_format_name(format) ); + goto err_out; + } + priv->format = format; + + /* Change resolution? */ + priv->vm = flags & VOFLAG_MODESWITCHING; + if( priv->vm ) + { + priv->my_fb_var = priv->base_orig_fb_var; + + /* Hard coded values suck, never mind */ + priv->my_fb_var.xres = 240; + priv->my_fb_var.yres = 320; + priv->my_fb_var.pixclock = 134617; + priv->my_fb_var.left_margin = 20; + priv->my_fb_var.right_margin = 46; + priv->my_fb_var.upper_margin = 1; + priv->my_fb_var.lower_margin = 0; + priv->my_fb_var.hsync_len = 20; + priv->my_fb_var.vsync_len = 2; + + rc = ioctl( priv->base_fd, FBIOPUT_VSCREENINFO, &(priv->my_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: config() Set FBIOPUT_VSCREENINFO on base_fd failed %d\n", + errno ); + priv->vm = 0; + goto err_out; + } + + /* We need this sleep, to make the change in resolution actually happen, before we open the overlay */ + sleep(1); + } + + + /* Open up the overlay fbdev */ + priv->fd = open( "/dev/fb2", O_RDWR ); + + if( priv->fd < 0 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: Could not open /dev/fb2: %d\n", errno ); + goto err_out; + } + + /* Read in fb var data */ + rc = ioctl( priv->fd, FBIOGET_VSCREENINFO, &(priv->my_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: config() FBIOGET_VSCREENINFO from fd failed %d\n", + errno ); + goto err_out; + } + + /* Store away the 
source dimensions, so we can place in centre of screen later in vm mode */ + priv->src_width = src_width; + priv->src_height = src_height; + + /* Set up the buffer */ + if( priv->vm ) + { + /* Ignore size, as the rest of the screen is toast. Use max size */ + priv->my_fb_var.xres = 240; + priv->my_fb_var.yres = 320; + + /* Do we need to rotate? */ + if( priv->src_width > priv->src_height ) + { + /* Yes */ + priv->rotate = 1; + } + + priv->width = 240; + priv->height = 320; + } + else + { + priv->my_fb_var.xres = src_width; + priv->my_fb_var.yres = src_height; + priv->width = src_width; + priv->height = src_height; + } + + priv->my_fb_var.nonstd = ( 4 << 20) /* Format YV12 */ + | ( 0 << 0) /* x position */ + | ( 0 << 10); /* y position */ + /* We have to set the bits per pixel to a valid value, even though it is + * incorrect for YV12 + */ + priv->my_fb_var.bits_per_pixel = 16; + + rc = ioctl( priv->fd, FBIOPUT_VSCREENINFO, &(priv->my_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: config() FBIOPUT_VSCREENINFO to fd failed: %d\n", + errno ); + goto err_out; + } + + /* Next get the fixed fbvars, so we can mmap the data for all 3 planes */ + rc = ioctl( priv->fd, FBIOGET_FSCREENINFO, &(priv->my_fb_fix) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: config() FBIOGET_FSCREENINFO from fd failed: %d\n", + errno ); + goto err_out; + } + + priv->fb_mem_base = mmap( NULL, priv->my_fb_fix.smem_len, (PROT_READ | PROT_WRITE ), + MAP_SHARED, + priv->fd, + 0 ); + + if( priv->fb_mem_base == MAP_FAILED ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: mmap fd buffer failed: %d\n", errno ); + goto err_out; + } + + /* Finally, find the offsets of each plane by getting the var data again */ + rc = ioctl( priv->fd, FBIOGET_VSCREENINFO, &(priv->my_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: config() FBIOGET_VSCREENINFO from fd (2) failed %d\n", + errno ); + goto err_out; + } + + /* Fill the overlay with black */ + memset( 
priv->fb_mem_base + priv->my_fb_var.red.offset, 16, priv->my_fb_var.red.length ); + memset( priv->fb_mem_base + priv->my_fb_var.green.offset, 128, priv->my_fb_var.green.length ); + memset( priv->fb_mem_base + priv->my_fb_var.blue.offset, 128, priv->my_fb_var.blue.length ); + + /* Now open the OSD overlay - overlay 1, and fill with transparent */ + sleep( 1 ); + + priv->overlay_fd = open( "/dev/fb1", O_RDWR ); + + if( priv->overlay_fd < 0 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: Could not open /dev/fb1: %d\n", errno ); + goto err_out; + } + + /* Read in fb var data */ + rc = ioctl( priv->overlay_fd, FBIOGET_VSCREENINFO, &(priv->osd_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: config() FBIOGET_VSCREENINFO from overlay_fd failed; %d\n", + errno ); + goto err_out; + } + + priv->osd_fb_var.xres = priv->width; + priv->osd_fb_var.yres = priv->height; + priv->osd_fb_var.nonstd = ( 0 << 0) /* x position */ + | ( 0 << 10); /* y position */ + /* Use 15 bit mode, with top bit transparency */ + priv->osd_fb_var.bits_per_pixel = 16; + + rc = ioctl( priv->overlay_fd, FBIOPUT_VSCREENINFO, &(priv->osd_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: config() FBIOPUT_VSCREENINFO to overlay_fd failed: %d\n", + errno ); + goto err_out; + } + + /* Next get the fixed fbvars, so we can mmap the data */ + rc = ioctl( priv->overlay_fd, FBIOGET_FSCREENINFO, &(priv->osd_fb_fix) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: config() FBIOGET_FSCREENINFO from overlay_fd failed %d\n", + errno ); + goto err_out; + } + + priv->osd_mem_base = mmap( NULL, priv->osd_fb_fix.smem_len, (PROT_READ | PROT_WRITE ), + MAP_SHARED, + priv->overlay_fd, + 0 ); + + if( priv->osd_mem_base == MAP_FAILED ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: mmap osd_mem_base failed: %d\n", errno ); + goto err_out; + } + + /* Fill the overlay with transparent */ + vo_pxa_clear_osd( priv->osd_mem_base, priv->osd_fb_fix.smem_len ); + + /* We are good to go! 
*/ + mp_msg( MSGT_VO, MSGL_V, "vo_pxa: Opened video overlay %d x %d fourcc %s\n", + priv->my_fb_var.xres, + priv->my_fb_var.yres, + vo_format_name(format) ); + + return 0; + + err_out: + + /* Don't do anything here for the moment */ + return -1; +} + + +/***************************************************************************** + * + * control + * + * Control display + * + ****************************************************************************/ +static int control(uint32_t request, void *data, ...) +{ + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: control %08x\n", request ); + + switch( request ) + { + case VOCTRL_QUERY_FORMAT: + return( vo_pxa_query_format( *(uint32_t *)data ) ); + break; + } + + return VO_NOTIMPL; +} + + +/***************************************************************************** + * + * draw_frame + * + * Display a new RGB/BGR frame of the video to the screen. + * params: + * src[0] - pointer to the image + * + ****************************************************************************/ +int draw_frame(uint8_t *src[]) +{ + /* This is not implimented */ + mp_msg(MSGT_VO, MSGL_ERR, "vo_pxa: dummy draw_frame() was called\n"); + return -1; +} + + +/***************************************************************************** + * + * draw_slice + * + * Draw a planar YUV slice to the buffer: + * params: + * src[3] = source image planes (Y,U,V) + * stride[3] = source image planes line widths (in bytes) + * w,h = width*height of area to be copied (in Y pixels) + * x,y = position at the destination image (in Y pixels) + * + ****************************************************************************/ +int draw_slice(uint8_t *src[], int stride[], int w,int h, int x,int y) +{ + pxa_priv_t *priv = &st_pxa_priv; + + /* This routine is only display routine actually implimented */ + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: draw_slice() w %d h %d x %d y %d stride %d %d %d\n", + w, h, x, y, stride[0], stride[1], stride[2] ); + + /* It would be faster to check if source 
and dest have same geometry and copy + * whole block + * For the moment we just copy a line at a time + */ + + /* In vm mode rotate if wider than long */ + if( priv->vm ) + { + /* Do we nee to rotate? */ + if( priv->rotate ) + { + /* Yes, rotated version */ + int dst_x_offset = 0; + int dst_y_offset = 0; + int src_x_offset = 0; + int src_y_offset = 0; + + /* Figure out dst offset */ + if( priv->src_width < 320 ) + { + dst_x_offset = ( ( 320 - priv->src_width ) / 2 ); + /* Make it a multiple of 16 */ + dst_x_offset &= ~(0xf); + } + + if( priv->src_height < 240 ) + { + dst_y_offset = ( ( 240 - priv->src_height ) / 2 ); + /* Make it a multiple of 16 */ + dst_y_offset &= ~(0xf); + } + + dst_x_offset += x; + dst_y_offset += y; + + if( ( dst_x_offset >= 320 ) || ( dst_y_offset >= 240 ) ) + { + /* Nothing to do - drawing off the screen! */ + return( 0 ); + } + + /* Limit to drawable area */ + if( ( w + dst_x_offset ) > 320 ) + { + w = ( 320 - dst_x_offset ); + } + + if( ( h + dst_y_offset ) > 240 ) + { + h = ( 240 - dst_y_offset ); + } + + /* And source offset */ + if( priv->src_width > 320 ) + { + src_x_offset = ( ( priv->src_width - 320 ) / 2 ); + /* Make it a multiple of 16 */ + src_x_offset &= ~(0xf); + } + + if( priv->src_height > 240 ) + { + src_y_offset = ( ( priv->src_height - 240 ) / 2 ); + /* Make it a multiple of 16 */ + src_y_offset &= ~(0xf); + } + + + /* Y first */ + vo_pxa_copy_and_rotate( src[0] + src_x_offset + (src_y_offset * stride[0]), stride[0], + priv->fb_mem_base + priv->my_fb_var.red.offset + (240 * dst_x_offset) + (240 - dst_y_offset - h), + w, h, 240 ); + /* Now U */ + vo_pxa_copy_and_rotate( src[1] + src_x_offset/2 + (src_y_offset/2 * stride[1]), stride[1], + priv->fb_mem_base + priv->my_fb_var.green.offset + (120 * dst_x_offset/2) + (120 - dst_y_offset/2 - h/2), + w/2, h/2, 120 ); + vo_pxa_copy_and_rotate( src[2] + src_x_offset/2 + (src_y_offset/2 * stride[2]), stride[2], + priv->fb_mem_base + priv->my_fb_var.blue.offset + (120 * 
dst_x_offset/2) + (120 - dst_y_offset/2 - h/2), + w/2, h/2, 120 ); + } + else + { + /* Don't rotate */ + int i; + uint8_t *my_src; + uint8_t *dest; + int dst_x_offset = 0; + int dst_y_offset = 0; + int src_x_offset = 0; + int src_y_offset = 0; + + /* Figure out dst offset */ + if( priv->src_width < 240 ) + { + dst_x_offset = ( ( 240 - priv->src_width ) / 2 ); + /* Make it a multiple of 16 */ + dst_x_offset &= ~(0xf); + } + + if( priv->src_height < 320 ) + { + dst_y_offset = ( ( 320 - priv->src_height ) / 2 ); + /* Make it a multiple of 16 */ + dst_y_offset &= ~(0xf); + } + + dst_x_offset += x; + dst_y_offset += y; + + if( ( dst_x_offset >= 240 ) || ( dst_y_offset >= 320 ) ) + { + /* Nothing to do - drawing off the screen! */ + return( 0 ); + } + + /* Limit to drawable area */ + if( ( w + dst_x_offset ) > 240 ) + { + w = ( 240 - dst_x_offset ); + } + + if( ( h + dst_y_offset ) > 320 ) + { + h = ( 320 - dst_y_offset ); + } + + /* And source offset */ + if( priv->src_width > 240 ) + { + src_x_offset = ( ( priv->src_width - 240 ) / 2 ); + /* Make it a multiple of 16 */ + src_x_offset &= ~(0xf); + } + + if( priv->src_height > 320 ) + { + src_y_offset = ( ( priv->src_height - 320 ) / 2 ); + /* Make it a multiple of 16 */ + src_y_offset &= ~(0xf); + } + + /* First Y */ + for( i = 0; i<h; i++ ) + { + dest = priv->fb_mem_base + + priv->my_fb_var.red.offset + + ( (dst_y_offset+i) * priv->my_fb_fix.line_length ) + + dst_x_offset; + my_src = src[0] + src_x_offset + (stride[0] * (i+src_y_offset)); + memcpy( dest, my_src, w ); + } + + /* Now U */ + for( i = 0; i<(h/2); i++ ) + { + dest = priv->fb_mem_base + + priv->my_fb_var.green.offset + + ( ((dst_y_offset/2)+i) * (priv->my_fb_fix.line_length/2) ) + + dst_x_offset/2; + my_src = src[1] + src_x_offset/2 + (stride[1] * (i+(src_y_offset/2))); + memcpy( dest, my_src, w/2 ); + } + + /* Finaly V */ + for( i = 0; i<(h/2); i++ ) + { + dest = priv->fb_mem_base + + priv->my_fb_var.blue.offset + + ( ((dst_y_offset/2)+i) * 
(priv->my_fb_fix.line_length/2) ) + + dst_x_offset/2; + my_src = src[2] + src_x_offset/2 + (stride[2] * (i+(src_y_offset/2))); + memcpy( dest, my_src, w/2 ); + } + + } + } + else + { + /* Not full screen mode */ + uint8_t *my_src; + uint8_t *dest; + size_t length; + int i; + + /* It would be faster to check if source and dest have same geometry and copy + * whole block + * For the moment we just copy a line at a time + */ + + /* Limit area written to */ + if( x >= priv->my_fb_fix.line_length ) + { + return 0; + } + + if( w + x > priv->my_fb_fix.line_length ) + { + w = priv->my_fb_fix.line_length - x; + } + + if( y>= priv->my_fb_var.yres ) + { + return 0; + } + + if( h + y > priv->my_fb_var.yres ) + { + h = priv->my_fb_var.yres - y; + } + + /* First Y */ + for( i = 0; i<h; i++ ) + { + dest = priv->fb_mem_base + + priv->my_fb_var.red.offset + + ( (y+i) * priv->my_fb_fix.line_length ) + + x; + my_src = src[0] + stride[0] * i; + memcpy( dest, my_src, w ); + } + + /* Now U */ + for( i = 0; i<(h/2); i++ ) + { + dest = priv->fb_mem_base + + priv->my_fb_var.green.offset + + ( ((y/2)+i) * (priv->my_fb_fix.line_length/2) ) + + x; + my_src = src[1] + stride[1] * i; + memcpy( dest, my_src, w/2 ); + } + + /* Finaly V */ + for( i = 0; i<(h/2); i++ ) + { + dest = priv->fb_mem_base + + priv->my_fb_var.blue.offset + + ( ((y/2)+i) * (priv->my_fb_fix.line_length/2) ) + + x; + my_src = src[2] + stride[2] * i; + memcpy( dest, my_src, w/2 ); + } + } + return 0; +} + +static void draw_osd(void) +{ + pxa_priv_t *priv = &st_pxa_priv; + int osd_has_changed; + + /* This gets called every frame, so systems which do the OSD without a + * seperate overlay can mix in the image. We need to find out if the osd + * has actually been updated! 
+ */ + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: draw_osd() was called\n"); + + osd_has_changed = vo_update_osd( priv->width, priv->height); + + if(osd_has_changed) + { + int i; + + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: Clear and update OSD\n"); + + /* Fill with transparent */ + vo_pxa_clear_osd( priv->osd_mem_base, priv->osd_fb_fix.smem_len ); + + priv->osd_cleared = 1; + + /* now update */ + if( priv->rotate ) + { + vo_draw_text( priv->width, priv->height, vo_pxa_draw_alpha_with_rotate ); + } + else + { + vo_draw_text( priv->width, priv->height, vo_pxa_draw_alpha ); + } + } +} + +/***************************************************************************** + * + * flip_page + * + * Blit/Flip buffer to the screen. Must be called after each frame! + * + * + ****************************************************************************/ +static void flip_page(void) +{ + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: flip_page() was called\n"); +} + +/***************************************************************************** + * + * check_events + * + * + ****************************************************************************/ +static void check_events(void) +{ + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: check_events() was called\n"); +} + +/***************************************************************************** + * + * uninit + * + * + ****************************************************************************/ +static void uninit(void) +{ + pxa_priv_t *priv = &st_pxa_priv; + int rc; + + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: uninit() was called\n"); + + if( priv->vm ) + { + /* We need these sleeps, to make the change in resolution actually happen */ + sleep(1); + + /* Restore original resolution */ + if( priv->base_fd >= 0 ) + { + rc = ioctl( priv->base_fd, FBIOPUT_VSCREENINFO, &(priv->base_orig_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: uninit() FBIOPUT_VSCREENINFO to base_fd failed %d\n", + errno ); + } + } + /* We need these sleeps, to make the change in 
resolution actually happen */ + /* For some reason, if we change the reolution the overlay buffer never gets deleted? */ + sleep(1); + } + + + /* We need to force the overlays to be really disabled, otherwise they + * will come back as zombies after suspend, resume + * This trick seems to work, but will not be needed once kernel driver + * is fixed + */ + if( priv->fd >= 0 ) + { + rc = ioctl( priv->fd, FBIOGET_VSCREENINFO, &(priv->my_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: uninit() FBIOGET_VSCREENINFO from fd failed %d\n", + errno ); + } + priv->my_fb_var.bits_per_pixel = 0; + + rc = ioctl( priv->fd, FBIOPUT_VSCREENINFO, &(priv->my_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: uninit() FBIOPUT_VSCREENINFO from fd failed %d\n", + errno ); + } + } + + if( priv->overlay_fd >= 0 ) + { + rc = ioctl( priv->overlay_fd, FBIOGET_VSCREENINFO, &(priv->my_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: uninit() FBIOGET_VSCREENINFO from overlay_fd failed %d\n", + errno ); + } + priv->my_fb_var.bits_per_pixel = 0; + + rc = ioctl( priv->overlay_fd, FBIOPUT_VSCREENINFO, &(priv->my_fb_var) ); + + if( rc == -1 ) + { + mp_msg( MSGT_VO, MSGL_ERR, "vo_pxa: uninit() FBIOPUT_VSCREENINFO from overlay_fd failed %d\n", + errno ); + } + } + + if( priv->base_fd >= 0 ) + { + close( priv->base_fd ); + priv->base_fd = -1; + } +} + +/***************************************************************************** + * + * Internal functions, not part of mplayer API + * + ****************************************************************************/ + +static int vo_pxa_query_format( uint32_t format ) +{ + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: vo_pxa_query_format was called: %x (%s)\n", + format, vo_format_name(format)); + + switch (format) + { + /* Planar YUV Formats */ + /* Warning! 
dropthrough */ + case IMGFMT_YV12: + case IMGFMT_IYUV: + case IMGFMT_I420: + return( VFCAP_CSP_SUPPORTED | VFCAP_CSP_SUPPORTED_BY_HW + | VFCAP_HWSCALE_UP | VFCAP_HWSCALE_DOWN | VFCAP_OSD + | VFCAP_ACCEPT_STRIDE ); + break; + } + + return 0; +} + +static void vo_pxa_copy_and_rotate( uint8_t *src, int stride, uint8_t *dst, int w, int h, int dst_stride ) +{ + int i,j; + uint8_t *my_src, *my_dst; + Vo_Pxa_Pixel_Data8 *img_dst_pixel_data8; + + /* Loop so writing consectuive data in rotated image */ + /* This produces some pretty good assembler - better than the handcoded stuff in w100 */ + for( j=0; j<w; j++ ) + { + my_src = src + j + ( stride * (h - 1) ); + + img_dst_pixel_data8 = (Vo_Pxa_Pixel_Data8 *)dst; + + /* Allow for src not multiple of 8 by running off the end a little. Should not matter */ + for( i=0; i<((h+7)/8); i++ ) + { + register Vo_Pxa_Pixel_Data8 build_pixels; + + build_pixels.a = *my_src; + my_src -= stride; + build_pixels.a |= (*my_src<<8); + my_src -= stride; + build_pixels.a |= (*my_src<<16); + my_src -= stride; + build_pixels.a |= (*my_src<<24); + my_src -= stride; + + build_pixels.b = *my_src; + my_src -= stride; + build_pixels.b |= (*my_src<<8); + my_src -= stride; + build_pixels.b |= (*my_src<<16); + my_src -= stride; + build_pixels.b |= (*my_src<<24); + my_src -= stride; + + *img_dst_pixel_data8++ = build_pixels; + } + + /* Allow source not as big as dest */ + dst += dst_stride; + } +} + +static void vo_pxa_draw_alpha( int x, int y, int w, int h, unsigned char *src, + unsigned char *srca, int stride ) +{ + /* Dump data into our 15bit buffer with transparency */ + pxa_priv_t *priv = &st_pxa_priv; + int i,j; + unsigned char *src_ptr = src; + unsigned char *a_ptr = srca; + unsigned short *out_ptr; + + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: vo_pxa_draw_alpha() w %d y %d w %d h %d\n", x, y, w, h ); + + /* We ignore the alpha channel, other than off or on */ + for( i=0; i<h; i++ ) + { + out_ptr = priv->osd_mem_base + x + ( priv->width * ( y + i ) ); + 
src_ptr = src + ( i * stride ); + a_ptr = srca + ( i * stride ); + + for( j=0; j<w; j++ ) + { + /* The srca is a 0-255 transpaency level, where 0 is transparent. + * We only support transparent on or off + */ + if( *a_ptr++ ) + { + unsigned int grey; + /* The src is a greylevel from 0 - 255 */ + /* We may as well use this value */ + grey = *src_ptr++ >> 3; + *out_ptr++ = grey | (grey << 5) | (grey<<10); + } + else + { + *out_ptr++ = 0x8000; + src_ptr++; + } + + } + } +} + +static void vo_pxa_draw_alpha_with_rotate( int x, int y, int w, int h, unsigned char *src, + unsigned char *srca, int stride ) +{ + /* Dump data into our 15bit buffer with transparency */ + pxa_priv_t *priv = &st_pxa_priv; + int i,j; + unsigned char *src_ptr = src; + unsigned char *a_ptr = srca; + unsigned short *out_ptr; + + mp_msg(MSGT_VO, MSGL_V, "vo_pxa: vo_pxa_draw_alpha_with_rotate() x %d y %d w %d h %d\n", x, y, w, h ); + + if( x >= 320 ) + { + /* Off the screen */ + return; + } + + /* Limit to size of screen/memory */ + if( ( w + x ) > 320 ) + { + w = 320 - x; + } + + if( y >= 240 ) + { + /* Off the screen */ + return; + } + + /* Limit to size of screen/memory */ + if( ( y + h ) > 240 ) + { + h = 240 - y; + } + + + /* We ignore the alpha channel, other than off or on */ + for( i=0; i<w; i++ ) + { + out_ptr = priv->osd_mem_base + y + ( priv->width * ( x + i ) ); + src_ptr = src + i + ( stride * (h - 1)); + a_ptr = srca + i + ( stride * (h - 1)); + + for( j=0; j<h; j++ ) + { + /* The srca is a 0-255 transpaency level, where 0 is transparent. 
+             * We only support transparent on or off
+             */
+            if( *a_ptr )
+            {
+                unsigned int grey;
+                /* The src is a greylevel from 0 - 255 */
+                /* We may as well use this value */
+                grey = *src_ptr >> 3;
+                *out_ptr++ = grey | (grey << 5) | (grey<<10);
+            }
+            else
+            {
+                *out_ptr++ = 0x8000;
+                src_ptr;
+            }
+            a_ptr -= stride;
+            src_ptr -= stride;
+        }
+    }
+}
+
+static void vo_pxa_clear_osd( uint16_t *mem_base, int len )
+{
+    /* Fill the whole OSD area with 0x8000 -> transparent.
+     *
+     *   mem_base - start of the area to fill; assumed to be word aligned
+     *   len      - length of the area in bytes; assumed to be a multiple of
+     *              16. It is processed in 16 byte blocks, so a remainder of
+     *              less than 16 bytes is left untouched.
+     *
+     * Written in assembler because, however I tried, I could not get the
+     * compiler to generate this. It always wanted to do ldmia 4 words from
+     * stack followed by stmia 4 words. This seems odd!
+     */
+
+    /* The loop below always stores at least one block before testing its
+     * counter, so an area shorter than one block (or a negative length)
+     * must not reach it: the counter would wrap and run off the buffer.
+     */
+    if( len < 16 )
+    {
+        return;
+    }
+
+    __asm__ __volatile__ (
+        "mov r4, %0 \n\t"
+        /* r5 = number of 16 byte blocks. It must NOT be decremented before
+         * the loop: the loop decrements first and stores second, so starting
+         * from N it performs exactly N stores.
+         */
+        "mov r5, %1, lsr #4 \n\t"
+        /* r0-r3 = 0x80008000, i.e. two transparent pixels per register */
+        "mov r0, #0x80000000 \n\t"
+        "orr r0, r0, #0x00008000 \n\t"
+        "mov r1, r0 \n\t"
+        "mov r2, r0 \n\t"
+        "mov r3, r0 \n\t"
+        "1: \n\t"
+        "subs r5, r5, #1\n\t"
+        /* stmia does not touch the flags, so bne still tests the subs */
+        "stmia r4!, {r0, r1, r2, r3} \n\t"
+        "bne 1b \n\t"
+        :
+        : "r"(mem_base), "r"(len)
+        : "memory", "r0", "r1", "r2", "r3", "r4", "r5", "cc" );
+}
diff --git a/recipes/mplayer/files/vo_pxa.h b/recipes/mplayer/files/vo_pxa.h
new file mode 100644
index 0000000000..31cc1a7862
--- /dev/null
+++ b/recipes/mplayer/files/vo_pxa.h
@@ -0,0 +1,51 @@
+/*
+ * Video driver for PXA 27x Overlay 2, in conjunction with kernel driver
+ * by Tim Chick <tim (DOT) chick (AT) csr (DOT) com>
+ * (C) 2007
+ */
+
+#include <linux/fb.h>
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+typedef struct pxa_priv_s {
+    uint8_t *fb_mem_base;
+    uint16_t *osd_mem_base;
+
+    int fd;
+    int base_fd;
+    int overlay_fd;
+    struct fb_var_screeninfo my_fb_var;
+    struct fb_fix_screeninfo my_fb_fix;
+    struct fb_var_screeninfo base_orig_fb_var;
+    struct fb_var_screeninfo osd_fb_var;
+    struct fb_fix_screeninfo osd_fb_fix;
+    int vm;
+    uint32_t format;
+    int src_width;
+    int src_height;
+    int width;
+    int 
height; + int rotate; + int osd_cleared; +} pxa_priv_t; + +typedef struct vo_pxa_pixel_data8 { + unsigned int a,b; +} Vo_Pxa_Pixel_Data8; + +#define UNUSED(v) ((void)(v)) + +/* Internal API */ +static int vo_pxa_query_format( uint32_t format ); +static void vo_pxa_copy_and_rotate( uint8_t *src, int stride, uint8_t *dst, int w, int h, int dst_stride ); +static void vo_pxa_draw_alpha( int x, int y, int w, int h, unsigned char *src, + unsigned char *srca, int stride ); +static void vo_pxa_draw_alpha_with_rotate( int x, int y, int w, int h, unsigned char *src, + unsigned char *srca, int stride ); + +static void vo_pxa_clear_osd( uint16_t *mem_base, int len ); diff --git a/recipes/mplayer/files/vo_w100.c b/recipes/mplayer/files/vo_w100.c new file mode 100644 index 0000000000..702707c656 --- /dev/null +++ b/recipes/mplayer/files/vo_w100.c @@ -0,0 +1,947 @@ +/* + * Video driver for ATI Imageon 100 (w100) + * by AGAWA Koji <i (AT) atty (DOT) jp> + * (C) 2004 + */ +/* English in this source code is written by machine translation. + Meaning also not leading, permitting. 
*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "config.h" +#include "video_out.h" +#include "video_out_internal.h" +#include "sub.h" +#include "aspect.h" +#include "mp_msg.h" +#include "subopt-helper.h" + +#include "vo_w100_api.h" +#include "vo_w100_fb.h" + +#define UNUSED(v) ((void)(v)) + +static vo_info_t info = { + "ATI Imageon 100", + "w100", + "AGAWA Koji <i (AT) atty (DOT) jp>", + "for Sharp Linux Zaurus SL-C700/750/760/860" +}; + +LIBVO_EXTERN(w100); + +// ---------------------------------------------------------------- +#define MAX_FRAMES 20 +typedef struct vidix_yuv_s +{ + unsigned y,u,v; +}vidix_yuv_t; + +typedef struct vidix_rect_s +{ + unsigned x,y,w,h; /* in pixels */ + vidix_yuv_t pitch; /* line-align in bytes */ +}vidix_rect_t; + +typedef struct w100_yuv_planes_s { + uint8_t *y; + uint8_t *u; + uint8_t *v; +} w100_yuv_planes_t; + +typedef struct w100_priv_s { + uint32_t format; + int src_width; + int src_height; + int nframes; /* total num of frames */ + int current_frame; /* current frame to display */ + int rotate; + int current_rotate; + + /* w100 info */ + int vram_size[2]; /* */ + void *vram_addr[2]; /* address */ + w100_yuv_planes_t frame_addrs[MAX_FRAMES]; + w100_yuv_planes_t frame_offsets[MAX_FRAMES]; + int is_graphic_window_enabled; + int eq_brightness; /* for mplayer */ + int display_brightness; /* for w100 */ + + /* overlay info */ + uint16_t overlay_handle; + ATI_OVERLAYPROP overlay_prop; + int overlay_pos_x; + int overlay_pos_y; + int overlay_expand_h; + int overlay_expand_v; + int overlay_pitch_y; + int overlay_pitch_u; + int overlay_pitch_v; + video_y_offset_u video_y_offset; + video_u_offset_u video_u_offset; + video_v_offset_u video_v_offset; +} w100_priv_t; + +static w100_priv_t st_w100_priv; +static vidix_yuv_t dstrides; + +static int test_rotate(int *arg) +{ + if ((*arg < -1) || (*arg > 3)) + return 0; + return 1; +} + +static opt_t subopts[] = { + { "rotate", OPT_ARG_INT, &st_w100_priv.rotate, 
(opt_test_f)test_rotate }, + { NULL } +}; + +static void draw_alpha(int x0,int y0, int w,int h, + unsigned char* src, unsigned char *srca, int stride) +{ + w100_priv_t *priv = &st_w100_priv; + uint8_t *psrc, *psrca, *pdst; + pdst = priv->frame_addrs[priv->current_frame].y; + pdst += (x0 * priv->overlay_prop.SrcPitch) + (priv->overlay_prop.SrcPitch - 1 - y0); + psrc = src; + psrca = srca; + while (h--) { + int j; + for (j = 0; j < w; ++j) { + if (psrca[j]) + pdst[j * priv->overlay_prop.SrcPitch] = + ((pdst[j * priv->overlay_prop.SrcPitch] * psrca[j]) >> 8) + psrc[j]; + } + psrc += stride; + psrca += stride; + pdst -= 1; + } +#if 0 + w100_priv_t *priv = &st_w100_priv; + uint32_t apitch, bespitch; + void *lvo_mem; + lvo_mem = priv->frame_addrs[priv->current_frame].y; + apitch = priv->overlay_pitch_y - 1; + switch (priv->format) { + case IMGFMT_YV12: + case IMGFMT_IYUV: + case IMGFMT_I420: + case IMGFMT_YVU9: + case IMGFMT_IF09: + case IMGFMT_Y8: + case IMGFMT_Y800: + bespitch = (priv->src_width + apitch) & (~apitch); + vo_draw_alpha_yv12(w,h,src,srca,stride,lvo_mem+bespitch*y0+x0,bespitch); + break; + case IMGFMT_YUY2: + bespitch = (priv->src_width*2 + apitch) & (~apitch); + vo_draw_alpha_yuy2(w,h,src,srca,stride,lvo_mem+bespitch*y0+2*x0,bespitch); + break; + case IMGFMT_UYVY: + bespitch = (priv->src_width*2 + apitch) & (~apitch); + vo_draw_alpha_yuy2(w,h,src,srca,stride,lvo_mem+bespitch*y0+2*x0+1,bespitch); + break; + case IMGFMT_RGB32: + case IMGFMT_BGR32: + bespitch = (priv->src_width*4 + apitch) & (~apitch); + vo_draw_alpha_rgb32(w,h,src,srca,stride,lvo_mem+y0*bespitch+4*x0,bespitch); + break; + case IMGFMT_RGB24: + case IMGFMT_BGR24: + bespitch = (priv->src_width*3 + apitch) & (~apitch); + vo_draw_alpha_rgb24(w,h,src,srca,stride,lvo_mem+y0*bespitch+3*x0,bespitch); + break; + case IMGFMT_RGB16: + case IMGFMT_BGR16: + bespitch = (priv->src_width*2 + apitch) & (~apitch); + vo_draw_alpha_rgb16(w,h,src,srca,stride,lvo_mem+y0*bespitch+2*x0,bespitch); + break; + case 
IMGFMT_RGB15:
+    case IMGFMT_BGR15:
+        bespitch = (priv->src_width*2 + apitch) & (~apitch);
+        vo_draw_alpha_rgb15(w,h,src,srca,stride,lvo_mem+y0*bespitch+2*x0,bespitch);
+        break;
+    default:
+        return;
+    }
+#endif
+}
+
+/*
+ * Copy an unrotated planar 4:2:0 slice into the current frame buffer,
+ * one row at a time.
+ *   image[3]  = source planes
+ *   stride[3] = source line lengths in bytes
+ *   w,h       = size of the slice in luma pixels
+ *   x,y       = destination position in luma pixels
+ * Always returns 0.
+ */
+static uint32_t w100_draw_slice_420(uint8_t *image[], int stride[],
+                                    int w, int h, int x, int y)
+{
+    w100_priv_t *priv = &st_w100_priv;
+    uint8_t *src;
+    uint8_t *dest;
+    int i;
+
+    /* Plane Y */
+    dest = priv->frame_addrs[priv->current_frame].y;
+    dest += dstrides.y * y + x;
+    src = image[0];
+    for (i = 0; i < h; ++i) {
+        memcpy(dest, src, w);
+        src += stride[0];
+        dest += dstrides.y;
+    }
+
+    /* Plane V */
+    /* NOTE(review): image[1] goes to the .u buffer using dstrides.v (and
+     * image[2] to .v using dstrides.u below) - presumably the YV12 plane
+     * order; confirm against config(). The chroma start also adds the full
+     * luma x rather than x / 2, which only matters when x != 0 - confirm. */
+    dest = priv->frame_addrs[priv->current_frame].u;
+    dest += dstrides.v * y / 4 + x;
+    src = image[1];
+    for (i = 0; i < h / 2; ++i) {
+        memcpy(dest, src, w / 2);
+        src += stride[1];
+        dest += dstrides.v / 2;
+    }
+
+    /* Plane U */
+    dest = priv->frame_addrs[priv->current_frame].v;
+    dest += dstrides.u * y / 4 + x;
+    src = image[2];
+    for (i = 0; i < h / 2; ++i) {
+        memcpy(dest, src, w / 2);
+        src += stride[2];
+        dest += dstrides.u / 2;
+    }
+
+    return 0;
+}
+
+/*
+ * Copy a planar 4:2:0 slice into the current frame buffer, rotated: each
+ * source row is written as a destination column, and successive source rows
+ * go to successively lower destination addresses.
+ *   image[3]  = source planes
+ *   stride[3] = source line lengths in bytes
+ *   w,h       = size of the slice in luma pixels
+ *   x,y       = position of the slice in the source image, in luma pixels
+ * Always returns 0.
+ *
+ * w must be multiple of 8: the inner loop consumes 4 source bytes per pass
+ * and only stops when its counter reaches exactly zero, and the chroma
+ * planes are copied with w / 2. For the same reason h must be at least 2.
+ */
+static uint32_t w100_draw_slice_420_rotate3(uint8_t *image[], int stride[],
+                                            int w, int h, int x, int y)
+{
+    w100_priv_t *priv = &st_w100_priv;
+    void *src, *dest;
+    int i, dpitch2, h_;
+
+    /* The assembler counts h down to zero, so keep the original value to
+     * derive the chroma height from. */
+    h_ = h;
+
+    for (i = 0; i < 3; ++i) {
+        src = image[i];
+        switch (i) {
+        case 0:
+            dest = priv->frame_addrs[priv->current_frame].y;
+            dest += dstrides.y * x + dstrides.y - y;
+            dpitch2 = dstrides.y << 1;
+            break;
+        case 1:
+            dest = priv->frame_addrs[priv->current_frame].u;
+            dest += (dstrides.y >> 1) * (x >> 1) + (dstrides.y >> 1) - (y >> 1);
+            dpitch2 = dstrides.y;
+            h = h_ >> 1;
+            /* w stays halved for plane 2 as well */
+            w >>= 1;
+            break;
+        case 2:
+            dest = priv->frame_addrs[priv->current_frame].v;
+            dest += (dstrides.y >> 1) * (x >> 1) + (dstrides.y >> 1) - (y >> 1);
+            h = h_ >> 1;
+            dpitch2 = dstrides.y;
+            break;
+        }
+
+        /* dpitch2 is two destination lines. Per source row: step dest back
+         * one byte, then scatter the row's bytes down the destination, two
+         * lines (r4 and r5 = r4 + one line) at a time. */
+        __asm__ __volatile__ (
+            "1: \n\t"
+            "mov r8, %[w] \n\t"
+            "sub %[dest], %[dest], #1 \n\t"
+            "mov r4, %[dest] \n\t"
+            "add r5, %[dest], %[dpitch2], lsr #1 \n\t"
+
+            "2: \n\t"
+            "ldrb r0, [%[src]] \n\t"
+            "ldrb r1, [%[src], #1] \n\t"
+            "add %[src], %[src], #2 \n\t"
+            "strb r0, [r4] \n\t"
+            "strb r1, [r5] \n\t"
+            "add r4, r4, %[dpitch2] \n\t"
+            "add r5, r5, %[dpitch2] \n\t"
+            "ldrb r0, [%[src]] \n\t"
+            "ldrb r1, [%[src], #1] \n\t"
+            "add %[src], %[src], #2 \n\t"
+            "strb r0, [r4] \n\t"
+            "strb r1, [r5] \n\t"
+            "add r4, r4, %[dpitch2] \n\t"
+            "add r5, r5, %[dpitch2] \n\t"
+            "subs r8, r8, #4 \n\t"
+            "bne 2b \n\t"
+
+            "add %[src], %[src], %[srcdiff] \n\t"
+            "subs %[h], %[h], #1 \n\t"
+            "bne 1b \n\t"
+            : [src]"+r"(src), [dest]"+r"(dest), [h]"+r"(h)
+            : [dpitch2]"r"(dpitch2), [w]"r"(w), [srcdiff]"r"(stride[i] - w)
+            : "memory", "r0", "r1", "r4", "r5", "r8");
+    }
+
+    /* Report the same result as the other slice routines; falling off the
+     * end of a non-void function leaves the caller with an undefined value. */
+    return 0;
+}
+
+/*
+ * Packed-format slice copy. The implementation is compiled out, so this
+ * currently does nothing and returns 0.
+ */
+static uint32_t w100_draw_slice_packed(uint8_t *image[], int stride[],
+                                       int w, int h, int x, int y)
+{
+#if 0
+    uint8_t *src;
+    uint8_t *dest;
+    int i;
+
+    dest = st_w100_mem + vidix_play.offsets[st_next_frame] + vidix_play.offset.y;
+    dest += dstrides.y * y + x;
+    src = image[0];
+    for (i = 0; i < h; ++i) {
+        memcpy(dest, src, w * st_image_bpp);
+        src += stride[0];
+        dest += dstrides.y;
+    }
+#endif
+    return 0;
+}
+
+static uint32_t w100_get_image(mp_image_t *mpi)
+{
+#if 0
+    mp_msg(MSGT_VO, MSGL_V, "vo_w100: w100_get_image called.\n");
+
+    if (mpi->type == MP_IMGTYPE_STATIC && st_num_frames > 1)
+        return VO_FALSE;
+    if (mpi->flags & MP_IMGFLAG_READABLE)
+        return VO_FALSE; /* slow video ram */
+    if (((mpi->stride[0] == dstrides.y &&
+          (!(mpi->flags & MP_IMGFLAG_PLANAR) ||
+           (mpi->stride[1] == dstrides.u && mpi->stride[2]==dstrides.v)))
+         || (mpi->flags & (MP_IMGFLAG_ACCEPT_STRIDE | MP_IMGFLAG_ACCEPT_WIDTH))) &&
+        (!(vidix_play.flags & VID_PLAY_INTERLEAVED_UV))) {
+        if (mpi->flags & MP_IMGFLAG_ACCEPT_WIDTH) {
+            // check if only width is enough to represent strides:
+            if (mpi->flags & MP_IMGFLAG_PLANAR) {
+                if ((dstrides.y >> 1) != dstrides.v || dstrides.v != dstrides.u)
+                    return VO_FALSE;
+            } 
else { + if (dstrides.y % (mpi->bpp / 8)) + return VO_FALSE; + } + } + mpi->planes[0] = st_w100_mem + vidix_play.offsets[st_next_frame] + + vidix_play.offset.y; + mpi->width = mpi->stride[0] = dstrides.y; + if (mpi->flags & MP_IMGFLAG_PLANAR) { + mpi->planes[1] = st_w100_mem + vidix_play.offsets[st_next_frame] + + vidix_play.offset.v; + mpi->stride[1] = dstrides.v >> mpi->chroma_x_shift; + mpi->planes[2] = st_w100_mem + vidix_play.offsets[st_next_frame] + + vidix_play.offset.u; + mpi->stride[2] = dstrides.u >> mpi->chroma_x_shift; + } else + mpi->width /= mpi->bpp / 8; + mpi->flags |= MP_IMGFLAG_DIRECT; + return VO_TRUE; + } +#endif + return VO_FALSE; +} + +static void w100_set_yuv_addrs(w100_priv_t *priv, w100_yuv_planes_t *offsets) +{ + uint32_t val; + + priv->video_y_offset.f.y_offset = GetRealMemAddr((uint32_t)offsets->y); + priv->video_u_offset.f.u_offset = GetRealMemAddr((uint32_t)offsets->u); + priv->video_v_offset.f.v_offset = GetRealMemAddr((uint32_t)offsets->v); + AtiCore_WriteReg(mmVIDEO_Y_OFFSET, (uint32_t *)&priv->video_y_offset); + AtiCore_WriteReg(mmVIDEO_U_OFFSET, (uint32_t *)&priv->video_u_offset); + AtiCore_WriteReg(mmVIDEO_V_OFFSET, (uint32_t *)&priv->video_v_offset); + + val = 0x7B; + AtiCore_WriteReg(mmDISP_DB_BUF_CNTL, &val); +} + +static void w100_set_overlay_expand(w100_priv_t *priv, int exp_h, int exp_v) +{ + video_ctrl_u video_ctrl; + + priv->overlay_expand_h = exp_h; + priv->overlay_expand_v = exp_v; + + AtiCore_ReadReg(mmVIDEO_CTRL, (uint32_t *)&video_ctrl); + video_ctrl.f.video_hor_exp = exp_h; + video_ctrl.f.video_ver_exp = exp_v; + AtiCore_WriteReg(mmVIDEO_CTRL, (uint32_t *)&video_ctrl); +} + +static int w100_setup(w100_priv_t *priv) +{ + if (!AtiCore_AllocOverlay(&priv->overlay_handle)) { + mp_msg(MSGT_VO, MSGL_FATAL, + "vo_w100: AtiCore_AllocOverlay failed.\n"); + return 0; + } + if (!AtiCore_SetupOverlay(priv->overlay_handle, &priv->overlay_prop)) { + mp_msg(MSGT_VO, MSGL_FATAL, + "vo_w100: AtiCore_SetupOverlay failed.\n"); + 
return 0; + } + AtiCore_SetOverlayPos(priv->overlay_handle, + priv->overlay_pos_x, priv->overlay_pos_y); + AtiCore_SetOverlayOnOff(priv->overlay_handle, 1); + w100_set_yuv_addrs(priv, &priv->frame_offsets[priv->current_frame]); + w100_set_overlay_expand(priv, priv->overlay_expand_h, priv->overlay_expand_v); + AtiCore_SetDisplayBrightness(priv->display_brightness); + AtiCore_SetGraphicWindowOnOff(priv->is_graphic_window_enabled); + +/* graphic_ctrl_t gc; */ +/* AtiCore_ReadReg(mmGRAPHIC_CTRL, &gc); */ +/* gc.low_power_on = 0; */ +/* AtiCore_WriteReg(mmGRAPHIC_CTRL, &gc); */ + + return 1; +} + +static void *w100_offset2addr(uint32_t offset) +{ + void *addr; + AtiCore_SetupMemoryTransfer((uint32_t)offset, &addr); + AtiCore_TerminateMemoryTransfer(); + return addr; +} + +// ---------------------------------------------------------------- interfaces +/* + * Preinitializes driver (real INITIALIZATION) + * arg - currently it's vo_subdevice + * returns: zero on successful initialization, non-zero on error. 
+ */
+static int preinit(const char *vo_subdevice)
+{
+    w100_priv_t *priv = &st_w100_priv;
+    lcd_background_color_u lbc;
+
+    mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: preinit() was called\n");
+
+    /* fill w100_priv_t information */
+    /* This is done, and the sub-options are parsed, before attaching to
+     * AtiCore: nothing in this function detaches again, so a bad option
+     * string must fail while there is still nothing to undo.
+     * rotate = -1 means "not given"; config() then picks the rotation from
+     * the shape of the source image. */
+    memset(priv, 0, sizeof(*priv));
+    priv->rotate = -1;
+
+    if (subopt_parse(vo_subdevice, subopts) != 0) {
+        return -1;
+    }
+
+    if (!AtiCore_ProcessAttach())
+        return -1;
+
+    priv->is_graphic_window_enabled = 1;
+    priv->eq_brightness = 0;     /* FIXME */
+
+    GetAvailableVideoMem(&priv->vram_size[INTERNAL_VRAM],
+                         &priv->vram_size[EXTERNAL_VRAM]);
+    mp_msg(MSGT_VO, MSGL_V, "vo_w100: VRAM size %dKB/%dKB\n",
+           priv->vram_size[INTERNAL_VRAM] / 1024,
+           priv->vram_size[EXTERNAL_VRAM] / 1024);
+
+    priv->vram_addr[INTERNAL_VRAM] = w100_offset2addr(VRAM_OFFSET_INTERNAL);
+    priv->vram_addr[EXTERNAL_VRAM] = w100_offset2addr(VRAM_OFFSET_EXTERNAL);
+    /* vram_addr[] holds void pointers, so they are printed with %p */
+    mp_msg(MSGT_VO, MSGL_V, "vo_w100: VRAM address %p/%p\n",
+           priv->vram_addr[INTERNAL_VRAM], priv->vram_addr[EXTERNAL_VRAM]);
+
+    /* Set the LCD background colour to black. The whole register image is
+     * cleared first so that no indeterminate bits are written along with
+     * the three colour fields. */
+    memset(&lbc, 0, sizeof(lbc));
+    lbc.f.lcd_bg_red = 0;
+    lbc.f.lcd_bg_green = 0;
+    lbc.f.lcd_bg_blue = 0;
+    AtiCore_WriteReg(mmLCD_BACKGROUND_COLOR, (uint32_t *)&lbc);
+
+    return 0;
+}
+
+/*
+ * Initialize (means CONFIGURE) the display driver.
+ * params:
+ *   src_width,srcheight: image source size
+ *   dst_width,dst_height: size of the requested window size, just a hint
+ *   fullscreen: flag, 0=windowd 1=fullscreen, just a hint
+ *   title: window title, if available
+ *   format: fourcc of pixel format
+ * returns : zero on successful initialization, non-zero on error. 
+ */ +static int config(uint32_t src_width, uint32_t src_height, + uint32_t dst_width, uint32_t dst_height, uint32_t flags, + char *title, uint32_t format) +{ + w100_priv_t *priv = &st_w100_priv; + int fs = flags & VOFLAG_FULLSCREEN; + int vm = flags & VOFLAG_MODESWITCHING; + int zoom = flags & VOFLAG_SWSCALE; + int y_pitch, uv_pitch; + int x_res = 480, y_res = 640; + uint32_t apitch; + int i; + uint32_t plane_flags = 0; + + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: config() was called\n"); + mp_msg(MSGT_VO, MSGL_V, "vo_w100: src_width:%d, src_height:%d, dst_width:%d, dst_height:%d\n", + src_width, src_height, dst_width, dst_height); + + if (!query_format(format)) { + printf("vo_w100: unsupported fourcc for this w100 driver: %x (%s)\n", + format, vo_format_name(format)); + return -1; + } + priv->format = format; + + // rotate + if (priv->rotate < 0) { + if (src_width > src_height) { + priv->current_rotate = 3; + } else { + priv->current_rotate = 0; + } + } else + priv->current_rotate = priv->rotate; + if (priv->current_rotate != 0 && priv->current_rotate != 3) { + mp_msg(MSGT_VO, MSGL_FATAL, "vo_w100: Rotate %d not supported\n", priv->current_rotate); + return -1; + } + + if (priv->current_rotate == 1 || priv->current_rotate == 3) { + i = src_width; + src_width = src_height; + src_height = i; + } + + dst_width = src_width; + dst_height = src_height; + + if (fs) { + int arg[] = { 0, 0, 1, 1, 2, 2, 2, 2, 3 }; + int arg2[] = { 1, 2, 4, 8 }; + int hor_exp = x_res / src_width; + int ver_exp = y_res / (src_height - 32); + int expand; + mp_msg(MSGT_VO, MSGL_V, "vo_w100: hor_exp:%d, ver_exp:%d\n", + hor_exp, ver_exp); + if ((hor_exp > 0 && hor_exp <= 8 && arg[hor_exp] >= 0) && + (ver_exp > 0 && ver_exp <= 8 && arg[ver_exp] >= 0)) { + if (arg[hor_exp] > arg[ver_exp]) + expand = arg[ver_exp]; + else + expand = arg[hor_exp]; + } + priv->overlay_expand_h = priv->overlay_expand_v = expand; + dst_width *= arg2[expand]; + dst_height *= arg2[expand]; + if (dst_height > y_res) + 
dst_height = y_res; + } + + // ɽ¼¨Îΰè¤ò¥»¥ó¥¿¥ê¥ó¥° + priv->overlay_pos_x = (x_res - dst_width) / 2; + priv->overlay_pos_y = (y_res - dst_height) / 2; + + // Hardware scaling + geometry(&priv->overlay_pos_x, &priv->overlay_pos_y, + &dst_width, &dst_height, x_res, y_res); + mp_msg(MSGT_VO, MSGL_V, "vo_w100: overlay pos(%d, %d)\n", + priv->overlay_pos_x, priv->overlay_pos_y); + mp_msg(MSGT_VO, MSGL_V, "vo_w100: src size(%dx%d), dst size(%dx%d)\n", + src_width, src_height, dst_width, dst_height); + + /* select first frame */ + priv->current_frame = 0; + + priv->src_width = src_width; + priv->src_height = src_height; + priv->overlay_pitch_y = 16; + priv->overlay_pitch_u = 16; + priv->overlay_pitch_v = 16; + + switch (format) { + case IMGFMT_YV12: + case IMGFMT_IYUV: + case IMGFMT_I420: + case IMGFMT_YVU9: + case IMGFMT_IF09: + case IMGFMT_Y8: + case IMGFMT_Y800: + y_pitch = (src_width + 15) & ~15; + uv_pitch = ((src_width / 2) + 7) & ~7; + break; + default: + return -1; + } + + /* ¥µ¡¼¥Õ¥§¥¤¥¹¤¬Æâ¢VRAM¤Ë¼ý¤Þ¤é¤Ê¤¤¾ì¹ç¤Ï¡¢V-Plane¤ò³°ÉôVRAM¤ËÄɤ¤½Ð¤¹¡£ */ + if (y_pitch * src_height + uv_pitch * src_height > priv->vram_size[INTERNAL_VRAM]) + plane_flags = 4; + + if (vo_doublebuffering) { + if (y_pitch * src_height + uv_pitch * src_height * 2> priv->vram_size[INTERNAL_VRAM]) + plane_flags = 4; + } + + /* ³°ÉôVRAM¤Ë¥×¥ì¡¼¥ó¤òÃÖ¤¤¤¿¾ì¹ç¤Ï¡¢Graphic window¤òÀÚ¤é¤Ê¤¤¤È²èÁü¤¬Íð¤ì¤ë */ +/* priv->is_graphic_window_enabled = (plane_flags != 0) ? 0 : 1; */ + priv->is_graphic_window_enabled = 0; + + uint32_t p[2] = { + VRAM_OFFSET_INTERNAL, + VRAM_OFFSET_EXTERNAL + 640 * 480 * 2 + }; + i = 0; + while (i < MAX_FRAMES) { + int sel, j; + /* Y-plane */ + sel = plane_flags & 1 ? EXTERNAL_VRAM : INTERNAL_VRAM; + priv->frame_offsets[i].y = (void *)p[sel]; + priv->frame_addrs[i].y = w100_offset2addr(p[sel]); + p[sel] += y_pitch * src_height; + /* U-plane */ + sel = plane_flags & 2 ? 
EXTERNAL_VRAM : INTERNAL_VRAM; + priv->frame_offsets[i].u = (void *)p[sel]; + priv->frame_addrs[i].u = w100_offset2addr(p[sel]); + p[sel] += uv_pitch * (src_height / 2); + /* V-plane */ + sel = plane_flags & 4 ? EXTERNAL_VRAM : INTERNAL_VRAM; + priv->frame_offsets[i].v = (void *)p[sel]; + priv->frame_addrs[i].v = w100_offset2addr(p[sel]); + p[sel] += uv_pitch * (src_height / 2); + if ((p[INTERNAL_VRAM] - VRAM_OFFSET_INTERNAL >= priv->vram_size[INTERNAL_VRAM]) || + (p[EXTERNAL_VRAM] - VRAM_OFFSET_EXTERNAL >= priv->vram_size[EXTERNAL_VRAM])) + break; + mp_msg(MSGT_VO, MSGL_V, "vo_w100: frame_offsets[%d].y = 0x%08x\n", i, priv->frame_offsets[i].y); + mp_msg(MSGT_VO, MSGL_V, "vo_w100: frame_offsets[%d].u = 0x%08x\n", i, priv->frame_offsets[i].u); + mp_msg(MSGT_VO, MSGL_V, "vo_w100: frame_offsets[%d].v = 0x%08x\n", i, priv->frame_offsets[i].v); + ++i; + } + priv->nframes = i; + if (priv->nframes > MAX_FRAMES) + priv->nframes = MAX_FRAMES; + mp_msg(MSGT_VO, MSGL_V, "vo_w100: nframes = %d\n", priv->nframes); + + priv->overlay_prop.lpSrcBitmap = (void *)(priv->frame_offsets[0].y); + priv->overlay_prop.XCoord = 0; + priv->overlay_prop.YCoord = 0; + priv->overlay_prop.SrcPitch = y_pitch; + priv->overlay_prop.SrcHeight = src_height; + priv->overlay_prop.OverlayWidth = dst_width; + priv->overlay_prop.OverlayHeight = dst_height; + priv->overlay_prop.lpOverlayKey = 0; + priv->overlay_prop.OverlayFormat = OVLTYPE_YUV420; + + priv->display_brightness = 127; + + w100_set_yuv_addrs(priv, &priv->frame_offsets[0]); + + /* clear every frame */ + memset(priv->vram_addr[INTERNAL_VRAM], 0, priv->vram_size[INTERNAL_VRAM]); + memset(priv->vram_addr[EXTERNAL_VRAM] + 640 * 480 * 2, 0, + priv->vram_size[EXTERNAL_VRAM] - 640 * 480 * 2); + + switch (format) { + case IMGFMT_YV12: + case IMGFMT_I420: + case IMGFMT_IYUV: + case IMGFMT_YVU9: + case IMGFMT_IF09: + case IMGFMT_Y800: + case IMGFMT_Y8: + apitch = priv->overlay_pitch_y - 1; + dstrides.y = (src_width + apitch) & ~apitch; + apitch = 
priv->overlay_pitch_v - 1; + dstrides.v = (src_width + apitch) & ~apitch; + apitch = priv->overlay_pitch_u - 1; + dstrides.u = (src_width + apitch) & ~apitch; +/* st_image_bpp = 1; */ + break; + case IMGFMT_RGB32: + case IMGFMT_BGR32: + apitch = priv->overlay_pitch_y - 1; + dstrides.y = (src_width * 4 + apitch) & ~apitch; + dstrides.u = dstrides.v = 0; +/* st_image_bpp = 4; */ + break; + case IMGFMT_RGB24: + case IMGFMT_BGR24: + apitch = priv->overlay_pitch_y - 1; + dstrides.y = (src_width * 3 + apitch) & ~apitch; + dstrides.u = dstrides.v = 0; +/* st_image_bpp = 3; */ + break; + default: + apitch = priv->overlay_pitch_y - 1; + dstrides.y = (src_width * 2 + apitch) & ~apitch; + dstrides.u = dstrides.v = 0; +/* st_image_bpp = 2; */ + break; + } + + if (format == IMGFMT_YV12 || format == IMGFMT_I420 || format == IMGFMT_IYUV) { + switch (priv->current_rotate) { + case 0: + video_out_w100.draw_slice = w100_draw_slice_420; + break; + case 1: + break; + case 2: + break; + case 3: + video_out_w100.draw_slice = w100_draw_slice_420_rotate3; + break; + default: + video_out_w100.draw_slice = w100_draw_slice_420; + break; + } + } + /* else if (format == IMGFMT_YVU9 || format == IMGFMT_IF09) */ + /* vo_server->draw_slice = w100_draw_slice_410; */ + else + video_out_w100.draw_slice = w100_draw_slice_packed; + + if (!w100_setup(priv)) + return -1; + + return 0; +} + +/* + * Control interface + */ +static int control(uint32_t request, void *data, ...) 
+{ + w100_priv_t *priv = &st_w100_priv; + switch (request) { + case VOCTRL_GET_IMAGE: + return w100_get_image(data); + case VOCTRL_QUERY_FORMAT: + return query_format(*((uint32_t *)data)); + case VOCTRL_SET_EQUALIZER: + { + va_list ap; + int value; + + va_start(ap, data); + value = va_arg(ap, int); + va_end(ap); + + if (!strcasecmp(data, "brightness")) { + int br; + priv->eq_brightness = value * 10; + br = (priv->eq_brightness + 1000) * 127 / 2000; + if (br < 0) + br = 0; + if (br > 127) + br = 127; + if (br > 64) + br -= 64; + else + br += 64; + priv->display_brightness = br; + + mp_msg(MSGT_VO, MSGL_V, + "vo_w100: control(VOCTRL_SET_EQUALIZER) %d %d\n", + value, br); + + if (AtiCore_SetDisplayBrightness(priv->display_brightness)) + return VO_TRUE; + else + return VO_FALSE; + } + } + case VOCTRL_GET_EQUALIZER: + { + va_list ap; + int *value; + + va_start(ap, data); + value = va_arg(ap, int*); + va_end(ap); + + if (!strcasecmp(data, "brightness")) { + *value = priv->eq_brightness; + return VO_TRUE; + } else + return VO_FALSE; + } + } + + return VO_NOTIMPL; +} + +/* + * Display a new RGB/BGR frame of the video to the screen. 
+ * params: + * src[0] - pointer to the image + */ +int draw_frame(uint8_t *src[]) +{ + mp_msg(MSGT_VO, MSGL_V, "vo_w100: dummy draw_frame() was called\n"); + return -1; +} + +/* + * Draw a planar YUV slice to the buffer: + * params: + * src[3] = source image planes (Y,U,V) + * stride[3] = source image planes line widths (in bytes) + * w,h = width*height of area to be copied (in Y pixels) + * x,y = position at the destination image (in Y pixels) + */ +int draw_slice(uint8_t *src[], int stride[], int w,int h, int x,int y) +{ + mp_msg(MSGT_VO, MSGL_V, "vo_w100: dummy draw_slice() was called\n"); + return -1; +} + + +/* + * Draws OSD to the screen buffer + */ +static void draw_osd(void) +{ + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: draw_osd() was called\n"); + vo_draw_text(st_w100_priv.src_height, st_w100_priv.src_width, draw_alpha); +} + +/* + * Blit/Flip buffer to the screen. Must be called after each frame! + */ +void flip_page(void) +{ + w100_priv_t *priv = &st_w100_priv; + + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: flip_page() was called\n"); + if (vo_doublebuffering) { + w100_set_yuv_addrs(priv, &priv->frame_offsets[priv->current_frame]); + priv->current_frame = (priv->current_frame + 1) % priv->nframes; + } +} + +/* + * This func is called after every frames to handle keyboard and + * other events. It's called in PAUSE mode too! + */ +extern int g_sigcont; +void check_events(void) +{ + w100_priv_t *priv = &st_w100_priv; + + if (g_sigcont) { + mp_msg(MSGT_VO, MSGL_INFO, "vo_w100: SIGCONT recived.\n"); + + /* Immediately after resuming, because kernel modifies the register, it + waits for that. */ + usleep(1000 * 1000); + + /* re-attach */ +#if 0 + /* Hmm... With respect to of context is necessary, but really it + fails. It does not release and also there is no problem. 
*/ + if (!AtiCore_ReleaseOverlay(priv->overlay_handle)) { + mp_msg(MSGT_VO, MSGL_FATAL, + "vo_w100: AtiCore_ReleaseOverlay failed.\n"); + exit_player(NULL); + } +#endif + if (!AtiCore_ProcessDetach()) { + mp_msg(MSGT_VO, MSGL_FATAL, + "vo_w100: AtiCore_ProcessDetach failed.\n"); + exit_player(NULL); + } + if (!AtiCore_ProcessAttach()) { + mp_msg(MSGT_VO, MSGL_FATAL, + "vo_w100: AtiCore_ProcessAttach failed.\n"); + exit_player(NULL); + } + + /* re-setup */ + if (!w100_setup(priv)) + exit_player(NULL); + + g_sigcont = 0; + } +} + +/* + * Closes driver. Should restore the original state of the system. + */ +static void uninit(void) +{ + mp_msg(MSGT_VO, MSGL_V, "vo_w100: uninit() was called\n"); + + AtiCore_SetOverlayOnOff(st_w100_priv.overlay_handle, 0); + AtiCore_ReleaseOverlay(st_w100_priv.overlay_handle); + AtiCore_SetGraphicWindowOnOff(1); + AtiCore_ProcessDetach(); +} + + +// ---------------------------------------------------------------- +static int query_format(uint32_t format) +{ + mp_msg(MSGT_VO, MSGL_V, "vo_w100: query_format was called: %x (%s)\n", + format, vo_format_name(format)); + + if (IMGFMT_IS_RGB(format)) { + /* RGB/BGR Formats */ + // TODO + return 0; + + switch (IMGFMT_RGB_DEPTH(format)) { + case 16: + return VFCAP_CSP_SUPPORTED | VFCAP_CSP_SUPPORTED_BY_HW + | VFCAP_HWSCALE_UP | VFCAP_HWSCALE_DOWN | VFCAP_OSD | VFCAP_ACCEPT_STRIDE; + break; + } + } else { + /* Planar YUV Formats */ + switch (format) { + case IMGFMT_YV12: + case IMGFMT_IYUV: + case IMGFMT_I420: + case IMGFMT_YVU9: + case IMGFMT_IF09: + case IMGFMT_Y8: + case IMGFMT_Y800: + return VFCAP_CSP_SUPPORTED | VFCAP_CSP_SUPPORTED_BY_HW + | VFCAP_HWSCALE_UP | VFCAP_HWSCALE_DOWN | VFCAP_OSD | VFCAP_ACCEPT_STRIDE; + break; + } + } + + return 0; +} + +static void dump_vo_info(void) +{ + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: ================================\n"); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_flags:%x\n", vo_flags); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_depthonscreen:%d\n", 
vo_depthonscreen); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_screenwidth:%d\n", vo_screenwidth); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_screenheight:%d\n", vo_screenheight); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_dx:%d\n", vo_dx); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_dy:%d\n", vo_dy); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_dwidth:%d\n", vo_dwidth); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_dheight:%d\n", vo_dheight); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_dbpp:%d\n", vo_dbpp); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_grabpointer:%d\n", vo_grabpointer); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_doublebuffering:%d\n", vo_doublebuffering); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_directrendering:%d\n", vo_directrendering); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_vsync:%d\n", vo_vsync); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_fs:%d\n", vo_fs); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_fsmode:%d\n", vo_fsmode); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_panscan:%f\n", vo_panscan); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_adapter_num:%d\n", vo_adapter_num); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_refresh_rate:%d\n", vo_refresh_rate); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_gamma_brightness:%d\n", vo_gamma_brightness); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_gamma_saturation:%d\n", vo_gamma_saturation); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_gamma_contrast:%d\n", vo_gamma_contrast); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_gamma_hue:%d\n", vo_gamma_hue); +/* mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_gamma_red_intensity:%d\n", vo_gamma_red_intensity); */ +/* mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_gamma_green_intensity:%d\n", vo_gamma_green_intensity); */ +/* mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_gamma_blue_intensity:%d\n", vo_gamma_blue_intensity); */ +/* mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_mouse_timer_const:%d\n", vo_mouse_timer_const); */ + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_nomouse_input:%d\n", vo_nomouse_input); + 
mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_pts:%d\n", vo_pts); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_fps:%f\n", vo_fps); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: vo_colorkey:%d\n", vo_colorkey); + mp_msg(MSGT_VO, MSGL_DBG2, "vo_w100: ================================\n"); +} diff --git a/recipes/mplayer/files/vo_w100_api.h b/recipes/mplayer/files/vo_w100_api.h new file mode 100644 index 0000000000..59cf58be26 --- /dev/null +++ b/recipes/mplayer/files/vo_w100_api.h @@ -0,0 +1,306 @@ +/* -*- mode: c++; tab-width: 4 -*- */ + +/* $Id$ */ + +/* + * Copyright (C) 2003-2004 AGAWA Koji <i (AT) atty (DOT) jp> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
 */

/**
 * @file vo_w100_api.h
 * @brief Declarations for the AtiCore acceleration library entry points
 *        used by the vo_w100 driver.
 *
 * The struct layouts below carry the original author's offset annotations;
 * "confirmed" marks an offset the author verified, the rest are guesses.
 *
 * @author AGAWA Koji
 * @date $Date$
 * @version $Revision$
 */

#ifndef W100API_H_INCLUDED
#define W100API_H_INCLUDED

#ifdef __cplusplus
# define EXTERN_C_BEGIN extern "C" {
# define EXTERN_C_END }
#else
# define EXTERN_C_BEGIN
# define EXTERN_C_END
#endif

EXTERN_C_BEGIN

#include <inttypes.h>

/* Short fixed-width aliases. */
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;

#include "vo_w100_fb.h"

// DP_GUI_MASTER_CNTL.GMC_Dst_DataType
// DP_DATATYPE.Dp_Dst_DataType
/* #define DSTTYPE_8BPP 2 // 8 bpp grey scale */
/* #define DSTTYPE_16BPP_1555 3 //16 bpp aRGB 1555 */
/* #define DSTTYPE_16BPP_444 5 //16 bpp aRGB 4444 */
#define DSTTYPE_8BPP 1

// DP_GUI_MASTER_CNTL.GMC_Src_DataType
// DP_DATATYPE.Dp_Src_DataType
#define SRCTYPE_1BPP_OPA 0 //mono (expanded to frgd, bkgd)
#define SRCTYPE_1BPP_TRA 1 //mono (expanded to frgd, leave_alone)
#define SRCTYPE_EQU_DST 3 //color (same as DST)
#define SRCTYPE_SOLID_COLOR_BLT 4 //solid color for Blt (use frgd)
#define SRCTYPE_4BPP 5 //4 bpp
#define SRCTYPE_12BPP_PACKED 6 //12 bpp packed

#define ROP3_SRCCOPY 0xCC

#define OVLTYPE_YUV420 0x07

/* Indices used to select internal or external VRAM. */
#define INTERNAL_VRAM 0
#define EXTERNAL_VRAM 1

/* Base offsets of the two VRAM regions. */
#define VRAM_OFFSET_INTERNAL 0x00000000
#define VRAM_OFFSET_EXTERNAL 0x0F000000

typedef struct {
    /* member order and types are unconfirmed */
    int16_t XCoord;
    int16_t YCoord;
} ATI_POINT;

typedef struct {
    /* member order and types are unconfirmed */
    int16_t XCoord;
    int16_t YCoord;
    int16_t Width;
    int16_t Height;
} ATI_RECT;

typedef struct {
    /* member order and types are unconfirmed */
    uint32_t Count; /* +0 */
    uint8_t ScaleXFactor; /* +4 confirmed */
    uint8_t ScaleYFactor; /* +5 confirmed */
    uint8_t BlendOn; /* +6 confirmed */
    uint8_t dummy1;
} ATI_STRETCH; /* 8bytes? */

typedef struct {
    uint32_t *lpSrcBitmap;
    uint16_t XCoord; /* +4 confirmed */
    uint16_t YCoord; /* +6 confirmed */
    uint16_t SrcPitch; /* +8 confirmed */
    uint16_t SrcHeight; /* +10 confirmed */
    uint16_t OverlayWidth;
    uint16_t OverlayHeight;
    uint16_t *lpOverlayKey; /* +16 confirmed */
    // pointer to uint16_t key[2]
    uint8_t OverlayFormat; /* +20 confirmed */
    uint8_t dummy1;
    uint16_t dummy2;
} ATI_OVERLAYPROP; /* 24bytes? */

typedef struct {
    int HInvert;
    int VInvert;
} ATI_EXTVIDEOPROP;

typedef struct {
    ATI_EXTVIDEOPROP ExtVideoProp;
} ATI_UNKNOWN1;

typedef struct {
    ATI_UNKNOWN1 u1; // this one? (the author was unsure where this member belongs)
    uint8_t HExpansion; /* +8 confirmed */
    uint8_t VExpansion; /* +9 confirmed */
    /* NOTE(review): as declared, RConversion directly follows VExpansion,
       which with 4-byte int would put it at +10, not the annotated +12, and
       makes the struct smaller than the annotated 16 bytes -- confirm
       against the library before relying on this member. */
    uint8_t RConversion; /* +12 confirmed */
/* ATI_UNKNOWN1 x; */
} ATI_EXTENDEDOVERLAYPROP; /* 16byte? */

/**
 * Start using the acceleration functions.
 *
 * @return 1:success, 0:fail
 */
int AtiCore_ProcessAttach(void);
int AtiCore_ProcessAttachSpecialMode(uint32_t);

/**
 * Stop using the acceleration functions.
 *
 * @return *unknown*
 */
int AtiCore_ProcessDetach(void);

/**
 * Create a surface in video memory.
 *
 * @arg handle (output) handle of the surface
 * @arg offset (output) offset of the surface
 * @arg size size of the surface
 * @arg direction allocation direction (0: from low towards high,
 *                1: from high towards low)
 * @return 1:success, 0:fail
 */
int AtiCore_AllocateSurface(uint16_t *handle, uint32_t *offset,
                            uint32_t size, uint32_t direction);

/**
 * Destroy a surface.
 *
 * @arg handle handle of the surface
 * @return 1:success, 0:fail
 */
int AtiCore_DestroySurface(uint16_t handle);

/**
 * @param rop presumably an 8-bit flag value
 */
int AtiCore_SetRopOperation(uint32_t rop);

int AtiCore_SetDstType(uint32_t);
int AtiCore_SetSrcType(uint32_t);
/* NOTE(review): ATI_CLIPRECT is not declared in the visible part of this
   header; presumably it comes from another header -- confirm. */
int AtiCore_SetSrcClippingRect(ATI_CLIPRECT *cliprect);
int AtiCore_SetDstClippingRect(ATI_CLIPRECT *cliprect);
int AtiCore_SetSrcPitchOffset(int pitch, int offset);
int AtiCore_SetDstPitchOffset(int pitch, int offset);

/* Blit entry points; argument semantics are not documented in this header. */
int AtiCore_BitBltFilpRotate(int blt090Rotate,
                             ATI_RECT *dstRect, ATI_RECT *srcRect);
int AtiCore_StretchBlt(ATI_STRETCH *option,
                       ATI_POINT *point, ATI_RECT *srcRect);


/**
 * Wait for a pending operation (BitBlt etc.) to complete.
 *
 * @param msec wait time (msec)
 * @return 1: the operation has completed, 0: the operation has not finished yet
 */
int AtiCore_WaitComplete(int msec);

/**
 * Create an overlay.
 *
 * @param handle (output) handle of the overlay
 * @return 1:success, 0:fail
 */
int AtiCore_AllocOverlay(uint16_t *handle);

int AtiCore_ReleaseOverlay(uint16_t handle);

/**
 * @return 1:success, 0:fail
 */
int AtiCore_SetupOverlay(uint16_t handle, ATI_OVERLAYPROP *prop);

int AtiCore_SetupOverlayExtended(uint16_t handle, ATI_EXTENDEDOVERLAYPROP *prop);

/**
 * @return 1:success, 0:fail
 */
int AtiCore_SetOverlayOnOff(uint16_t handle, int isEnable);

int AtiCore_SetOverlayPos(uint16_t handle, uint16_t x, uint16_t y);

int AtiCore_SetupMemoryTransfer(uint32_t offset, void **regdata);
int AtiCore_TerminateMemoryTransfer(void);

int AtiCore_GetFrontBufferPitchOffset(uint32_t *pitch, uint32_t *offset);

/**
 * @return 1:success, 0:fail
 */
int AtiCore_SetDisplayBrightness(int brightness);

/**
 * @return 1:success, 0:fail
 */
int GetAvailableVideoMem(uint32_t *internal, uint32_t *external);

/*
 * 1 ; 0
 * NOTE(review): presumably 1 enables and 0 disables the graphic window --
 * confirm.
 */
int AtiCore_SetGraphicWindowOnOff(int );

int AtiCore_ReadReg(uint32_t reg, void *val);
int AtiCore_WriteReg(uint32_t reg, void *val);

uint32_t GetRealMemAddr(uint32_t offset);

int AtiCore_SetBkgColour(uint32_t);

/* ================================================================ */
/* from libqte.so.2.3.2 */
/*
AtiCore_AlphaBlend
AtiCore_BitBlt
AtiCore_BrushType
AtiCore_CursorOnOff
AtiCore_DrawPixel
AtiCore_Flush
AtiCore_GammaCorrection
AtiCore_GetCRC
AtiCore_GetCursorPos
AtiCore_GetDeviceInfo
AtiCore_GetGPIO_Data
AtiCore_GetGraphicExtended
AtiCore_GetGraphicWindowPos
AtiCore_GetLargestVideoMemBlock
+AtiCore_GetLastError +AtiCore_GetMultiCRC +AtiCore_GetOverlayPos +AtiCore_GetPitchOffsetProperty +AtiCore_Host +AtiCore_LoadCursorBitMap +AtiCore_PaintRect +AtiCore_PolyScanline +AtiCore_Polyline +AtiCore_ProcessAttachMinimal +AtiCore_ProcessAttachSpecialMode +AtiCore_ProcessDetachMinimal +AtiCore_ProcessDetachSpecialMode +AtiCore_ReadCfgReg +AtiCore_ScanlineShading +AtiCore_SetApertures +AtiCore_SetBkgColour +AtiCore_SetBytePixelOrder +AtiCore_SetCursorPos +AtiCore_SetDisplayParameters +AtiCore_SetDriverBehaviour +AtiCore_SetFrgColour +AtiCore_SetFrontBuffer +AtiCore_SetGPIO_Data +AtiCore_SetGraphicWindowPos +AtiCore_SetOverlayPosUsingGraphicWindowXY +AtiCore_SetPartialCursor +AtiCore_SetupGraphicExtended +AtiCore_SetupGraphicWindow +AtiCore_SetupPM4 +AtiCore_SmallText +AtiCore_SubmitPM4Packet +AtiCore_TransBitBlt +AtiCore_WriteCfgReg + */ + +EXTERN_C_END + +#endif /* W100API_H_INCLUDED */ diff --git a/recipes/mplayer/files/vo_w100_fb.h b/recipes/mplayer/files/vo_w100_fb.h new file mode 100644 index 0000000000..39318c645b --- /dev/null +++ b/recipes/mplayer/files/vo_w100_fb.h @@ -0,0 +1,4338 @@ +/* + * linux/drivers/video/w100fb.h + * + * Frame Buffer Device for ATI w100 (Wallaby) + * + * Copyright (C) 2002, ATI Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * ChangeLog: + * + */ + +#if !defined (_W100FB_H) +#define _W100FB_H + +/* Block CIF Start: */ +#define mmCHIP_ID 0x0000 +#define mmREVISION_ID 0x0004 +#define mmWRAP_BUF_A 0x0008 +#define mmWRAP_BUF_B 0x000C +#define mmWRAP_TOP_DIR 0x0010 +#define mmWRAP_START_DIR 0x0014 +#define mmCIF_CNTL 0x0018 +#define mmCFGREG_BASE 0x001C +#define mmCIF_IO 0x0020 +#define mmCIF_READ_DBG 0x0024 +#define mmCIF_WRITE_DBG 0x0028 +#define cfgIND_ADDR_A_0 0x0000 +#define cfgIND_ADDR_A_1 0x0001 +#define cfgIND_ADDR_A_2 0x0002 +#define cfgIND_DATA_A 0x0003 +#define cfgREG_BASE 0x0004 +#define cfgINTF_CNTL 0x0005 +#define cfgSTATUS 0x0006 +#define cfgCPU_DEFAULTS 0x0007 +#define cfgIND_ADDR_B_0 0x0008 +#define cfgIND_ADDR_B_1 0x0009 +#define cfgIND_ADDR_B_2 0x000A +#define cfgIND_DATA_B 0x000B +#define cfgPM4_RPTR 0x000C +#define cfgSCRATCH 0x000D +#define cfgPM4_WRPTR_0 0x000E +#define cfgPM4_WRPTR_1 0x000F +/* Block CIF End: */ + +/* Block CP Start: */ +#define mmCP_RB_CNTL 0x0210 +#define mmCP_RB_BASE 0x0214 +#define mmCP_RB_RPTR_ADDR 0x0218 +#define mmCP_RB_RPTR 0x021C +#define mmCP_RB_RPTR_WR 0x02F8 +#define mmCP_RB_WPTR 0x0220 +#define mmCP_IB_BASE 0x0228 +#define mmCP_IB_BUFSZ 0x022C +#define mmCP_CSQ_CNTL 0x0230 +#define mmCP_CSQ_APER_PRIMARY 0x0300 +#define mmCP_CSQ_APER_INDIRECT 0x0340 +#define mmCP_ME_CNTL 0x0240 +#define mmCP_ME_RAM_ADDR 0x0244 +#define mmCP_ME_RAM_RADDR 0x0248 +#define mmCP_ME_RAM_DATAH 0x024C +#define mmCP_ME_RAM_DATAL 0x0250 +#define mmCP_DEBUG 0x025C +#define mmSCRATCH_REG0 0x0260 +#define mmSCRATCH_REG1 0x0264 +#define mmSCRATCH_REG2 0x0268 +#define mmSCRATCH_REG3 0x026C +#define mmSCRATCH_REG4 0x0270 +#define mmSCRATCH_REG5 0x0274 +#define mmSCRATCH_UMSK 0x0280 +#define mmSCRATCH_ADDR 0x0284 +#define mmCP_CSQ_ADDR 0x02E4 +#define mmCP_CSQ_DATA 0x02E8 +#define mmCP_CSQ_STAT 0x02EC +#define mmCP_STAT 0x02F0 +#define mmGEN_INT_CNTL 0x0200 +#define mmGEN_INT_STATUS 0x0204 +/* Block CP End: */ + +/* Block DISPLAY Start: */ +#define mmLCD_FORMAT 
0x0410 +#define mmGRAPHIC_CTRL 0x0414 +#define mmGRAPHIC_OFFSET 0x0418 +#define mmGRAPHIC_PITCH 0x041C +#define mmCRTC_TOTAL 0x0420 +#define mmACTIVE_H_DISP 0x0424 +#define mmACTIVE_V_DISP 0x0428 +#define mmGRAPHIC_H_DISP 0x042C +#define mmGRAPHIC_V_DISP 0x0430 +#define mmVIDEO_CTRL 0x0434 +#define mmGRAPHIC_KEY 0x0438 +#define mmVIDEO_Y_OFFSET 0x043C +#define mmVIDEO_Y_PITCH 0x0440 +#define mmVIDEO_U_OFFSET 0x0444 +#define mmVIDEO_U_PITCH 0x0448 +#define mmVIDEO_V_OFFSET 0x044C +#define mmVIDEO_V_PITCH 0x0450 +#define mmVIDEO_H_POS 0x0454 +#define mmVIDEO_V_POS 0x0458 +#define mmBRIGHTNESS_CNTL 0x045C +#define mmCURSOR1_OFFSET 0x0460 +#define mmCURSOR1_H_POS 0x0464 +#define mmCURSOR1_V_POS 0x0468 +#define mmCURSOR1_COLOR0 0x046C +#define mmCURSOR1_COLOR1 0x0470 +#define mmCURSOR2_OFFSET 0x0474 +#define mmCURSOR2_H_POS 0x0478 +#define mmCURSOR2_V_POS 0x047C +#define mmCURSOR2_COLOR0 0x0480 +#define mmCURSOR2_COLOR1 0x0484 +#define mmDISP_INT_CNTL 0x0488 +#define mmCRTC_SS 0x048C +#define mmCRTC_LS 0x0490 +#define mmCRTC_REV 0x0494 +#define mmCRTC_DCLK 0x049C +#define mmCRTC_GS 0x04A0 +#define mmCRTC_VPOS_GS 0x04A4 +#define mmCRTC_GCLK 0x04A8 +#define mmCRTC_GOE 0x04AC +#define mmCRTC_FRAME 0x04B0 +#define mmCRTC_FRAME_VPOS 0x04B4 +#define mmGPIO_DATA 0x04B8 +#define mmGPIO_CNTL1 0x04BC +#define mmGPIO_CNTL2 0x04C0 +#define mmLCDD_CNTL1 0x04C4 +#define mmLCDD_CNTL2 0x04C8 +#define mmGENLCD_CNTL1 0x04CC +#define mmGENLCD_CNTL2 0x04D0 +#define mmDISP_DEBUG 0x04D4 +#define mmDISP_DB_BUF_CNTL 0x04D8 +#define mmDISP_CRC_SIG 0x04DC +#define mmCRTC_DEFAULT_COUNT 0x04E0 +#define mmLCD_BACKGROUND_COLOR 0x04E4 +#define mmCRTC_PS2 0x04E8 +#define mmCRTC_PS2_VPOS 0x04EC +#define mmCRTC_PS1_ACTIVE 0x04F0 +#define mmCRTC_PS1_NACTIVE 0x04F4 +#define mmCRTC_GCLK_EXT 0x04F8 +#define mmCRTC_ALW 0x04FC +#define mmCRTC_ALW_VPOS 0x0500 +#define mmCRTC_PSK 0x0504 +#define mmCRTC_PSK_HPOS 0x0508 +#define mmCRTC_CV4_START 0x050C +#define mmCRTC_CV4_END 0x0510 +#define mmCRTC_CV4_HPOS 
0x0514 +#define mmCRTC_ECK 0x051C +#define mmREFRESH_CNTL 0x0520 +#define mmGENLCD_CNTL3 0x0524 +#define mmGPIO_DATA2 0x0528 +#define mmGPIO_CNTL3 0x052C +#define mmGPIO_CNTL4 0x0530 +#define mmCHIP_STRAP 0x0534 +#define mmDISP_DEBUG2 0x0538 +#define mmDEBUG_BUS_CNTL 0x053C +#define mmGAMMA_VALUE1 0x0540 +#define mmGAMMA_VALUE2 0x0544 +#define mmGAMMA_SLOPE 0x0548 +#define mmGEN_STATUS 0x054C +#define mmHW_INT 0x0550 +/* Block DISPLAY End: */ + +/* Block GFX Start: */ +#define mmDST_OFFSET 0x1004 +#define mmDST_PITCH 0x1008 +#define mmDST_PITCH_OFFSET 0x102C +#define mmDST_X 0x101C +#define mmDST_Y 0x1020 +#define mmDST_X_Y 0x1194 +#define mmDST_Y_X 0x1038 +#define mmDST_WIDTH 0x100C +#define mmDST_HEIGHT 0x1010 +#define mmDST_WIDTH_HEIGHT 0x1198 +#define mmDST_HEIGHT_WIDTH 0x103C +#define mmDST_HEIGHT_WIDTH_8 0x118C +#define mmDST_HEIGHT_Y 0x11A0 +#define mmDST_WIDTH_X 0x1188 +#define mmDST_WIDTH_X_INCY 0x119C +#define mmDST_LINE_START 0x1090 +#define mmDST_LINE_END 0x1094 +#define mmBRUSH_OFFSET 0x108C +#define mmBRUSH_Y_X 0x1074 +#define mmDP_BRUSH_FRGD_CLR 0x107C +#define mmDP_BRUSH_BKGD_CLR 0x1078 +#define mmSRC2_OFFSET 0x1060 +#define mmSRC2_PITCH 0x1064 +#define mmSRC2_PITCH_OFFSET 0x1068 +#define mmSRC2_X 0x1050 +#define mmSRC2_Y 0x1054 +#define mmSRC2_X_Y 0x1058 +#define mmSRC2_WIDTH 0x1080 +#define mmSRC2_HEIGHT 0x1084 +#define mmSRC2_INC 0x1088 +#define mmSRC_OFFSET 0x11AC +#define mmSRC_PITCH 0x11B0 +#define mmSRC_PITCH_OFFSET 0x1028 +#define mmSRC_X 0x1014 +#define mmSRC_Y 0x1018 +#define mmSRC_X_Y 0x1190 +#define mmSRC_Y_X 0x1034 +#define mmSRC_WIDTH 0x1040 +#define mmSRC_HEIGHT 0x1044 +#define mmSRC_INC 0x1048 +#define mmHOST_DATA0 0x13C0 +#define mmHOST_DATA1 0x13C4 +#define mmHOST_DATA2 0x13C8 +#define mmHOST_DATA3 0x13CC +#define mmHOST_DATA4 0x13D0 +#define mmHOST_DATA5 0x13D4 +#define mmHOST_DATA6 0x13D8 +#define mmHOST_DATA7 0x13DC +#define mmHOST_DATA_LAST 0x13E0 +#define mmDP_SRC_FRGD_CLR 0x1240 +#define mmDP_SRC_BKGD_CLR 0x1244 +#define 
mmSC_LEFT 0x1140 +#define mmSC_RIGHT 0x1144 +#define mmSC_TOP 0x1148 +#define mmSC_BOTTOM 0x114C +#define mmSRC_SC_RIGHT 0x1154 +#define mmSRC_SC_BOTTOM 0x115C +#define mmDP_CNTL 0x11C8 +#define mmDP_CNTL_DST_DIR 0x11CC +#define mmDP_DATATYPE 0x12C4 +#define mmDP_MIX 0x12C8 +#define mmDP_WRITE_MSK 0x12CC +#define mmCLR_CMP_CLR_SRC 0x1234 +#define mmCLR_CMP_CLR_DST 0x1238 +#define mmCLR_CMP_CNTL 0x1230 +#define mmCLR_CMP_MSK 0x123C +#define mmDEFAULT_PITCH_OFFSET 0x10A0 +#define mmDEFAULT_SC_BOTTOM_RIGHT 0x10A8 +#define mmDEFAULT2_SC_BOTTOM_RIGHT 0x10AC +#define mmREF1_PITCH_OFFSET 0x10B8 +#define mmREF2_PITCH_OFFSET 0x10BC +#define mmREF3_PITCH_OFFSET 0x10C0 +#define mmREF4_PITCH_OFFSET 0x10C4 +#define mmREF5_PITCH_OFFSET 0x10C8 +#define mmREF6_PITCH_OFFSET 0x10CC +#define mmDP_GUI_MASTER_CNTL 0x106C +#define mmSC_TOP_LEFT 0x11BC +#define mmSC_BOTTOM_RIGHT 0x11C0 +#define mmSRC_SC_BOTTOM_RIGHT 0x11C4 +#define mmGLOBAL_ALPHA 0x1210 +#define mmFILTER_COEF 0x1214 +#define mmMVC_CNTL_START 0x11E0 +#define mmE2_ARITHMETIC_CNTL 0x1220 +#define mmDEBUG0 0x1280 +#define mmDEBUG1 0x1284 +#define mmDEBUG2 0x1288 +#define mmDEBUG3 0x128C +#define mmDEBUG4 0x1290 +#define mmDEBUG5 0x1294 +#define mmDEBUG6 0x1298 +#define mmDEBUG7 0x129C +#define mmDEBUG8 0x12A0 +#define mmDEBUG9 0x12A4 +#define mmDEBUG10 0x12A8 +#define mmDEBUG11 0x12AC +#define mmDEBUG12 0x12B0 +#define mmDEBUG13 0x12B4 +#define mmDEBUG14 0x12B8 +#define mmDEBUG15 0x12BC +#define mmENG_CNTL 0x13E8 +#define mmENG_PERF_CNT 0x13F0 +/* Block GFX End: */ + +/* Block IDCT Start: */ +#define mmIDCT_RUNS 0x0C00 +#define mmIDCT_LEVELS 0x0C04 +#define mmIDCT_CONTROL 0x0C3C +#define mmIDCT_AUTH_CONTROL 0x0C08 +#define mmIDCT_AUTH 0x0C0C +/* Block IDCT End: */ + +/* Block MC Start: */ +#define mmMEM_CNTL 0x0180 +#define mmMEM_ARB 0x0184 +#define mmMC_FB_LOCATION 0x0188 +#define mmMEM_EXT_CNTL 0x018C +#define mmMC_EXT_MEM_LOCATION 0x0190 +#define mmMEM_EXT_TIMING_CNTL 0x0194 +#define mmMEM_SDRAM_MODE_REG 0x0198 +#define 
mmMEM_IO_CNTL 0x019C +#define mmMC_DEBUG 0x01A0 +#define mmMC_BIST_CTRL 0x01A4 +#define mmMC_BIST_COLLAR_READ 0x01A8 +#define mmTC_MISMATCH 0x01AC +#define mmMC_PERF_MON_CNTL 0x01B0 +#define mmMC_PERF_COUNTERS 0x01B4 +/* Block MC End: */ + +/* Block RBBM Start: */ +#define mmWAIT_UNTIL 0x1400 +#define mmISYNC_CNTL 0x1404 +#define mmRBBM_GUICNTL 0x1408 +#define mmRBBM_STATUS 0x0140 +#define mmRBBM_STATUS_alt_1 0x140C +#define mmRBBM_CNTL 0x0144 +#define mmRBBM_SOFT_RESET 0x0148 +#define mmNQWAIT_UNTIL 0x0150 +#define mmRBBM_DEBUG 0x016C +#define mmRBBM_CMDFIFO_ADDR 0x0170 +#define mmRBBM_CMDFIFO_DATAL 0x0174 +#define mmRBBM_CMDFIFO_DATAH 0x0178 +#define mmRBBM_CMDFIFO_STAT 0x017C +/* Block RBBM End: */ + +/* Block CG Start: */ +#define mmCLK_PIN_CNTL 0x0080 +#define mmPLL_REF_FB_DIV 0x0084 +#define mmPLL_CNTL 0x0088 +#define mmSCLK_CNTL 0x008C +#define mmPCLK_CNTL 0x0090 +#define mmCLK_TEST_CNTL 0x0094 +#define mmPWRMGT_CNTL 0x0098 +#define mmPWRMGT_STATUS 0x009C +/* Block CG End: */ + +/* default value definitions */ +#define defCHIP_ID 0x00001002 +#define defREVISION_ID 0x00000000 +#define defWRAP_BUF_A 0x01000000 +#define defWRAP_BUF_B 0x01000000 +#define defWRAP_TOP_DIR 0x00000000 +#define defWRAP_START_DIR 0x00000000 +//#define defCIF_CNTL 0x00082900 +#define defCIF_CNTL 0x00182d00 //??updated by Tobey Z.for Sharp,Oct11,2002 +#define defCFGREG_BASE 0x00000000 +//#define defCIF_IO 0x000c0800 +#define defCIF_IO 0x000C0902 //??updated by Tobey Z.for Sharp,Oct11,2002 +#define defCIF_READ_DBG 0x00018223 +#define defCIF_WRITE_DBG 0x00002100 +#define defIND_ADDR_A_0 0x00000000 +#define defIND_ADDR_A_1 0x00000000 +#define defIND_ADDR_A_2 0x00000000 +#define defIND_DATA_A 0x00000000 +#define defREG_BASE 0x00000001 +#define defINTF_CNTL 0x00000011 +#define defSTATUS 0x00000000 +#define defCPU_DEFAULTS 0x00000006 +#define defIND_ADDR_B_0 0x00000000 +#define defIND_ADDR_B_1 0x00000000 +#define defIND_ADDR_B_2 0x00000000 +#define defIND_DATA_B 0x00000000 +#define 
defPM4_RPTR 0x00000000 +#define defSCRATCH 0x00000000 +#define defPM4_WRPTR_0 0x00000000 +#define defPM4_WRPTR_1 0x00000000 +#define defCP_RB_CNTL 0x00000000 +#define defCP_RB_BASE 0x00000000 +#define defCP_RB_RPTR_ADDR 0x00000000 +#define defCP_RB_RPTR 0x00000000 +#define defCP_RB_RPTR_WR 0x00000000 +#define defCP_RB_WPTR 0x00000000 +#define defCP_IB_BASE 0x00000000 +#define defCP_IB_BUFSZ 0x00000000 +#define defCP_CSQ_CNTL 0x00000000 +#define defCP_CSQ_APER_PRIMARY 0x00000000 +#define defCP_CSQ_APER_INDIRECT 0x00000000 +#define defCP_ME_CNTL 0x40000000 +#define defCP_ME_RAM_ADDR 0x00000000 +#define defCP_ME_RAM_RADDR 0x00000000 +#define defCP_ME_RAM_DATAH 0x00000000 +#define defCP_ME_RAM_DATAL 0x00000000 +#define defCP_DEBUG 0x00000000 +#define defSCRATCH_REG0 0x00000000 +#define defSCRATCH_REG1 0x00000000 +#define defSCRATCH_REG2 0x00000000 +#define defSCRATCH_REG3 0x00000000 +#define defSCRATCH_REG4 0x00000000 +#define defSCRATCH_REG5 0x00000000 +#define defSCRATCH_UMSK 0x00000000 +#define defSCRATCH_ADDR 0x00000000 +#define defCP_CSQ_ADDR 0x00000000 +#define defCP_CSQ_DATA 0x00000000 +#define defCP_CSQ_STAT 0x00000000 +#define defCP_STAT 0x00000000 +#define defGEN_INT_CNTL 0x00000000 +#define defGEN_INT_STATUS_rd 0x00080000 +#define defGEN_INT_STATUS_wr 0x00000000 +#define defLCD_FORMAT 0x00000000 +#define defGRAPHIC_CTRL 0x00000000 +#define defGRAPHIC_OFFSET 0x00000000 +#define defGRAPHIC_PITCH 0x00000000 +#define defCRTC_TOTAL 0x00000000 +#define defACTIVE_H_DISP 0x00000000 +#define defACTIVE_V_DISP 0x00000000 +#define defGRAPHIC_H_DISP 0x00000000 +#define defGRAPHIC_V_DISP 0x00000000 +#define defVIDEO_CTRL 0x00000000 +#define defGRAPHIC_KEY 0x00000000 +#define defVIDEO_Y_OFFSET 0x00000000 +#define defVIDEO_Y_PITCH 0x00000000 +#define defVIDEO_U_OFFSET 0x00000000 +#define defVIDEO_U_PITCH 0x00000000 +#define defVIDEO_V_OFFSET 0x00000000 +#define defVIDEO_V_PITCH 0x00000000 +#define defVIDEO_H_POS 0x00000000 +#define defVIDEO_V_POS 0x00000000 +#define 
defBRIGHTNESS_CNTL 0x00000000 +#define defCURSOR1_OFFSET 0x00000000 /* def-prefixed macros are per-register default values; in this run everything is zero except GPIO_CNTL1, LCDD_CNTL1 and GENLCD_CNTL1, 2 and 3 */ +#define defCURSOR1_H_POS 0x00000000 +#define defCURSOR1_V_POS 0x00000000 +#define defCURSOR1_COLOR0 0x00000000 +#define defCURSOR1_COLOR1 0x00000000 +#define defCURSOR2_OFFSET 0x00000000 +#define defCURSOR2_H_POS 0x00000000 +#define defCURSOR2_V_POS 0x00000000 +#define defCURSOR2_COLOR0 0x00000000 +#define defCURSOR2_COLOR1 0x00000000 +#define defDISP_INT_CNTL 0x00000000 +#define defCRTC_SS 0x00000000 +#define defCRTC_LS 0x00000000 +#define defCRTC_REV 0x00000000 +#define defCRTC_DCLK 0x00000000 +#define defCRTC_GS 0x00000000 +#define defCRTC_VPOS_GS 0x00000000 +#define defCRTC_GCLK 0x00000000 +#define defCRTC_GOE 0x00000000 +#define defCRTC_FRAME 0x00000000 +#define defCRTC_FRAME_VPOS 0x00000000 +#define defGPIO_DATA 0x00000000 +#define defGPIO_CNTL1 0xff00ff00 /* non-zero default: bytes 1 and 3 are all ones */ +#define defGPIO_CNTL2 0x00000000 +#define defLCDD_CNTL1 0x0000ffff /* non-zero default: low 16 bits all ones */ +#define defLCDD_CNTL2 0x00000000 +#define defGENLCD_CNTL1 0x00aaa002 /* non-zero default */ +#define defGENLCD_CNTL2 0x00000002 /* non-zero default */ +#define defDISP_DEBUG 0x00000000 +#define defDISP_DB_BUF_CNTL_rd 0x00000000 +#define defDISP_DB_BUF_CNTL_wr 0x00000000 +#define defDISP_CRC_SIG 0x00000000 +#define defCRTC_DEFAULT_COUNT 0x00000000 +#define defLCD_BACKGROUND_COLOR 0x00000000 +#define defCRTC_PS2 0x00000000 +#define defCRTC_PS2_VPOS 0x00000000 +#define defCRTC_PS1_ACTIVE 0x00000000 +#define defCRTC_PS1_NACTIVE 0x00000000 +#define defCRTC_GCLK_EXT 0x00000000 +#define defCRTC_ALW 0x00000000 +#define defCRTC_ALW_VPOS 0x00000000 +#define defCRTC_PSK 0x00000000 +#define defCRTC_PSK_HPOS 0x00000000 +#define defCRTC_CV4_START 0x00000000 +#define defCRTC_CV4_END 0x00000000 +#define defCRTC_CV4_HPOS 0x00000000 +#define defCRTC_ECK 0x00000000 +#define defREFRESH_CNTL 0x00000000 +#define defGENLCD_CNTL3 0x000002aa /* non-zero default */ +#define defGPIO_DATA2 0x00000000 +#define defGPIO_CNTL3 0x00000000 +#define defGPIO_CNTL4 0x00000000 +#define defCHIP_STRAP 0x00000000 +#define defDISP_DEBUG2 0x00000000 +#define defDEBUG_BUS_CNTL
0x00000000 +#define defGAMMA_VALUE1 0x00000000 /* def-prefixed macros are per-register default values; every default from here through defDP_SRC_BKGD_CLR (GAMMA, DST, BRUSH, SRC2, SRC and HOST_DATA groups) is zero */ +#define defGAMMA_VALUE2 0x00000000 +#define defGAMMA_SLOPE 0x00000000 +#define defGEN_STATUS 0x00000000 +#define defHW_INT 0x00000000 +#define defDST_OFFSET 0x00000000 +#define defDST_PITCH 0x00000000 +#define defDST_PITCH_OFFSET 0x00000000 +#define defDST_X 0x00000000 +#define defDST_Y 0x00000000 +#define defDST_X_Y 0x00000000 +#define defDST_Y_X 0x00000000 +#define defDST_WIDTH 0x00000000 +#define defDST_HEIGHT 0x00000000 +#define defDST_WIDTH_HEIGHT 0x00000000 +#define defDST_HEIGHT_WIDTH 0x00000000 +#define defDST_HEIGHT_WIDTH_8 0x00000000 +#define defDST_HEIGHT_Y 0x00000000 +#define defDST_WIDTH_X 0x00000000 +#define defDST_WIDTH_X_INCY 0x00000000 +#define defDST_LINE_START 0x00000000 +#define defDST_LINE_END 0x00000000 +#define defBRUSH_OFFSET 0x00000000 +#define defBRUSH_Y_X 0x00000000 +#define defDP_BRUSH_FRGD_CLR 0x00000000 +#define defDP_BRUSH_BKGD_CLR 0x00000000 +#define defSRC2_OFFSET 0x00000000 +#define defSRC2_PITCH 0x00000000 +#define defSRC2_PITCH_OFFSET 0x00000000 +#define defSRC2_X 0x00000000 +#define defSRC2_Y 0x00000000 +#define defSRC2_X_Y 0x00000000 +#define defSRC2_WIDTH 0x00000000 +#define defSRC2_HEIGHT 0x00000000 +#define defSRC2_INC 0x00000000 +#define defSRC_OFFSET 0x00000000 +#define defSRC_PITCH 0x00000000 +#define defSRC_PITCH_OFFSET 0x00000000 +#define defSRC_X 0x00000000 +#define defSRC_Y 0x00000000 +#define defSRC_X_Y 0x00000000 +#define defSRC_Y_X 0x00000000 +#define defSRC_WIDTH 0x00000000 +#define defSRC_HEIGHT 0x00000000 +#define defSRC_INC 0x00000000 +#define defHOST_DATA0 0x00000000 +#define defHOST_DATA1 0x00000000 +#define defHOST_DATA2 0x00000000 +#define defHOST_DATA3 0x00000000 +#define defHOST_DATA4 0x00000000 +#define defHOST_DATA5 0x00000000 +#define defHOST_DATA6 0x00000000 +#define defHOST_DATA7 0x00000000 +#define defHOST_DATA_LAST 0x00000000 +#define defDP_SRC_FRGD_CLR 0x00000000 +#define defDP_SRC_BKGD_CLR 0x00000000 +#define defSC_LEFT 0x00000000 +#define
defSC_RIGHT 0x00000000 +#define defSC_TOP 0x00000000 /* def-prefixed macros are per-register default values; in this run only defENG_CNTL and defMEM_CNTL are non-zero */ +#define defSC_BOTTOM 0x00000000 +#define defSRC_SC_RIGHT 0x00000000 +#define defSRC_SC_BOTTOM 0x00000000 +#define defDP_CNTL 0x00000000 +#define defDP_CNTL_DST_DIR 0x00000000 +#define defDP_DATATYPE 0x00000000 +#define defDP_MIX 0x00000000 +#define defDP_WRITE_MSK 0x00000000 +#define defCLR_CMP_CLR_SRC 0x00000000 +#define defCLR_CMP_CLR_DST 0x00000000 +#define defCLR_CMP_CNTL 0x00000000 +#define defCLR_CMP_MSK 0x00000000 +#define defDEFAULT_PITCH_OFFSET 0x00000000 +#define defDEFAULT_SC_BOTTOM_RIGHT 0x00000000 +#define defDEFAULT2_SC_BOTTOM_RIGHT 0x00000000 +#define defREF1_PITCH_OFFSET 0x00000000 +#define defREF2_PITCH_OFFSET 0x00000000 +#define defREF3_PITCH_OFFSET 0x00000000 +#define defREF4_PITCH_OFFSET 0x00000000 +#define defREF5_PITCH_OFFSET 0x00000000 +#define defREF6_PITCH_OFFSET 0x00000000 +#define defDP_GUI_MASTER_CNTL 0x00000000 +#define defSC_TOP_LEFT 0x00000000 +#define defSC_BOTTOM_RIGHT 0x00000000 +#define defSRC_SC_BOTTOM_RIGHT 0x00000000 +#define defGLOBAL_ALPHA 0x00000000 +#define defFILTER_COEF 0x00000000 +#define defMVC_CNTL_START 0x00000000 +#define defE2_ARITHMETIC_CNTL 0x00000000 +#define defDEBUG0 0x00000000 +#define defDEBUG1 0x00000000 +#define defDEBUG2 0x00000000 +#define defDEBUG3 0x00000000 +#define defDEBUG4 0x00000000 +#define defDEBUG5 0x00000000 +#define defDEBUG6 0x00000000 +#define defDEBUG7 0x00000000 +#define defDEBUG8 0x00000000 +#define defDEBUG9 0x00000000 +#define defDEBUG10 0x00000000 +#define defDEBUG11 0x00000000 +#define defDEBUG12 0x00000000 +#define defDEBUG13 0x00000000 +#define defDEBUG14 0x00000000 +#define defDEBUG15 0x00000000 +#define defENG_CNTL 0x00000003 /* non-zero default: bits 0 and 1 set */ +#define defENG_PERF_CNT 0x00000000 +#define defIDCT_RUNS 0x00000000 +#define defIDCT_LEVELS 0x00000000 +#define defIDCT_CONTROL 0x00000000 +#define defIDCT_AUTH_CONTROL 0x00000000 +#define defIDCT_AUTH 0x00000000 +#define defMEM_CNTL 0x00000006 /* non-zero default: bits 1 and 2 set */ +#define defMEM_ARB 0x00000000 +#define defMC_FB_LOCATION
0x00ff0000 +#define defMEM_EXT_CNTL 0x00040010 +#define defMC_EXT_MEM_LOCATION 0x07ff0000 +#define defMEM_EXT_TIMING_CNTL 0x00140c73 +#define defMEM_SDRAM_MODE_REG 0x00050000 +#define defMEM_IO_CNTL 0x00ff00ff +#define defMC_DEBUG 0x00000000 +#define defMC_BIST_CTRL 0x00000000 +#define defMC_BIST_COLLAR_READ 0x00000000 +#define defTC_MISMATCH 0x00000000 +#define defMC_PERF_MON_CNTL 0x00000000 +#define defMC_PERF_COUNTERS 0x00000000 +#define defWAIT_UNTIL 0xc5cdcdcd /* NOTE(review): the 0xcd and 0x0d byte runs in this and the following RBBM defaults look like a fill pattern captured from a dump rather than designed reset values. Confirm before relying on them. */ +#define defISYNC_CNTL 0x00000000 +#define defRBBM_GUICNTL 0x00000000 +#define defRBBM_STATUS 0x81cdcd40 +#define defRBBM_CNTL 0x0000000f +#define defRBBM_SOFT_RESET 0x00000000 +#define defNQWAIT_UNTIL 0x00000001 +#define defRBBM_DEBUG 0x00000000 +#define defRBBM_CMDFIFO_ADDR 0x0000000d +#define defRBBM_CMDFIFO_DATAL 0xcdcdcdcd +#define defRBBM_CMDFIFO_DATAH 0x00000dcd +#define defRBBM_CMDFIFO_STAT 0x00000d0d +#define defCLK_PIN_CNTL 0x0000003f +#define defPLL_REF_FB_DIV 0x5a500000 +#define defPLL_CNTL 0x4b000203 +#define defSCLK_CNTL 0x00ff0300 +#define defPCLK_CNTL 0x00010000 +#define defCLK_TEST_CNTL 0x00000000 +#define defPWRMGT_CNTL 0x00000004 +#define defPWRMGT_STATUS 0x00000001 + +#define CFG_BASE_BOOT_DEFAULT 0x0 /* aperture layout constants follow; each BASE and TOP pair below is an inclusive address range */ +#define CFG_BASE_VALUE 0x0 +#define REG_BASE_BOOT_DEFAULT 0x01 +#define REG_BASE_VALUE 0x10000 /* equals REG_BASE_BOOT_DEFAULT shifted left by 16 */ +#define MEM_INT_BASE_VALUE 0x100000 +#define MEM_INT_TOP_VALUE_W100 0x15ffff /* with the base above: 0x60000 addresses, 384 KiB if byte-addressed */ +#define MEM_EXT_BASE_VALUE 0x800000 +#define MEM_EXT_TOP_VALUE 0x9fffff /* with the base above: 0x200000 addresses, 2 MiB if byte-addressed */ +#define WRAP_BUF_BASE_VALUE 0x80000 +#define WRAP_BUF_TOP_VALUE 0xbffff /* with the base above: 0x40000 addresses, 256 KiB if byte-addressed */ + +//---------------------------------------------------------------------------- +// Registers Field Definitions (each group below is headed by the REGISTER.field names whose values it enumerates) + +// DP_GUI_MASTER_CNTL.GMC_Brush_DataType +// DP_DATATYPE.Brush_DataType +#define DP_BRUSH_8x8MONOOPA 0 //8x8 mono pattern (expanded to frgd, bkgd) +#define DP_BRUSH_8x8MONOTRA 1 //8x8 mono pattern (expanded to frgd, leave_alone) +#define DP_PEN_32x1MONOOPA 6 //32x1 mono pattern (expanded to frgd, bkgd) +#define DP_PEN_32x1MONOTRA 7 //32x1 mono pattern
(expanded to frgd, leave_alone) +#define DP_BRUSH_8x8COLOR 10 //8x8 color pattern +#define DP_BRUSH_SOLIDCOLOR 13 //solid color pattern (frgd) +#define DP_BRUSH_NONE 15 //no brush used + +#define SIZE_BRUSH_8x8MONO 2 /* the four SIZE values match a count of 32-bit words: 64 pixels at 1, 8 and 16 bpp give 2, 16 and 32 words, and 32 pixels at 1 bpp give 1 (unit inferred from the arithmetic, TODO confirm) */ +#define SIZE_PEN_32x1MONO 1 +#define SIZE_BRUSH_8x8COLOR_8 16 +#define SIZE_BRUSH_8x8COLOR_16 32 +#define MAX_BRUSH_SIZE SIZE_BRUSH_8x8COLOR_16 /* largest of the four sizes above */ + +// DP_GUI_MASTER_CNTL.GMC_Dst_DataType +// DP_DATATYPE.Dp_Dst_DataType +#define DP_DST_8BPP 2 // 8 bpp grey scale +#define DP_DST_16BPP_1555 3 //16 bpp aRGB 1555 +#define DP_DST_16BPP_444 5 //16 bpp aRGB 4444 (NOTE(review): the macro name says 444 but the format described here is 4444) + +// DP_GUI_MASTER_CNTL.GMC_Src_DataType +// DP_DATATYPE.Dp_Src_DataType +#define DP_SRC_1BPP_OPA 0 //mono (expanded to frgd, bkgd) +#define DP_SRC_1BPP_TRA 1 //mono (expanded to frgd, leave_alone) +#define DP_SRC_COLOR_SAME_AS_DST 3 //color (same as DST) +#define DP_SRC_SOLID_COLOR_BLT 4 //solid color for Blt (use frgd) +#define DP_SRC_4BPP 5 //4 bpp +#define DP_SRC_12BPP_PACKED 6 //12 bpp packed + +// DP_GUI_MASTER_CNTL.GMC_Byte_Pix_Order +// DP_DATATYPE.Dp_Byte_Pix_Order +#define DP_PIX_ORDER_MSB2LSB 0 //monochrome pixel order from MSBit to LSBit +#define DP_PIX_ORDER_LSB2MSB 1 //monochrome pixel order from LSBit to MSBit + +// DP_GUI_MASTER_CNTL.GMC_Dp_Src_Source +#define DP_SRC_MEM_LINEAR 1 //loaded from memory (linear trajectory) +#define DP_SRC_MEM_RECTANGULAR 2 //loaded from memory (rectangular trajectory) +#define DP_SRC_HOSTDATA_BIT 3 //loaded from hostdata (linear trajectory) +#define DP_SRC_HOSTDATA_BYTE 4 //loaded from hostdata (linear trajectory & byte-aligned) + +// DP_GUI_MASTER_CNTL.GMC_Dp_Op +#define DP_OP_ROP 0 +#define DP_OP_ARITHMETIC 1 + +// E2_ARITHMETIC_CNTL.opcode (sixteen codes, 0 through 15, ending at E2_OPC_IDCT_ONLY_IFRAME) +#define E2_OPC_GLBALP_ADD_SRC2 0 +#define E2_OPC_GLBALP_SUB_SRC2 1 +#define E2_OPC_SRC1_ADD_SRC2 2 +#define E2_OPC_SRC1_SUB_SRC2 3 +#define E2_OPC_DST_SADDBLEND_SRC2 4 +#define E2_OPC_DST_CADDBLEND_SRC2 5 +#define E2_OPC_DST_CSUBBLEND_SRC2 6 +#define E2_OPC_LF_SRC2 7 +#define E2_OPC_SCALE_SRC2 8 +#define
E2_OPC_STRETCH_SRC2 9 +#define E2_OPC_SRC1_4BPPCPYWEXP 10 +#define E2_OPC_MC1 11 +#define E2_OPC_MC2 12 +#define E2_OPC_MC1_IDCT 13 +#define E2_OPC_MC2_IDCT 14 +#define E2_OPC_IDCT_ONLY_IFRAME 15 + +// E2_ARITHMETIC_CNTL.clamp +#define E2_CLAMP_OFF 0 +#define E2_CLAMP_ON 1 + +// E2_ARITHMETIC_CNTL.rounding +#define E2_ROUNDING_TRUNCATE 0 +#define E2_ROUNDING_TO_INFINITY 1 + +// E2_ARITHMETIC_CNTL.srcblend (same five selector codes as destblend below) +#define E2_SRCBLEND_GLOBALALPHA 0 +#define E2_SRCBLEND_ZERO 1 +#define E2_SRCBLEND_SRC2ALPHA 2 +#define E2_SRCBLEND_DSTALPHA 3 +#define E2_SRCBLEND_ALPHA1PLANE 4 + +// E2_ARITHMETIC_CNTL.destblend +#define E2_DSTBLEND_GLOBALALPHA 0 +#define E2_DSTBLEND_ZERO 1 +#define E2_DSTBLEND_SRC2ALPHA 2 +#define E2_DSTBLEND_DSTALPHA 3 +#define E2_DSTBLEND_ALPHA1PLANE 4 + +// LCD_FORMAT.lcd_type (ten codes; lcd_format_t reserves 4 bits for this field) +#define LCDTYPE_TFT333 0 +#define LCDTYPE_TFT444 1 +#define LCDTYPE_TFT555 2 +#define LCDTYPE_TFT666 3 +#define LCDTYPE_COLSTNPACK4 4 +#define LCDTYPE_COLSTNPACK8F1 5 +#define LCDTYPE_COLSTNPACK8F2 6 +#define LCDTYPE_COLSTNPACK16 7 +#define LCDTYPE_MONSTNPACK4 8 +#define LCDTYPE_MONSTNPACK8 9 + +// CP_RB_CNTL.rb_bufsz (encoded value is log2 of the named size minus 3, e.g. 2K = 2^11 gives 8; the unit of the size is not stated here) +#define RB_SIZE_2K 8 +#define RB_SIZE_4K 9 +#define RB_SIZE_8K 10 +#define RB_SIZE_16K 11 +#define RB_SIZE_32K 12 +#define RB_SIZE_64K 13 + +// GRAPHIC_CTRL.color_depth (seven codes; graphic_ctrl_t reserves 3 bits for this field) +#define COLOR_DEPTH_1BPP 0 +#define COLOR_DEPTH_2BPP 1 +#define COLOR_DEPTH_4BPP 2 +#define COLOR_DEPTH_8BPP 3 +#define COLOR_DEPTH_332 4 +#define COLOR_DEPTH_A444 5 +#define COLOR_DEPTH_A555 6 + +// VIDEO_CTRL.video_mode +#define VIDEO_MODE_422 0 +#define VIDEO_MODE_420 1 + +/* data structure definitions: every register gets a NAME_t struct of bit-fields plus a NAME_u union that overlays the fields (member f) with the raw register word (member val). NOTE(review): bit-fields of type unsigned long are implementation-defined in ISO C, and unsigned long is 64 bits wide on LP64 targets, so these layouts assume a compiler with a 32-bit unsigned long. */ + +typedef struct _chip_id_t { /* two 16-bit halves: 32 bits total */ + unsigned long vendor_id : 16; + unsigned long device_id : 16; + } chip_id_t; + +typedef union { + unsigned long val : 32; + chip_id_t f; +} chip_id_u; + +typedef struct _revision_id_t { /* two 4-bit revision fields, then 24 unnamed padding bits */ + unsigned long minor_rev_id : 4; + unsigned long major_rev_id : 4; + unsigned long : 24; + } revision_id_t; + +typedef union { + unsigned long val : 32; + revision_id_t f; +}
revision_id_u; + +typedef struct _wrap_buf_a_t { /* Layout pattern used throughout: NAME_t lists a register's bit-fields, unnamed fields are padding, and NAME_u overlays them with the raw word val. Here 24, 3 and 5 bits: 32 total. */ + unsigned long offset_addr_a : 24; + unsigned long block_size_a : 3; + unsigned long : 5; + } wrap_buf_a_t; + +typedef union { + unsigned long val : 32; + wrap_buf_a_t f; +} wrap_buf_a_u; + +typedef struct _wrap_buf_b_t { /* same shape as wrap_buf_a_t, for buffer B */ + unsigned long offset_addr_b : 24; + unsigned long block_size_b : 3; + unsigned long : 5; + } wrap_buf_b_t; + +typedef union { + unsigned long val : 32; + wrap_buf_b_t f; +} wrap_buf_b_u; + +typedef struct _wrap_top_dir_t { /* 23-bit address, 9 padding bits */ + unsigned long top_addr : 23; + unsigned long : 9; + } wrap_top_dir_t; + +typedef union { + unsigned long val : 32; + wrap_top_dir_t f; +} wrap_top_dir_u; + +typedef struct _wrap_start_dir_t { /* 23-bit address, 9 padding bits */ + unsigned long start_addr : 23; + unsigned long : 9; + } wrap_start_dir_t; + +typedef union { + unsigned long val : 32; + wrap_start_dir_t f; +} wrap_start_dir_u; + +typedef struct _cif_cntl_t { /* 25 named fields, no padding; the widths add up to exactly 32 bits */ + unsigned long swap_reg : 2; + unsigned long swap_fbuf_1 : 2; + unsigned long swap_fbuf_2 : 2; + unsigned long swap_fbuf_3 : 2; + unsigned long pmi_int_disable : 1; + unsigned long pmi_schmen_disable : 1; + unsigned long intb_oe : 1; + unsigned long en_wait_to_compensate_dq_prop_dly : 1; + unsigned long compensate_wait_rd_size : 2; + unsigned long wait_asserted_timeout_val : 2; + unsigned long wait_masked_val : 2; + unsigned long en_wait_timeout : 1; + unsigned long en_one_clk_setup_before_wait : 1; + unsigned long interrupt_active_high : 1; + unsigned long en_overwrite_straps : 1; + unsigned long strap_wait_active_hi : 1; + unsigned long lat_busy_count : 2; + unsigned long lat_rd_pm4_sclk_busy : 1; + unsigned long dis_system_bits : 1; + unsigned long dis_mr : 1; + unsigned long cif_spare_1 : 4; + } cif_cntl_t; + +typedef union { + unsigned long val : 32; + cif_cntl_t f; +} cif_cntl_u; + +typedef struct _cfgreg_base_t { /* 24-bit base, 8 padding bits */ + unsigned long cfgreg_base : 24; + unsigned long : 8; + } cfgreg_base_t; + +typedef union { + unsigned long val : 32; + cfgreg_base_t f; +} cfgreg_base_u; + +typedef struct
_cif_io_t { + unsigned long dq_srp : 1; /* three identical 10-bit groups follow (dq, waitb, intb), each made of srp, srn, sp and sn fields, then 2 padding bits: 32 total */ + unsigned long dq_srn : 1; + unsigned long dq_sp : 4; + unsigned long dq_sn : 4; + unsigned long waitb_srp : 1; + unsigned long waitb_srn : 1; + unsigned long waitb_sp : 4; + unsigned long waitb_sn : 4; + unsigned long intb_srp : 1; + unsigned long intb_srn : 1; + unsigned long intb_sp : 4; + unsigned long intb_sn : 4; + unsigned long : 2; + } cif_io_t; + +typedef union { /* overlays the bit-fields (f) with the raw register word (val) */ + unsigned long val : 32; + cif_io_t f; +} cif_io_u; + +typedef struct _cif_read_dbg_t { /* named fields cover 25 bits; the trailing unnamed field pads the remaining 7 */ + unsigned long unpacker_pre_fetch_trig_gen : 2; + unsigned long dly_second_rd_fetch_trig : 1; + unsigned long rst_rd_burst_id : 1; + unsigned long dis_rd_burst_id : 1; + unsigned long en_block_rd_when_packer_is_not_emp : 1; + unsigned long dis_pre_fetch_cntl_sm : 1; + unsigned long rbbm_chrncy_dis : 1; + unsigned long rbbm_rd_after_wr_lat : 2; + unsigned long dis_be_during_rd : 1; + unsigned long one_clk_invalidate_pulse : 1; + unsigned long dis_chnl_priority : 1; + unsigned long rst_read_path_a_pls : 1; + unsigned long rst_read_path_b_pls : 1; + unsigned long dis_reg_rd_fetch_trig : 1; + unsigned long dis_rd_fetch_trig_from_ind_addr : 1; + unsigned long dis_rd_same_byte_to_trig_fetch : 1; + unsigned long dis_dir_wrap : 1; + unsigned long dis_ring_buf_to_force_dec : 1; + unsigned long dis_addr_comp_in_16bit : 1; + unsigned long clr_w : 1; + unsigned long err_rd_tag_is_3 : 1; + unsigned long err_load_when_ful_a : 1; + unsigned long err_load_when_ful_b : 1; + unsigned long : 7; + } cif_read_dbg_t; + +typedef union { + unsigned long val : 32; + cif_read_dbg_t f; +} cif_read_dbg_u; + +typedef struct _cif_write_dbg_t { /* this struct continues past the fields listed here; see the closing brace for the total */ + unsigned long packer_timeout_count : 2; + unsigned long en_upper_load_cond : 1; + unsigned long en_chnl_change_cond : 1; + unsigned long dis_addr_comp_cond : 1; + unsigned long dis_load_same_byte_addr_cond : 1; + unsigned long dis_timeout_cond : 1; + unsigned long dis_timeout_during_rbbm : 1; + unsigned long dis_packer_ful_during_rbbm_timeout : 1; +
unsigned long en_dword_split_to_rbbm : 1; + unsigned long en_dummy_val : 1; + unsigned long dummy_val_sel : 1; + unsigned long mask_pm4_wrptr_dec : 1; + unsigned long dis_mc_clean_cond : 1; + unsigned long err_two_reqi_during_ful : 1; + unsigned long err_reqi_during_idle_clk : 1; + unsigned long err_global : 1; + unsigned long en_wr_buf_dbg_load : 1; + unsigned long en_wr_buf_dbg_path : 1; + unsigned long sel_wr_buf_byte : 3; + unsigned long dis_rd_flush_wr : 1; + unsigned long dis_packer_ful_cond : 1; + unsigned long dis_invalidate_by_ops_chnl : 1; + unsigned long en_halt_when_reqi_err : 1; + unsigned long cif_spare_2 : 5; + unsigned long : 1; + } cif_write_dbg_t; /* all cif_write_dbg_t fields together, including the single padding bit, total 32 bits */ + +typedef union { + unsigned long val : 32; + cif_write_dbg_t f; +} cif_write_dbg_u; + +typedef struct _ind_addr_a_0_t { /* from here the registers are 8 bits wide and use unsigned char bit-fields; ind_addr_a_0, _1 and _2 are three separate one-byte registers */ + unsigned char ind_addr_a_0 : 8; + } ind_addr_a_0_t; + +typedef union { + unsigned char val : 8; + ind_addr_a_0_t f; +} ind_addr_a_0_u; + +typedef struct _ind_addr_a_1_t { + unsigned char ind_addr_a_1 : 8; + } ind_addr_a_1_t; + +typedef union { + unsigned char val : 8; + ind_addr_a_1_t f; +} ind_addr_a_1_u; + +typedef struct _ind_addr_a_2_t { + unsigned char ind_addr_a_2 : 8; + } ind_addr_a_2_t; + +typedef union { + unsigned char val : 8; + ind_addr_a_2_t f; +} ind_addr_a_2_u; + +typedef struct _ind_data_a_t { + unsigned char ind_data_a : 8; + } ind_data_a_t; + +typedef union { + unsigned char val : 8; + ind_data_a_t f; +} ind_data_a_u; + +typedef struct _reg_base_t { /* single 8-bit field; defREG_BASE and REG_BASE_BOOT_DEFAULT both give it the value 1 */ + unsigned char reg_base : 8; + } reg_base_t; + +typedef union { + unsigned char val : 8; + reg_base_t f; +} reg_base_u; + +typedef struct _intf_cntl_t { /* four 1-bit flags for channel a, then the same four for channel b. defINTF_CNTL is 0x11, i.e. ad_inc_a and ad_inc_b set, assuming fields are packed from bit 0 upward. */ + unsigned char ad_inc_a : 1; + unsigned char ring_buf_a : 1; + unsigned char rd_fetch_trigger_a : 1; + unsigned char rd_data_rdy_a : 1; + unsigned char ad_inc_b : 1; + unsigned char ring_buf_b : 1; + unsigned char rd_fetch_trigger_b : 1; + unsigned char rd_data_rdy_b : 1; + } intf_cntl_t; + +typedef union { + unsigned char val : 8; + intf_cntl_t f; +} intf_cntl_u; +
+typedef struct _status_t { /* 8-bit register described with unsigned char bit-fields; widths 2, 1, 1, 2, 1, 1 fill the byte. The paired status_u union overlays these fields (f) with the raw byte (val). */ + unsigned char wr_fifo_available_space : 2; + unsigned char fbuf_wr_pipe_emp : 1; + unsigned char soft_reset : 1; + unsigned char system_pwm_mode : 2; + unsigned char mem_access_dis : 1; + unsigned char en_pre_fetch : 1; + } status_t; + +typedef union { + unsigned char val : 8; + status_t f; +} status_u; + +typedef struct _cpu_defaults_t { /* eight 1-bit flags. defCPU_DEFAULTS is 0x06, i.e. access_ind_addr_a and access_ind_addr_b set, assuming fields are packed from bit 0 upward. */ + unsigned char unpack_rd_data : 1; + unsigned char access_ind_addr_a : 1; + unsigned char access_ind_addr_b : 1; + unsigned char access_scratch_reg : 1; + unsigned char pack_wr_data : 1; + unsigned char transition_size : 1; + unsigned char en_read_buf_mode : 1; + unsigned char rd_fetch_scratch : 1; + } cpu_defaults_t; + +typedef union { + unsigned char val : 8; + cpu_defaults_t f; +} cpu_defaults_u; + +typedef struct _ind_addr_b_0_t { /* ind_addr_b_0, _1 and _2 are three separate one-byte registers, each a single 8-bit field */ + unsigned char ind_addr_b_0 : 8; + } ind_addr_b_0_t; + +typedef union { + unsigned char val : 8; + ind_addr_b_0_t f; +} ind_addr_b_0_u; + +typedef struct _ind_addr_b_1_t { + unsigned char ind_addr_b_1 : 8; + } ind_addr_b_1_t; + +typedef union { + unsigned char val : 8; + ind_addr_b_1_t f; +} ind_addr_b_1_u; + +typedef struct _ind_addr_b_2_t { + unsigned char ind_addr_b_2 : 8; + } ind_addr_b_2_t; + +typedef union { + unsigned char val : 8; + ind_addr_b_2_t f; +} ind_addr_b_2_u; + +typedef struct _ind_data_b_t { + unsigned char ind_data_b : 8; + } ind_data_b_t; + +typedef union { + unsigned char val : 8; + ind_data_b_t f; +} ind_data_b_u; + +typedef struct _pm4_rptr_t { + unsigned char pm4_rptr : 8; + } pm4_rptr_t; + +typedef union { + unsigned char val : 8; + pm4_rptr_t f; +} pm4_rptr_u; + +typedef struct _scratch_t { + unsigned char scratch : 8; + } scratch_t; + +typedef union { + unsigned char val : 8; + scratch_t f; +} scratch_u; + +typedef struct _pm4_wrptr_0_t { /* pm4_wrptr is split over two byte registers: 8 bits here and 6 more in pm4_wrptr_1_t */ + unsigned char pm4_wrptr_0 : 8; + } pm4_wrptr_0_t; + +typedef union { + unsigned char val : 8; + pm4_wrptr_0_t f; +} pm4_wrptr_0_u; + +typedef struct _pm4_wrptr_1_t { + unsigned char pm4_wrptr_1 : 6; +
unsigned char rd_fetch_pm4_rptr : 1; + unsigned char wrptr_atomic_update_w : 1; + } pm4_wrptr_1_t; + +typedef union { + unsigned char val : 8; + pm4_wrptr_1_t f; +} pm4_wrptr_1_u; + +typedef struct _cp_rb_cntl_t { /* 32-bit registers resume here (unsigned long bit-fields). rb_bufsz takes the RB_SIZE codes listed under the CP_RB_CNTL.rb_bufsz heading of this header. Widths including the unnamed padding fields total 32. */ + unsigned long rb_bufsz : 6; + unsigned long : 2; + unsigned long rb_blksz : 6; + unsigned long : 2; + unsigned long buf_swap : 2; + unsigned long max_fetch : 2; + unsigned long : 7; + unsigned long rb_no_update : 1; + unsigned long : 3; + unsigned long rb_rptr_wr_ena : 1; + } cp_rb_cntl_t; + +typedef union { /* overlays the bit-fields (f) with the raw register word (val) */ + unsigned long val : 32; + cp_rb_cntl_t f; +} cp_rb_cntl_u; + +typedef struct _cp_rb_base_t { /* 2 unnamed low bits precede the 22-bit rb_base, then 8 padding bits */ + unsigned long : 2; + unsigned long rb_base : 22; + unsigned long : 8; + } cp_rb_base_t; + +typedef union { + unsigned long val : 32; + cp_rb_base_t f; +} cp_rb_base_u; + +typedef struct _cp_rb_rptr_addr_t { /* same 2, 22, 8 split as cp_rb_base_t, but the first 2 bits are the named rb_rptr_swap field */ + unsigned long rb_rptr_swap : 2; + unsigned long rb_rptr_addr : 22; + unsigned long : 8; + } cp_rb_rptr_addr_t; + +typedef union { + unsigned long val : 32; + cp_rb_rptr_addr_t f; +} cp_rb_rptr_addr_u; + +typedef struct _cp_rb_rptr_t { /* the three ring-buffer pointer registers (rptr, rptr_wr, wptr) share one shape: a 23-bit value and 9 padding bits */ + unsigned long rb_rptr : 23; + unsigned long : 9; + } cp_rb_rptr_t; + +typedef union { + unsigned long val : 32; + cp_rb_rptr_t f; +} cp_rb_rptr_u; + +typedef struct _cp_rb_rptr_wr_t { + unsigned long rb_rptr_wr : 23; + unsigned long : 9; + } cp_rb_rptr_wr_t; + +typedef union { + unsigned long val : 32; + cp_rb_rptr_wr_t f; +} cp_rb_rptr_wr_u; + +typedef struct _cp_rb_wptr_t { + unsigned long rb_wptr : 23; + unsigned long : 9; + } cp_rb_wptr_t; + +typedef union { + unsigned long val : 32; + cp_rb_wptr_t f; +} cp_rb_wptr_u; + +typedef struct _cp_ib_base_t { /* same 2, 22, 8 split as cp_rb_base_t */ + unsigned long : 2; + unsigned long ib_base : 22; + unsigned long : 8; + } cp_ib_base_t; + +typedef union { + unsigned long val : 32; + cp_ib_base_t f; +} cp_ib_base_u; + +typedef struct _cp_ib_bufsz_t { /* 23-bit size, 9 padding bits */ + unsigned long ib_bufsz : 23; + unsigned long : 9; + } cp_ib_bufsz_t; + +typedef union { + unsigned long val : 32; + cp_ib_bufsz_t f; +} cp_ib_bufsz_u; + +typedef
struct _cp_csq_cntl_t { + unsigned long csq_cnt_primary : 8; /* cp_csq_cntl_t: two 8-bit counts, 12 padding bits, then a 4-bit csq_mode in the top nibble (top assuming fields are packed from bit 0 upward) */ + unsigned long csq_cnt_indirect : 8; + unsigned long : 12; + unsigned long csq_mode : 4; + } cp_csq_cntl_t; + +typedef union { /* pattern used throughout: NAME_u overlays the NAME_t bit-fields (f) with the raw register word (val) */ + unsigned long val : 32; + cp_csq_cntl_t f; +} cp_csq_cntl_u; + +typedef struct _cp_csq_aper_primary_t { /* one full-width 32-bit field */ + unsigned long cp_csq_aper_primary : 32; + } cp_csq_aper_primary_t; + +typedef union { + unsigned long val : 32; + cp_csq_aper_primary_t f; +} cp_csq_aper_primary_u; + +typedef struct _cp_csq_aper_indirect_t { /* one full-width 32-bit field */ + unsigned long cp_csq_aper_indirect : 32; + } cp_csq_aper_indirect_t; + +typedef union { + unsigned long val : 32; + cp_csq_aper_indirect_t f; +} cp_csq_aper_indirect_u; + +typedef struct _cp_me_cntl_t { /* widths 16, 5, 8 (padding), 1, 1, 1 total 32. With fields packed from bit 0 upward, me_busy, me_mode and me_step are bits 29, 30 and 31, so defCP_ME_CNTL (0x40000000) sets only me_mode. */ + unsigned long me_stat : 16; + unsigned long me_statmux : 5; + unsigned long : 8; + unsigned long me_busy : 1; + unsigned long me_mode : 1; + unsigned long me_step : 1; + } cp_me_cntl_t; + +typedef union { + unsigned long val : 32; + cp_me_cntl_t f; +} cp_me_cntl_u; + +typedef struct _cp_me_ram_addr_t { /* 8-bit address, 24 padding bits; cp_me_ram_raddr_t below has the same shape */ + unsigned long me_ram_addr : 8; + unsigned long : 24; + } cp_me_ram_addr_t; + +typedef union { + unsigned long val : 32; + cp_me_ram_addr_t f; +} cp_me_ram_addr_u; + +typedef struct _cp_me_ram_raddr_t { + unsigned long me_ram_raddr : 8; + unsigned long : 24; + } cp_me_ram_raddr_t; + +typedef union { + unsigned long val : 32; + cp_me_ram_raddr_t f; +} cp_me_ram_raddr_u; + +typedef struct _cp_me_ram_datah_t { /* only 6 bits are used in the high word, while me_ram_datal below is a full 32 bits */ + unsigned long me_ram_datah : 6; + unsigned long : 26; + } cp_me_ram_datah_t; + +typedef union { + unsigned long val : 32; + cp_me_ram_datah_t f; +} cp_me_ram_datah_u; + +typedef struct _cp_me_ram_datal_t { + unsigned long me_ram_datal : 32; + } cp_me_ram_datal_t; + +typedef union { + unsigned long val : 32; + cp_me_ram_datal_t f; +} cp_me_ram_datal_u; + +typedef struct _cp_debug_t { /* one full-width 32-bit field */ + unsigned long cp_debug : 32; + } cp_debug_t; + +typedef union { + unsigned long val : 32; + cp_debug_t f; +} cp_debug_u; + +typedef struct _scratch_reg0_t { + unsigned long
scratch_reg0 : 32; + } scratch_reg0_t; + +typedef union { /* pattern used throughout: NAME_u overlays the NAME_t bit-fields (f) with the raw register word (val) */ + unsigned long val : 32; + scratch_reg0_t f; +} scratch_reg0_u; + +typedef struct _scratch_reg1_t { /* scratch_reg0 through scratch_reg5 are six registers of identical shape: one full-width 32-bit field each */ + unsigned long scratch_reg1 : 32; + } scratch_reg1_t; + +typedef union { + unsigned long val : 32; + scratch_reg1_t f; +} scratch_reg1_u; + +typedef struct _scratch_reg2_t { + unsigned long scratch_reg2 : 32; + } scratch_reg2_t; + +typedef union { + unsigned long val : 32; + scratch_reg2_t f; +} scratch_reg2_u; + +typedef struct _scratch_reg3_t { + unsigned long scratch_reg3 : 32; + } scratch_reg3_t; + +typedef union { + unsigned long val : 32; + scratch_reg3_t f; +} scratch_reg3_u; + +typedef struct _scratch_reg4_t { + unsigned long scratch_reg4 : 32; + } scratch_reg4_t; + +typedef union { + unsigned long val : 32; + scratch_reg4_t f; +} scratch_reg4_u; + +typedef struct _scratch_reg5_t { + unsigned long scratch_reg5 : 32; + } scratch_reg5_t; + +typedef union { + unsigned long val : 32; + scratch_reg5_t f; +} scratch_reg5_u; + +typedef struct _scratch_umsk_t { /* 6-bit scratch_umsk (presumably one bit per scratch_reg0 to scratch_reg5, TODO confirm), 10 padding bits, 2-bit scratch_swap, 14 padding bits: 32 total */ + unsigned long scratch_umsk : 6; + unsigned long : 10; + unsigned long scratch_swap : 2; + unsigned long : 14; + } scratch_umsk_t; + +typedef union { + unsigned long val : 32; + scratch_umsk_t f; +} scratch_umsk_u; + +typedef struct _scratch_addr_t { /* 5 unnamed low bits precede the 27-bit scratch_addr field */ + unsigned long : 5; + unsigned long scratch_addr : 27; + } scratch_addr_t; + +typedef union { + unsigned long val : 32; + scratch_addr_t f; +} scratch_addr_u; + +typedef struct _cp_csq_addr_t { /* 2 unnamed low bits, 8-bit csq_addr, 22 padding bits */ + unsigned long : 2; + unsigned long csq_addr : 8; + unsigned long : 22; + } cp_csq_addr_t; + +typedef union { + unsigned long val : 32; + cp_csq_addr_t f; +} cp_csq_addr_u; + +typedef struct _cp_csq_data_t { /* one full-width 32-bit field */ + unsigned long csq_data : 32; + } cp_csq_data_t; + +typedef union { + unsigned long val : 32; + cp_csq_data_t f; +} cp_csq_data_u; + +typedef struct _cp_csq_stat_t { /* four 8-bit pointers: read and write for the primary queue, then read and write for the indirect queue */ + unsigned long csq_rptr_primary : 8; + unsigned long csq_wptr_primary : 8; + unsigned long csq_rptr_indirect : 8; + unsigned long
csq_wptr_indirect : 8; + } cp_csq_stat_t; + +typedef union { /* pattern used throughout: NAME_u overlays the NAME_t bit-fields (f) with the raw register word (val) */ + unsigned long val : 32; + cp_csq_stat_t f; +} cp_csq_stat_u; + +typedef struct _cp_stat_t { /* thirteen 1-bit busy flags separated by two unnamed gaps of 5 and 14 bits: 32 total */ + unsigned long mru_busy : 1; + unsigned long mwu_busy : 1; + unsigned long rsiu_busy : 1; + unsigned long rciu_busy : 1; + unsigned long : 5; + unsigned long csf_primary_busy : 1; + unsigned long csf_indirect_busy : 1; + unsigned long csq_primary_busy : 1; + unsigned long csq_indirect_busy : 1; + unsigned long csi_busy : 1; + unsigned long : 14; + unsigned long guidma_busy : 1; + unsigned long viddma_busy : 1; + unsigned long cmdstrm_busy : 1; + unsigned long cp_busy : 1; + } cp_stat_t; + +typedef union { + unsigned long val : 32; + cp_stat_t f; +} cp_stat_u; + +typedef struct _gen_int_cntl_t { /* gen_int_cntl_t, gen_int_status_rd_t and gen_int_status_wr_t share one bit layout (4 flags, 15 padding, 1 flag, 8 padding, 2 flags, 2 padding); only the suffixes differ: mask, stat, and ak */ + unsigned long crtc_vblank_mask : 1; + unsigned long crtc_vline_mask : 1; + unsigned long crtc_hwint1_mask : 1; + unsigned long crtc_hwint2_mask : 1; + unsigned long : 15; + unsigned long gui_idle_mask : 1; + unsigned long : 8; + unsigned long pm4_idle_int_mask : 1; + unsigned long dvi_i2c_int_mask : 1; + unsigned long : 2; + } gen_int_cntl_t; + +typedef union { + unsigned long val : 32; + gen_int_cntl_t f; +} gen_int_cntl_u; + +typedef struct _gen_int_status_rd_t { /* with fields packed from bit 0 upward, gui_idle_stat is bit 19, the one bit set in defGEN_INT_STATUS_rd (0x00080000) */ + unsigned long crtc_vblank_stat : 1; + unsigned long crtc_vline_stat : 1; + unsigned long crtc_hwint1_stat : 1; + unsigned long crtc_hwint2_stat : 1; + unsigned long : 15; + unsigned long gui_idle_stat : 1; + unsigned long : 8; + unsigned long pm4_idle_int_stat : 1; + unsigned long dvi_i2c_int_stat : 1; + unsigned long : 2; + } gen_int_status_rd_t; + +typedef union { + unsigned long val : 32; + gen_int_status_rd_t f; +} gen_int_status_rd_u; + +typedef struct _gen_int_status_wr_t { /* write-side view of the status register; the ak suffix presumably means acknowledge (TODO confirm) */ + unsigned long crtc_vblank_stat_ak : 1; + unsigned long crtc_vline_stat_ak : 1; + unsigned long crtc_hwint1_stat_ak : 1; + unsigned long crtc_hwint2_stat_ak : 1; + unsigned long : 15; + unsigned long gui_idle_stat_ak : 1; + unsigned long : 8; + unsigned long pm4_idle_int_ak : 1; +
unsigned long dvi_i2c_int_ak : 1; + unsigned long : 2; + } gen_int_status_wr_t; + +typedef union { /* pattern used throughout: NAME_u overlays the NAME_t bit-fields (f) with the raw register word (val) */ + unsigned long val : 32; + gen_int_status_wr_t f; +} gen_int_status_wr_u; + +typedef struct _lcd_format_t { /* lcd_type takes the LCDTYPE codes listed under the LCD_FORMAT.lcd_type heading of this header. Named fields cover 17 bits; 15 padding bits follow. */ + unsigned long lcd_type : 4; + unsigned long color_to_mono : 1; + unsigned long data_inv : 1; + unsigned long stn_fm : 2; + unsigned long tft_fm : 2; + unsigned long scan_lr_en : 1; + unsigned long scan_ud_en : 1; + unsigned long pol_inv : 1; + unsigned long rst_fm : 1; + unsigned long yuv_to_rgb : 1; + unsigned long hr_tft : 1; + unsigned long ulc_panel : 1; + unsigned long : 15; + } lcd_format_t; + +typedef union { + unsigned long val : 32; + lcd_format_t f; +} lcd_format_u; + +typedef struct _graphic_ctrl_t { /* color_depth takes the COLOR_DEPTH codes listed under the GRAPHIC_CTRL.color_depth heading. Named fields cover 26 bits; 6 padding bits follow. The trailing numbers on the fields look like per-field example values rather than bit positions (TODO confirm). */ + unsigned long color_depth : 3; // 6 + unsigned long portrait_mode : 2; // 0 + unsigned long low_power_on : 1; // 1 + unsigned long req_freq : 4; // 5 + unsigned long en_crtc : 1; // 1 + unsigned long en_graphic_req : 1; // 1 + unsigned long en_graphic_crtc : 1; // 1 + unsigned long total_req_graphic : 9; // 240 + unsigned long lcd_pclk_on : 1; // 1 + unsigned long lcd_sclk_on : 1; // 1 + unsigned long pclk_running : 1; // 1 + unsigned long sclk_running : 1; // 1 + unsigned long : 6; + } graphic_ctrl_t; + +typedef union { + unsigned long val : 32; + graphic_ctrl_t f; +} graphic_ctrl_u; + +typedef struct _graphic_offset_t { /* 24-bit offset, 8 padding bits */ + unsigned long graphic_offset : 24; + unsigned long : 8; + } graphic_offset_t; + +typedef union { + unsigned long val : 32; + graphic_offset_t f; +} graphic_offset_u; + +typedef struct _graphic_pitch_t { /* 11-bit pitch, 21 padding bits */ + unsigned long graphic_pitch : 11; + unsigned long : 21; + } graphic_pitch_t; + +typedef union { + unsigned long val : 32; + graphic_pitch_t f; +} graphic_pitch_u; + +typedef struct _crtc_total_t { /* two 10-bit totals, each followed by 6 padding bits, so each value sits in its own 16-bit half */ + unsigned long crtc_h_total : 10; + unsigned long : 6; + unsigned long crtc_v_total : 10; + unsigned long : 6; + } crtc_total_t; + +typedef union { + unsigned long val : 32; + crtc_total_t f; +} crtc_total_u; + +typedef struct
_active_h_disp_t { + unsigned long active_h_start : 10; /* the four display-window registers here (active_h, active_v, graphic_h, graphic_v) share one shape: a 10-bit start and a 10-bit end, each followed by 6 padding bits */ + unsigned long : 6; + unsigned long active_h_end : 10; + unsigned long : 6; + } active_h_disp_t; + +typedef union { /* pattern used throughout: NAME_u overlays the NAME_t bit-fields (f) with the raw register word (val) */ + unsigned long val : 32; + active_h_disp_t f; +} active_h_disp_u; + +typedef struct _active_v_disp_t { + unsigned long active_v_start : 10; + unsigned long : 6; + unsigned long active_v_end : 10; + unsigned long : 6; + } active_v_disp_t; + +typedef union { + unsigned long val : 32; + active_v_disp_t f; +} active_v_disp_u; + +typedef struct _graphic_h_disp_t { + unsigned long graphic_h_start : 10; + unsigned long : 6; + unsigned long graphic_h_end : 10; + unsigned long : 6; + } graphic_h_disp_t; + +typedef union { + unsigned long val : 32; + graphic_h_disp_t f; +} graphic_h_disp_u; + +typedef struct _graphic_v_disp_t { + unsigned long graphic_v_start : 10; + unsigned long : 6; + unsigned long graphic_v_end : 10; + unsigned long : 6; + } graphic_v_disp_t; + +typedef union { + unsigned long val : 32; + graphic_v_disp_t f; +} graphic_v_disp_u; + +typedef struct _video_ctrl_t { /* video_mode takes the VIDEO_MODE codes listed under the VIDEO_CTRL.video_mode heading. The hex value after each field is that field's mask within the 32-bit word; the masks are contiguous from 0x00000001 to 0xc0000000, which shows the author expects the first field to land on bit 0. */ + unsigned long video_mode : 1; // 00000001 + unsigned long keyer_en : 1; // 00000002 + unsigned long en_video_req : 1; // 00000004 + unsigned long en_graphic_req_video : 1; // 00000008 + unsigned long en_video_crtc : 1; // 00000010 + unsigned long video_hor_exp : 2; // 00000060 + unsigned long video_ver_exp : 2; // 00000180 + unsigned long uv_combine : 1; // 00000200 + unsigned long total_req_video : 9; // 0007fc00 + unsigned long video_ch_sel : 1; // 00080000 + unsigned long video_portrait : 2; // 00300000 + unsigned long yuv2rgb_en : 1; // 00400000 + unsigned long yuv2rgb_option : 1; // 00800000 + unsigned long video_inv_hor : 1; // 01000000 + unsigned long video_inv_ver : 1; // 02000000 + unsigned long gamma_sel : 2; // 0c000000 + unsigned long dis_limit : 1; // 10000000 + unsigned long en_uv_hblend : 1; // 20000000 + unsigned long rgb_gamma_sel : 2; // c0000000 + } video_ctrl_t; + +typedef union { + unsigned long val :
32; + video_ctrl_t f; +} video_ctrl_u; + +typedef struct _graphic_key_t { /* two 16-bit halves: keyer_color then keyer_mask */ + unsigned long keyer_color : 16; + unsigned long keyer_mask : 16; + } graphic_key_t; + +typedef union { /* pattern used throughout: NAME_u overlays the NAME_t bit-fields (f) with the raw register word (val) */ + unsigned long val : 32; + graphic_key_t f; +} graphic_key_u; + +typedef struct _video_y_offset_t { /* the Y, U and V planes each get an offset register (24-bit value, 8 padding bits) and a pitch register (11-bit value, 21 padding bits) of identical shape */ + unsigned long y_offset : 24; + unsigned long : 8; + } video_y_offset_t; + +typedef union { + unsigned long val : 32; + video_y_offset_t f; +} video_y_offset_u; + +typedef struct _video_y_pitch_t { + unsigned long y_pitch : 11; + unsigned long : 21; + } video_y_pitch_t; + +typedef union { + unsigned long val : 32; + video_y_pitch_t f; +} video_y_pitch_u; + +typedef struct _video_u_offset_t { + unsigned long u_offset : 24; + unsigned long : 8; + } video_u_offset_t; + +typedef union { + unsigned long val : 32; + video_u_offset_t f; +} video_u_offset_u; + +typedef struct _video_u_pitch_t { + unsigned long u_pitch : 11; + unsigned long : 21; + } video_u_pitch_t; + +typedef union { + unsigned long val : 32; + video_u_pitch_t f; +} video_u_pitch_u; + +typedef struct _video_v_offset_t { + unsigned long v_offset : 24; + unsigned long : 8; + } video_v_offset_t; + +typedef union { + unsigned long val : 32; + video_v_offset_t f; +} video_v_offset_u; + +typedef struct _video_v_pitch_t { + unsigned long v_pitch : 11; + unsigned long : 21; + } video_v_pitch_t; + +typedef union { + unsigned long val : 32; + video_v_pitch_t f; +} video_v_pitch_u; + +typedef struct _video_h_pos_t { /* 10-bit start and 10-bit end, each followed by 6 padding bits; video_v_pos_t below has the same shape */ + unsigned long video_h_start : 10; + unsigned long : 6; + unsigned long video_h_end : 10; + unsigned long : 6; + } video_h_pos_t; + +typedef union { + unsigned long val : 32; + video_h_pos_t f; +} video_h_pos_u; + +typedef struct _video_v_pos_t { + unsigned long video_v_start : 10; + unsigned long : 6; + unsigned long video_v_end : 10; + unsigned long : 6; + } video_v_pos_t; + +typedef union { + unsigned long val : 32; + video_v_pos_t f; +} video_v_pos_u; + +typedef struct _brightness_cntl_t { /* 7-bit brightness followed by 25 padding bits */ + unsigned long brightness : 7; +
unsigned long : 25; + } brightness_cntl_t; + +typedef union { + unsigned long val : 32; + brightness_cntl_t f; +} brightness_cntl_u; + +typedef struct _cursor1_offset_t { + unsigned long cur1_offset : 24; + unsigned long cur1_x_offset : 4; + unsigned long cur1_y_offset : 4; + } cursor1_offset_t; + +typedef union { + unsigned long val : 32; + cursor1_offset_t f; +} cursor1_offset_u; + +typedef struct _cursor1_h_pos_t { + unsigned long cur1_h_start : 10; + unsigned long : 6; + unsigned long cur1_h_end : 10; + unsigned long : 5; + unsigned long cur1_en : 1; + } cursor1_h_pos_t; + +typedef union { + unsigned long val : 32; + cursor1_h_pos_t f; +} cursor1_h_pos_u; + +typedef struct _cursor1_v_pos_t { + unsigned long cur1_v_start : 10; + unsigned long : 6; + unsigned long cur1_v_end : 10; + unsigned long : 6; + } cursor1_v_pos_t; + +typedef union { + unsigned long val : 32; + cursor1_v_pos_t f; +} cursor1_v_pos_u; + +typedef struct _cursor1_color0_t { + unsigned long cur1_color0_r : 8; + unsigned long cur1_color0_g : 8; + unsigned long cur1_color0_b : 8; + unsigned long : 8; + } cursor1_color0_t; + +typedef union { + unsigned long val : 32; + cursor1_color0_t f; +} cursor1_color0_u; + +typedef struct _cursor1_color1_t { + unsigned long cur1_color1_r : 8; + unsigned long cur1_color1_g : 8; + unsigned long cur1_color1_b : 8; + unsigned long : 8; + } cursor1_color1_t; + +typedef union { + unsigned long val : 32; + cursor1_color1_t f; +} cursor1_color1_u; + +typedef struct _cursor2_offset_t { + unsigned long cur2_offset : 24; + unsigned long cur2_x_offset : 4; + unsigned long cur2_y_offset : 4; + } cursor2_offset_t; + +typedef union { + unsigned long val : 32; + cursor2_offset_t f; +} cursor2_offset_u; + +typedef struct _cursor2_h_pos_t { + unsigned long cur2_h_start : 10; + unsigned long : 6; + unsigned long cur2_h_end : 10; + unsigned long : 5; + unsigned long cur2_en : 1; + } cursor2_h_pos_t; + +typedef union { + unsigned long val : 32; + cursor2_h_pos_t f; +} 
cursor2_h_pos_u; + +typedef struct _cursor2_v_pos_t { + unsigned long cur2_v_start : 10; + unsigned long : 6; + unsigned long cur2_v_end : 10; + unsigned long : 6; + } cursor2_v_pos_t; + +typedef union { + unsigned long val : 32; + cursor2_v_pos_t f; +} cursor2_v_pos_u; + +typedef struct _cursor2_color0_t { + unsigned long cur2_color0_r : 8; + unsigned long cur2_color0_g : 8; + unsigned long cur2_color0_b : 8; + unsigned long : 8; + } cursor2_color0_t; + +typedef union { + unsigned long val : 32; + cursor2_color0_t f; +} cursor2_color0_u; + +typedef struct _cursor2_color1_t { + unsigned long cur2_color1_r : 8; + unsigned long cur2_color1_g : 8; + unsigned long cur2_color1_b : 8; + unsigned long : 8; + } cursor2_color1_t; + +typedef union { + unsigned long val : 32; + cursor2_color1_t f; +} cursor2_color1_u; + +typedef struct _disp_int_cntl_t { + unsigned long vline_int_pos : 10; + unsigned long : 6; + unsigned long hpos_int_pos : 10; + unsigned long : 4; + unsigned long vblank_int_pol : 1; + unsigned long frame_int_pol : 1; + } disp_int_cntl_t; + +typedef union { + unsigned long val : 32; + disp_int_cntl_t f; +} disp_int_cntl_u; + +typedef struct _crtc_ss_t { + unsigned long ss_start : 10; + unsigned long : 6; + unsigned long ss_end : 10; + unsigned long : 2; + unsigned long ss_align : 1; + unsigned long ss_pol : 1; + unsigned long ss_run_mode : 1; + unsigned long ss_en : 1; + } crtc_ss_t; + +typedef union { + unsigned long val : 32; + crtc_ss_t f; +} crtc_ss_u; + +typedef struct _crtc_ls_t { + unsigned long ls_start : 10; + unsigned long : 6; + unsigned long ls_end : 10; + unsigned long : 2; + unsigned long ls_align : 1; + unsigned long ls_pol : 1; + unsigned long ls_run_mode : 1; + unsigned long ls_en : 1; + } crtc_ls_t; + +typedef union { + unsigned long val : 32; + crtc_ls_t f; +} crtc_ls_u; + +typedef struct _crtc_rev_t { + unsigned long rev_pos : 10; + unsigned long : 6; + unsigned long rev_align : 1; + unsigned long rev_freq_nref : 5; + unsigned long rev_en 
: 1; + unsigned long : 9; + } crtc_rev_t; + +typedef union { + unsigned long val : 32; + crtc_rev_t f; +} crtc_rev_u; + +typedef struct _crtc_dclk_t { + unsigned long dclk_start : 10; + unsigned long : 6; + unsigned long dclk_end : 10; + unsigned long : 1; + unsigned long dclk_run_mode : 2; + unsigned long dclk_pol : 1; + unsigned long dclk_align : 1; + unsigned long dclk_en : 1; + } crtc_dclk_t; + +typedef union { + unsigned long val : 32; + crtc_dclk_t f; +} crtc_dclk_u; + +typedef struct _crtc_gs_t { + unsigned long gs_start : 10; + unsigned long : 6; + unsigned long gs_end : 10; + unsigned long : 3; + unsigned long gs_align : 1; + unsigned long gs_pol : 1; + unsigned long gs_en : 1; + } crtc_gs_t; + +typedef union { + unsigned long val : 32; + crtc_gs_t f; +} crtc_gs_u; + +typedef struct _crtc_vpos_gs_t { + unsigned long gs_vpos_start : 10; + unsigned long : 6; + unsigned long gs_vpos_end : 10; + unsigned long : 6; + } crtc_vpos_gs_t; + +typedef union { + unsigned long val : 32; + crtc_vpos_gs_t f; +} crtc_vpos_gs_u; + +typedef struct _crtc_gclk_t { + unsigned long gclk_start : 10; + unsigned long : 6; + unsigned long gclk_end : 10; + unsigned long : 3; + unsigned long gclk_align : 1; + unsigned long gclk_pol : 1; + unsigned long gclk_en : 1; + } crtc_gclk_t; + +typedef union { + unsigned long val : 32; + crtc_gclk_t f; +} crtc_gclk_u; + +typedef struct _crtc_goe_t { + unsigned long goe_start : 10; + unsigned long : 6; + unsigned long goe_end : 10; + unsigned long : 3; + unsigned long goe_align : 1; + unsigned long goe_pol : 1; + unsigned long goe_en : 1; + } crtc_goe_t; + +typedef union { + unsigned long val : 32; + crtc_goe_t f; +} crtc_goe_u; + +typedef struct _crtc_frame_t { + unsigned long crtc_fr_start : 10; + unsigned long : 6; + unsigned long crtc_fr_end : 10; + unsigned long : 4; + unsigned long crtc_frame_en : 1; + unsigned long crtc_frame_align : 1; + } crtc_frame_t; + +typedef union { + unsigned long val : 32; + crtc_frame_t f; +} crtc_frame_u; + 
+/* Register overlays. Each register is described by a <name>_t struct that splits one word into bit-fields and a <name>_u union that aliases that struct (member f) with a raw 32-bit bit-field (member val). Unnamed bit-fields only reserve bits. Every struct from crtc_frame_vpos_t through crtc_alw_t declares bit-fields totalling exactly 32 bits. NOTE(review): bit-field allocation order and the size of the unsigned long storage unit are implementation-defined (unsigned long is 8 bytes on LP64 ABIs) -- confirm the target ABI before relying on the in-memory layout. */ +typedef struct _crtc_frame_vpos_t { + unsigned long crtc_fr_vpos : 10; + unsigned long : 22; + } crtc_frame_vpos_t; + +typedef union { + unsigned long val : 32; + crtc_frame_vpos_t f; +} crtc_frame_vpos_u; + +typedef struct _gpio_data_t { + unsigned long gio_out : 16; + unsigned long gio_in : 16; + } gpio_data_t; + +typedef union { + unsigned long val : 32; + gpio_data_t f; +} gpio_data_u; + +typedef struct _gpio_cntl1_t { + unsigned long gio_pd : 16; + unsigned long gio_schmen : 16; + } gpio_cntl1_t; + +typedef union { + unsigned long val : 32; + gpio_cntl1_t f; +} gpio_cntl1_u; + +typedef struct _gpio_cntl2_t { + unsigned long gio_oe : 16; + unsigned long gio_srp : 1; + unsigned long gio_srn : 1; + unsigned long gio_sp : 4; + unsigned long gio_sn : 4; + unsigned long : 6; + } gpio_cntl2_t; + +typedef union { + unsigned long val : 32; + gpio_cntl2_t f; +} gpio_cntl2_u; + +typedef struct _lcdd_cntl1_t { + unsigned long lcdd_pd : 18; + unsigned long lcdd_srp : 1; + unsigned long lcdd_srn : 1; + unsigned long lcdd_sp : 4; + unsigned long lcdd_sn : 4; + unsigned long lcdd_align : 1; + unsigned long : 3; + } lcdd_cntl1_t; + +typedef union { + unsigned long val : 32; + lcdd_cntl1_t f; +} lcdd_cntl1_u; + +typedef struct _lcdd_cntl2_t { + unsigned long lcdd_oe : 18; + unsigned long : 14; + } lcdd_cntl2_t; + +typedef union { + unsigned long val : 32; + lcdd_cntl2_t f; +} lcdd_cntl2_u; + +/* genlcd_cntl1: dclk oe/pd/srp/srn flags and 4-bit sp/sn fields, then 1-bit oe/pd flag pairs for ss, ls, gs, goe, rev and frame; the last 8 declared bits are unnamed. */ +typedef struct _genlcd_cntl1_t { + unsigned long dclk_oe : 1; + unsigned long dclk_pd : 1; + unsigned long dclk_srp : 1; + unsigned long dclk_srn : 1; + unsigned long dclk_sp : 4; + unsigned long dclk_sn : 4; + unsigned long ss_oe : 1; + unsigned long ss_pd : 1; + unsigned long ls_oe : 1; + unsigned long ls_pd : 1; + unsigned long gs_oe : 1; + unsigned long gs_pd : 1; + unsigned long goe_oe : 1; + unsigned long goe_pd : 1; + unsigned long rev_oe : 1; + unsigned long rev_pd : 1; + unsigned long frame_oe : 1; + unsigned long frame_pd : 1; + unsigned long : 8; + } genlcd_cntl1_t; + +typedef 
union { + unsigned long val : 32; + genlcd_cntl1_t f; +} genlcd_cntl1_u; + +typedef struct _genlcd_cntl2_t { + unsigned long gclk_oe : 1; + unsigned long gclk_pd : 1; + unsigned long gclk_srp : 1; + unsigned long gclk_srn : 1; + unsigned long gclk_sp : 4; + unsigned long gclk_sn : 4; + unsigned long genlcd_srp : 1; + unsigned long genlcd_srn : 1; + unsigned long genlcd_sp : 4; + unsigned long genlcd_sn : 4; + unsigned long : 10; + } genlcd_cntl2_t; + +typedef union { + unsigned long val : 32; + genlcd_cntl2_t f; +} genlcd_cntl2_u; + +typedef struct _disp_debug_t { + unsigned long disp_debug : 32; + } disp_debug_t; + +typedef union { + unsigned long val : 32; + disp_debug_t f; +} disp_debug_u; + +/* disp_db_buf_cntl_rd and disp_db_buf_cntl_wr declare the same layout (1 + 1 + 6 bits, then 24 unnamed bits); only the name of the second 1-bit field differs (update_db_buf_done vs update_db_buf). NOTE(review): presumably the read and write views of one register -- confirm against the hardware documentation. */ +typedef struct _disp_db_buf_cntl_rd_t { + unsigned long en_db_buf : 1; + unsigned long update_db_buf_done : 1; + unsigned long db_buf_cntl : 6; + unsigned long : 24; + } disp_db_buf_cntl_rd_t; + +typedef union { + unsigned long val : 32; + disp_db_buf_cntl_rd_t f; +} disp_db_buf_cntl_rd_u; + +typedef struct _disp_db_buf_cntl_wr_t { + unsigned long en_db_buf : 1; + unsigned long update_db_buf : 1; + unsigned long db_buf_cntl : 6; + unsigned long : 24; + } disp_db_buf_cntl_wr_t; + +typedef union { + unsigned long val : 32; + disp_db_buf_cntl_wr_t f; +} disp_db_buf_cntl_wr_u; + +typedef struct _disp_crc_sig_t { + unsigned long crc_sig_r : 6; + unsigned long crc_sig_g : 6; + unsigned long crc_sig_b : 6; + unsigned long crc_cont_en : 1; + unsigned long crc_en : 1; + unsigned long crc_mask_en : 1; + unsigned long crc_sig_cntl : 6; + unsigned long : 5; + } disp_crc_sig_t; + +typedef union { + unsigned long val : 32; + disp_crc_sig_t f; +} disp_crc_sig_u; + +typedef struct _crtc_default_count_t { + unsigned long crtc_hcount_def : 10; + unsigned long : 6; + unsigned long crtc_vcount_def : 10; + unsigned long : 6; + } crtc_default_count_t; + +typedef union { + unsigned long val : 32; + crtc_default_count_t f; +} crtc_default_count_u; + +typedef struct _lcd_background_color_t { + 
unsigned long lcd_bg_red : 8; + unsigned long lcd_bg_green : 8; + unsigned long lcd_bg_blue : 8; + unsigned long : 8; + } lcd_background_color_t; + +typedef union { + unsigned long val : 32; + lcd_background_color_t f; +} lcd_background_color_u; + +typedef struct _crtc_ps2_t { + unsigned long ps2_start : 10; + unsigned long : 6; + unsigned long ps2_end : 10; + unsigned long : 4; + unsigned long ps2_pol : 1; + unsigned long ps2_en : 1; + } crtc_ps2_t; + +typedef union { + unsigned long val : 32; + crtc_ps2_t f; +} crtc_ps2_u; + +typedef struct _crtc_ps2_vpos_t { + unsigned long ps2_vpos_start : 10; + unsigned long : 6; + unsigned long ps2_vpos_end : 10; + unsigned long : 6; + } crtc_ps2_vpos_t; + +typedef union { + unsigned long val : 32; + crtc_ps2_vpos_t f; +} crtc_ps2_vpos_u; + +/* crtc_ps1_active and crtc_ps1_nactive both declare 10-bit start and end fields separated by 6 unnamed bits; the active variant follows them with three 1-bit flags (ps1_pol, ps1_en, ps1_use_nactive), the nactive variant with a single one (ps1_en_na). */ +typedef struct _crtc_ps1_active_t { + unsigned long ps1_h_start : 10; + unsigned long : 6; + unsigned long ps1_h_end : 10; + unsigned long : 3; + unsigned long ps1_pol : 1; + unsigned long ps1_en : 1; + unsigned long ps1_use_nactive : 1; + } crtc_ps1_active_t; + +typedef union { + unsigned long val : 32; + crtc_ps1_active_t f; +} crtc_ps1_active_u; + +typedef struct _crtc_ps1_nactive_t { + unsigned long ps1_h_start_na : 10; + unsigned long : 6; + unsigned long ps1_h_end_na : 10; + unsigned long : 5; + unsigned long ps1_en_na : 1; + } crtc_ps1_nactive_t; + +typedef union { + unsigned long val : 32; + crtc_ps1_nactive_t f; +} crtc_ps1_nactive_u; + +typedef struct _crtc_gclk_ext_t { + unsigned long gclk_alter_start : 10; + unsigned long : 6; + unsigned long gclk_alter_width : 2; + unsigned long gclk_en_alter : 1; + unsigned long gclk_db_width : 2; + unsigned long : 11; + } crtc_gclk_ext_t; + +typedef union { + unsigned long val : 32; + crtc_gclk_ext_t f; +} crtc_gclk_ext_u; + +typedef struct _crtc_alw_t { + unsigned long alw_hstart : 10; + unsigned long : 6; + unsigned long alw_hend : 10; + unsigned long : 4; + unsigned long alw_delay : 1; + unsigned long alw_en : 1; + } crtc_alw_t; + 
+typedef union { + unsigned long val : 32; + crtc_alw_t f; +} crtc_alw_u; + +typedef struct _crtc_alw_vpos_t { + unsigned long alw_vstart : 10; + unsigned long : 6; + unsigned long alw_vend : 10; + unsigned long : 6; + } crtc_alw_vpos_t; + +typedef union { + unsigned long val : 32; + crtc_alw_vpos_t f; +} crtc_alw_vpos_u; + +typedef struct _crtc_psk_t { + unsigned long psk_vstart : 10; + unsigned long : 6; + unsigned long psk_vend : 10; + unsigned long : 4; + unsigned long psk_pol : 1; + unsigned long psk_en : 1; + } crtc_psk_t; + +typedef union { + unsigned long val : 32; + crtc_psk_t f; +} crtc_psk_u; + +typedef struct _crtc_psk_hpos_t { + unsigned long psk_hstart : 10; + unsigned long : 6; + unsigned long psk_hend : 10; + unsigned long : 6; + } crtc_psk_hpos_t; + +typedef union { + unsigned long val : 32; + crtc_psk_hpos_t f; +} crtc_psk_hpos_u; + +typedef struct _crtc_cv4_start_t { + unsigned long cv4_vstart : 10; + unsigned long : 20; + unsigned long cv4_pol : 1; + unsigned long cv4_en : 1; + } crtc_cv4_start_t; + +typedef union { + unsigned long val : 32; + crtc_cv4_start_t f; +} crtc_cv4_start_u; + +typedef struct _crtc_cv4_end_t { + unsigned long cv4_vend1 : 10; + unsigned long : 6; + unsigned long cv4_vend2 : 10; + unsigned long : 6; + } crtc_cv4_end_t; + +typedef union { + unsigned long val : 32; + crtc_cv4_end_t f; +} crtc_cv4_end_u; + +typedef struct _crtc_cv4_hpos_t { + unsigned long cv4_hstart : 10; + unsigned long : 6; + unsigned long cv4_hend : 10; + unsigned long : 6; + } crtc_cv4_hpos_t; + +typedef union { + unsigned long val : 32; + crtc_cv4_hpos_t f; +} crtc_cv4_hpos_u; + +typedef struct _crtc_eck_t { + unsigned long eck_freq1 : 3; + unsigned long eck_en : 1; + unsigned long : 28; + } crtc_eck_t; + +typedef union { + unsigned long val : 32; + crtc_eck_t f; +} crtc_eck_u; + +typedef struct _refresh_cntl_t { + unsigned long ref_frame : 3; + unsigned long nref_frame : 5; + unsigned long ref_cntl : 1; + unsigned long stop_sm_nref : 1; + unsigned 
long stop_req_nref : 1; + unsigned long : 21; + } refresh_cntl_t; + +typedef union { + unsigned long val : 32; + refresh_cntl_t f; +} refresh_cntl_u; + +typedef struct _genlcd_cntl3_t { + unsigned long ps1_oe : 1; + unsigned long ps1_pd : 1; + unsigned long ps2_oe : 1; + unsigned long ps2_pd : 1; + unsigned long rev2_oe : 1; + unsigned long rev2_pd : 1; + unsigned long awl_oe : 1; + unsigned long awl_pd : 1; + unsigned long dinv_oe : 1; + unsigned long dinv_pd : 1; + unsigned long psk_out : 1; + unsigned long psd_out : 1; + unsigned long eck_out : 1; + unsigned long cv4_out : 1; + unsigned long ps1_out : 1; + unsigned long ps2_out : 1; + unsigned long rev_out : 1; + unsigned long rev2_out : 1; + unsigned long : 14; + } genlcd_cntl3_t; + +typedef union { + unsigned long val : 32; + genlcd_cntl3_t f; +} genlcd_cntl3_u; + +typedef struct _gpio_data2_t { + unsigned long gio2_out : 16; + unsigned long gio2_in : 16; + } gpio_data2_t; + +typedef union { + unsigned long val : 32; + gpio_data2_t f; +} gpio_data2_u; + +typedef struct _gpio_cntl3_t { + unsigned long gio2_pd : 16; + unsigned long gio2_schmen : 16; + } gpio_cntl3_t; + +typedef union { + unsigned long val : 32; + gpio_cntl3_t f; +} gpio_cntl3_u; + +typedef struct _gpio_cntl4_t { + unsigned long gio2_oe : 16; + unsigned long : 16; + } gpio_cntl4_t; + +typedef union { + unsigned long val : 32; + gpio_cntl4_t f; +} gpio_cntl4_u; + +typedef struct _chip_strap_t { + unsigned long config_strap : 8; + unsigned long pkg_strap : 1; + unsigned long : 23; + } chip_strap_t; + +typedef union { + unsigned long val : 32; + chip_strap_t f; +} chip_strap_u; + +typedef struct _disp_debug2_t { + unsigned long disp_debug2 : 32; + } disp_debug2_t; + +typedef union { + unsigned long val : 32; + disp_debug2_t f; +} disp_debug2_u; + +typedef struct _debug_bus_cntl_t { + unsigned long debug_testmux : 4; + unsigned long debug_testsel : 4; + unsigned long debug_gioa_sel : 2; + unsigned long debug_giob_sel : 2; + unsigned long 
debug_clk_sel : 1; + unsigned long debug_clk_inv : 1; + unsigned long : 2; + unsigned long debug_bus : 16; + } debug_bus_cntl_t; + +typedef union { + unsigned long val : 32; + debug_bus_cntl_t f; +} debug_bus_cntl_u; + +typedef struct _gamma_value1_t { + unsigned long gamma1 : 8; + unsigned long gamma2 : 8; + unsigned long gamma3 : 8; + unsigned long gamma4 : 8; + } gamma_value1_t; + +typedef union { + unsigned long val : 32; + gamma_value1_t f; +} gamma_value1_u; + +typedef struct _gamma_value2_t { + unsigned long gamma5 : 8; + unsigned long gamma6 : 8; + unsigned long gamma7 : 8; + unsigned long gamma8 : 8; + } gamma_value2_t; + +typedef union { + unsigned long val : 32; + gamma_value2_t f; +} gamma_value2_u; + +typedef struct _gamma_slope_t { + unsigned long slope1 : 3; + unsigned long slope2 : 3; + unsigned long slope3 : 3; + unsigned long slope4 : 3; + unsigned long slope5 : 3; + unsigned long slope6 : 3; + unsigned long slope7 : 3; + unsigned long slope8 : 3; + unsigned long : 8; + } gamma_slope_t; + +typedef union { + unsigned long val : 32; + gamma_slope_t f; +} gamma_slope_u; + +typedef struct _gen_status_t { + unsigned long status : 16; + unsigned long : 16; + } gen_status_t; + +typedef union { + unsigned long val : 32; + gen_status_t f; +} gen_status_u; + +typedef struct _hw_int_t { + unsigned long hwint1_pos : 5; + unsigned long hwint2_pos : 5; + unsigned long hwint1_pol : 1; + unsigned long hwint2_pol : 1; + unsigned long hwint1_en_db : 1; + unsigned long hwint2_en_db : 1; + unsigned long : 18; + } hw_int_t; + +typedef union { + unsigned long val : 32; + hw_int_t f; +} hw_int_u; + +typedef struct _dst_offset_t { + unsigned long dst_offset : 24; + unsigned long : 8; + } dst_offset_t; + +typedef union { + unsigned long val : 32; + dst_offset_t f; +} dst_offset_u; + +typedef struct _dst_pitch_t { + unsigned long dst_pitch : 14; + unsigned long mc_dst_pitch_mul : 2; + unsigned long : 16; + } dst_pitch_t; + +typedef union { + unsigned long val : 32; + 
dst_pitch_t f; +} dst_pitch_u; + +typedef struct _dst_pitch_offset_t { + unsigned long dst_offset : 20; + unsigned long dst_pitch : 10; + unsigned long mc_dst_pitch_mul : 2; + } dst_pitch_offset_t; + +typedef union { + unsigned long val : 32; + dst_pitch_offset_t f; +} dst_pitch_offset_u; + +typedef struct _dst_x_t { + unsigned long dst_x : 14; + unsigned long : 18; + } dst_x_t; + +typedef union { + unsigned long val : 32; + dst_x_t f; +} dst_x_u; + +typedef struct _dst_y_t { + unsigned long dst_y : 14; + unsigned long : 18; + } dst_y_t; + +typedef union { + unsigned long val : 32; + dst_y_t f; +} dst_y_u; + +typedef struct _dst_x_y_t { + unsigned long dst_y : 14; + unsigned long : 2; + unsigned long dst_x : 14; + unsigned long : 2; + } dst_x_y_t; + +typedef union { + unsigned long val : 32; + dst_x_y_t f; +} dst_x_y_u; + +typedef struct _dst_y_x_t { + unsigned long dst_x : 14; + unsigned long : 2; + unsigned long dst_y : 14; + unsigned long : 2; + } dst_y_x_t; + +typedef union { + unsigned long val : 32; + dst_y_x_t f; +} dst_y_x_u; + +typedef struct _dst_width_t { + unsigned long dst_width_b0 : 8; + unsigned long dst_width_b1 : 6; + unsigned long : 18; + } dst_width_t; + +typedef union { + unsigned long val : 32; + dst_width_t f; +} dst_width_u; + +typedef struct _dst_height_t { + unsigned long dst_height : 14; + unsigned long : 18; + } dst_height_t; + +typedef union { + unsigned long val : 32; + dst_height_t f; +} dst_height_u; + +typedef struct _dst_width_height_t { + unsigned long dst_height : 14; + unsigned long : 2; + unsigned long dst_width_b0 : 8; + unsigned long dst_width_b1 : 6; + unsigned long : 2; + } dst_width_height_t; + +typedef union { + unsigned long val : 32; + dst_width_height_t f; +} dst_width_height_u; + +typedef struct _dst_height_width_t { + unsigned long dst_width_b0 : 8; + unsigned long dst_width_b1 : 6; + unsigned long : 2; + unsigned long dst_height : 14; + unsigned long : 2; + } dst_height_width_t; + +typedef union { + unsigned long val 
: 32; + dst_height_width_t f; +} dst_height_width_u; + +typedef struct _dst_height_width_8_t { + unsigned long : 16; + unsigned long dst_width_b0 : 8; + unsigned long dst_height : 8; + } dst_height_width_8_t; + +typedef union { + unsigned long val : 32; + dst_height_width_8_t f; +} dst_height_width_8_u; + +typedef struct _dst_height_y_t { + unsigned long dst_y : 14; + unsigned long : 2; + unsigned long dst_height : 14; + unsigned long : 2; + } dst_height_y_t; + +typedef union { + unsigned long val : 32; + dst_height_y_t f; +} dst_height_y_u; + +typedef struct _dst_width_x_t { + unsigned long dst_x : 14; + unsigned long : 2; + unsigned long dst_width_b0 : 8; + unsigned long dst_width_b1 : 6; + unsigned long : 2; + } dst_width_x_t; + +typedef union { + unsigned long val : 32; + dst_width_x_t f; +} dst_width_x_u; + +typedef struct _dst_width_x_incy_t { + unsigned long dst_x : 14; + unsigned long : 2; + unsigned long dst_width_b0 : 8; + unsigned long dst_width_b1 : 6; + unsigned long : 2; + } dst_width_x_incy_t; + +typedef union { + unsigned long val : 32; + dst_width_x_incy_t f; +} dst_width_x_incy_u; + +typedef struct _dst_line_start_t { + unsigned long dst_start_x : 14; + unsigned long : 2; + unsigned long dst_start_y : 14; + unsigned long : 2; + } dst_line_start_t; + +typedef union { + unsigned long val : 32; + dst_line_start_t f; +} dst_line_start_u; + +typedef struct _dst_line_end_t { + unsigned long dst_end_x : 14; + unsigned long : 2; + unsigned long dst_end_y_b0 : 8; + unsigned long dst_end_y_b1 : 6; + unsigned long : 2; + } dst_line_end_t; + +typedef union { + unsigned long val : 32; + dst_line_end_t f; +} dst_line_end_u; + +typedef struct _brush_offset_t { + unsigned long brush_offset : 24; + unsigned long : 8; + } brush_offset_t; + +typedef union { + unsigned long val : 32; + brush_offset_t f; +} brush_offset_u; + +typedef struct _brush_y_x_t { + unsigned long brush_x : 5; + unsigned long : 3; + unsigned long brush_y : 3; + unsigned long : 21; + } 
brush_y_x_t; + +typedef union { + unsigned long val : 32; + brush_y_x_t f; +} brush_y_x_u; + +typedef struct _dp_brush_frgd_clr_t { + unsigned long dp_brush_frgd_clr : 32; + } dp_brush_frgd_clr_t; + +typedef union { + unsigned long val : 32; + dp_brush_frgd_clr_t f; +} dp_brush_frgd_clr_u; + +typedef struct _dp_brush_bkgd_clr_t { + unsigned long dp_brush_bkgd_clr : 32; + } dp_brush_bkgd_clr_t; + +typedef union { + unsigned long val : 32; + dp_brush_bkgd_clr_t f; +} dp_brush_bkgd_clr_u; + +typedef struct _src2_offset_t { + unsigned long src2_offset : 24; + unsigned long : 8; + } src2_offset_t; + +typedef union { + unsigned long val : 32; + src2_offset_t f; +} src2_offset_u; + +typedef struct _src2_pitch_t { + unsigned long src2_pitch : 14; + unsigned long src2_pitch_mul : 2; + unsigned long : 16; + } src2_pitch_t; + +typedef union { + unsigned long val : 32; + src2_pitch_t f; +} src2_pitch_u; + +typedef struct _src2_pitch_offset_t { + unsigned long src2_offset : 20; + unsigned long : 2; + unsigned long src2_pitch : 8; + unsigned long src2_pitch_mul : 2; + } src2_pitch_offset_t; + +typedef union { + unsigned long val : 32; + src2_pitch_offset_t f; +} src2_pitch_offset_u; + +typedef struct _src2_x_t { + unsigned long src_x : 14; + unsigned long : 18; + } src2_x_t; + +typedef union { + unsigned long val : 32; + src2_x_t f; +} src2_x_u; + +typedef struct _src2_y_t { + unsigned long src_y : 14; + unsigned long : 18; + } src2_y_t; + +typedef union { + unsigned long val : 32; + src2_y_t f; +} src2_y_u; + +typedef struct _src2_x_y_t { + unsigned long src_y : 14; + unsigned long : 2; + unsigned long src_x : 14; + unsigned long : 2; + } src2_x_y_t; + +typedef union { + unsigned long val : 32; + src2_x_y_t f; +} src2_x_y_u; + +typedef struct _src2_width_t { + unsigned long src2_width : 14; + unsigned long : 18; + } src2_width_t; + +typedef union { + unsigned long val : 32; + src2_width_t f; +} src2_width_u; + +typedef struct _src2_height_t { + unsigned long src2_height : 14; + 
unsigned long : 18; + } src2_height_t; + +typedef union { + unsigned long val : 32; + src2_height_t f; +} src2_height_u; + +typedef struct _src2_inc_t { + unsigned long src2_xinc : 6; + unsigned long : 2; + unsigned long src2_yinc : 6; + unsigned long : 18; + } src2_inc_t; + +typedef union { + unsigned long val : 32; + src2_inc_t f; +} src2_inc_u; + +typedef struct _src_offset_t { + unsigned long src_offset : 24; + unsigned long : 8; + } src_offset_t; + +typedef union { + unsigned long val : 32; + src_offset_t f; +} src_offset_u; + +typedef struct _src_pitch_t { + unsigned long src_pitch : 14; + unsigned long src_pitch_mul : 2; + unsigned long : 16; + } src_pitch_t; + +typedef union { + unsigned long val : 32; + src_pitch_t f; +} src_pitch_u; + +typedef struct _src_pitch_offset_t { + unsigned long src_offset : 20; + unsigned long src_pitch : 10; + unsigned long src_pitch_mul : 2; + } src_pitch_offset_t; + +typedef union { + unsigned long val : 32; + src_pitch_offset_t f; +} src_pitch_offset_u; + +typedef struct _src_x_t { + unsigned long src_x : 14; + unsigned long : 18; + } src_x_t; + +typedef union { + unsigned long val : 32; + src_x_t f; +} src_x_u; + +typedef struct _src_y_t { + unsigned long src_y : 14; + unsigned long : 18; + } src_y_t; + +typedef union { + unsigned long val : 32; + src_y_t f; +} src_y_u; + +typedef struct _src_x_y_t { + unsigned long src_y : 14; + unsigned long : 2; + unsigned long src_x : 14; + unsigned long : 2; + } src_x_y_t; + +typedef union { + unsigned long val : 32; + src_x_y_t f; +} src_x_y_u; + +typedef struct _src_y_x_t { + unsigned long src_x : 14; + unsigned long : 2; + unsigned long src_y : 14; + unsigned long : 2; + } src_y_x_t; + +typedef union { + unsigned long val : 32; + src_y_x_t f; +} src_y_x_u; + +typedef struct _src_width_t { + unsigned long src_width : 14; + unsigned long : 18; + } src_width_t; + +typedef union { + unsigned long val : 32; + src_width_t f; +} src_width_u; + +typedef struct _src_height_t { + unsigned 
long src_height : 14; + unsigned long : 18; + } src_height_t; + +typedef union {/* In this section every *_u union overlays a raw 32-bit val with the matching *_t bitfield view f. */ + unsigned long val : 32; + src_height_t f; +} src_height_u; + +typedef struct _src_inc_t {/* widths: 6 + 2 + 6 + 18 = 32; members without a name are padding bits */ + unsigned long src_xinc : 6; + unsigned long : 2; + unsigned long src_yinc : 6; + unsigned long : 18; + } src_inc_t; + +typedef union { + unsigned long val : 32; + src_inc_t f; +} src_inc_u; + +typedef struct _host_data0_t {/* host_data0 through host_data7 each hold a single 32-bit host_data field */ + unsigned long host_data : 32; + } host_data0_t; + +typedef union { + unsigned long val : 32; + host_data0_t f; +} host_data0_u; + +typedef struct _host_data1_t { + unsigned long host_data : 32; + } host_data1_t; + +typedef union { + unsigned long val : 32; + host_data1_t f; +} host_data1_u; + +typedef struct _host_data2_t { + unsigned long host_data : 32; + } host_data2_t; + +typedef union { + unsigned long val : 32; + host_data2_t f; +} host_data2_u; + +typedef struct _host_data3_t { + unsigned long host_data : 32; + } host_data3_t; + +typedef union { + unsigned long val : 32; + host_data3_t f; +} host_data3_u; + +typedef struct _host_data4_t { + unsigned long host_data : 32; + } host_data4_t; + +typedef union { + unsigned long val : 32; + host_data4_t f; +} host_data4_u; + +typedef struct _host_data5_t { + unsigned long host_data : 32; + } host_data5_t; + +typedef union { + unsigned long val : 32; + host_data5_t f; +} host_data5_u; + +typedef struct _host_data6_t { + unsigned long host_data : 32; + } host_data6_t; + +typedef union { + unsigned long val : 32; + host_data6_t f; +} host_data6_u; + +typedef struct _host_data7_t { + unsigned long host_data : 32; + } host_data7_t; + +typedef union { + unsigned long val : 32; + host_data7_t f; +} host_data7_u; + +typedef struct _host_data_last_t { + unsigned long host_data_last : 32; + } host_data_last_t; + +typedef union { + unsigned long val : 32; + host_data_last_t f; +} host_data_last_u; + +typedef struct _dp_src_frgd_clr_t { + unsigned long dp_src_frgd_clr : 32; + } dp_src_frgd_clr_t; + +typedef union { + unsigned long 
val : 32; + dp_src_frgd_clr_t f; +} dp_src_frgd_clr_u; + +typedef struct _dp_src_bkgd_clr_t { + unsigned long dp_src_bkgd_clr : 32; + } dp_src_bkgd_clr_t; + +typedef union { + unsigned long val : 32; + dp_src_bkgd_clr_t f; +} dp_src_bkgd_clr_u; + +typedef struct _sc_left_t { + unsigned long sc_left : 14; + unsigned long : 18; + } sc_left_t; + +typedef union { + unsigned long val : 32; + sc_left_t f; +} sc_left_u; + +typedef struct _sc_right_t { + unsigned long sc_right : 14; + unsigned long : 18; + } sc_right_t; + +typedef union { + unsigned long val : 32; + sc_right_t f; +} sc_right_u; + +typedef struct _sc_top_t { + unsigned long sc_top : 14; + unsigned long : 18; + } sc_top_t; + +typedef union { + unsigned long val : 32; + sc_top_t f; +} sc_top_u; + +typedef struct _sc_bottom_t { + unsigned long sc_bottom : 14; + unsigned long : 18; + } sc_bottom_t; + +typedef union { + unsigned long val : 32; + sc_bottom_t f; +} sc_bottom_u; + +typedef struct _src_sc_right_t { + unsigned long sc_right : 14; + unsigned long : 18; + } src_sc_right_t; + +typedef union { + unsigned long val : 32; + src_sc_right_t f; +} src_sc_right_u; + +typedef struct _src_sc_bottom_t { + unsigned long sc_bottom : 14; + unsigned long : 18; + } src_sc_bottom_t; + +typedef union { + unsigned long val : 32; + src_sc_bottom_t f; +} src_sc_bottom_u; + +typedef struct _dp_cntl_t {/* six named 1-bit flags followed by 26 padding bits */ + unsigned long dst_x_dir : 1; + unsigned long dst_y_dir : 1; + unsigned long src_x_dir : 1; + unsigned long src_y_dir : 1; + unsigned long dst_major_x : 1; + unsigned long src_major_x : 1; + unsigned long : 26; + } dp_cntl_t; + +typedef union { + unsigned long val : 32; + dp_cntl_t f; +} dp_cntl_u; + +typedef struct _dp_cntl_dst_dir_t {/* reuses the names dst_y_dir and dst_x_dir from dp_cntl_t, but each flag follows 15 padding bits */ + unsigned long : 15; + unsigned long dst_y_dir : 1; + unsigned long : 15; + unsigned long dst_x_dir : 1; + } dp_cntl_dst_dir_t; + +typedef union { + unsigned long val : 32; + dp_cntl_dst_dir_t f; +} dp_cntl_dst_dir_u; + +typedef struct _dp_datatype_t { + unsigned long dp_dst_datatype : 4; + 
unsigned long : 4; + unsigned long dp_brush_datatype : 4; + unsigned long dp_src2_type : 1; + unsigned long dp_src2_datatype : 3; + unsigned long dp_src_datatype : 3; + unsigned long : 11; + unsigned long dp_byte_pix_order : 1; + unsigned long : 1; + } dp_datatype_t; + +typedef union { + unsigned long val : 32; + dp_datatype_t f; +} dp_datatype_u; + +typedef struct _dp_mix_t {/* widths: 8 + 3 + 3 + 2 + 8 + 1 + 7 = 32 */ + unsigned long : 8; + unsigned long dp_src_source : 3; + unsigned long dp_src2_source : 3; + unsigned long : 2; + unsigned long dp_rop3 : 8; + unsigned long dp_op : 1; + unsigned long : 7; + } dp_mix_t; + +typedef union { + unsigned long val : 32; + dp_mix_t f; +} dp_mix_u; + +typedef struct _dp_write_msk_t { + unsigned long dp_write_msk : 32; + } dp_write_msk_t; + +typedef union { + unsigned long val : 32; + dp_write_msk_t f; +} dp_write_msk_u; + +typedef struct _clr_cmp_clr_src_t { + unsigned long clr_cmp_clr_src : 32; + } clr_cmp_clr_src_t; + +typedef union { + unsigned long val : 32; + clr_cmp_clr_src_t f; +} clr_cmp_clr_src_u; + +typedef struct _clr_cmp_clr_dst_t { + unsigned long clr_cmp_clr_dst : 32; + } clr_cmp_clr_dst_t; + +typedef union { + unsigned long val : 32; + clr_cmp_clr_dst_t f; +} clr_cmp_clr_dst_u; + +typedef struct _clr_cmp_cntl_t { + unsigned long clr_cmp_fcn_src : 3; + unsigned long : 5; + unsigned long clr_cmp_fcn_dst : 3; + unsigned long : 13; + unsigned long clr_cmp_src : 2; + unsigned long : 6; + } clr_cmp_cntl_t; + +typedef union { + unsigned long val : 32; + clr_cmp_cntl_t f; +} clr_cmp_cntl_u; + +typedef struct _clr_cmp_msk_t { + unsigned long clr_cmp_msk : 32; + } clr_cmp_msk_t; + +typedef union { + unsigned long val : 32; + clr_cmp_msk_t f; +} clr_cmp_msk_u; + +typedef struct _default_pitch_offset_t { + unsigned long default_offset : 20; + unsigned long default_pitch : 10; + unsigned long : 2; + } default_pitch_offset_t; + +typedef union { + unsigned long val : 32; + default_pitch_offset_t f; +} default_pitch_offset_u; + +typedef struct 
_default_sc_bottom_right_t { + unsigned long default_sc_right : 14; + unsigned long : 2; + unsigned long default_sc_bottom : 14; + unsigned long : 2; + } default_sc_bottom_right_t; + +typedef union { + unsigned long val : 32; + default_sc_bottom_right_t f; +} default_sc_bottom_right_u; + +typedef struct _default2_sc_bottom_right_t { + unsigned long default_sc_right : 14; + unsigned long : 2; + unsigned long default_sc_bottom : 14; + unsigned long : 2; + } default2_sc_bottom_right_t; + +typedef union { + unsigned long val : 32; + default2_sc_bottom_right_t f; +} default2_sc_bottom_right_u; + +typedef struct _ref1_pitch_offset_t {/* ref1 through ref6 share this layout: 20-bit offset, 2 pad, 8-bit pitch, 2 pad */ + unsigned long offset : 20; + unsigned long : 2; + unsigned long pitch : 8; + unsigned long : 2; + } ref1_pitch_offset_t; + +typedef union { + unsigned long val : 32; + ref1_pitch_offset_t f; +} ref1_pitch_offset_u; + +typedef struct _ref2_pitch_offset_t { + unsigned long offset : 20; + unsigned long : 2; + unsigned long pitch : 8; + unsigned long : 2; + } ref2_pitch_offset_t; + +typedef union { + unsigned long val : 32; + ref2_pitch_offset_t f; +} ref2_pitch_offset_u; + +typedef struct _ref3_pitch_offset_t { + unsigned long offset : 20; + unsigned long : 2; + unsigned long pitch : 8; + unsigned long : 2; + } ref3_pitch_offset_t; + +typedef union { + unsigned long val : 32; + ref3_pitch_offset_t f; +} ref3_pitch_offset_u; + +typedef struct _ref4_pitch_offset_t { + unsigned long offset : 20; + unsigned long : 2; + unsigned long pitch : 8; + unsigned long : 2; + } ref4_pitch_offset_t; + +typedef union { + unsigned long val : 32; + ref4_pitch_offset_t f; +} ref4_pitch_offset_u; + +typedef struct _ref5_pitch_offset_t { + unsigned long offset : 20; + unsigned long : 2; + unsigned long pitch : 8; + unsigned long : 2; + } ref5_pitch_offset_t; + +typedef union { + unsigned long val : 32; + ref5_pitch_offset_t f; +} ref5_pitch_offset_u; + +typedef struct _ref6_pitch_offset_t { + unsigned long offset : 20; + unsigned long : 2; + unsigned long pitch 
: 8; + unsigned long : 2; + } ref6_pitch_offset_t; + +typedef union { + unsigned long val : 32; + ref6_pitch_offset_t f; +} ref6_pitch_offset_u; + +typedef struct _dp_gui_master_cntl_t {/* member widths total 32 bits, including one unnamed padding bit */ + unsigned long gmc_src_pitch_offset_cntl : 1; + unsigned long gmc_dst_pitch_offset_cntl : 1; + unsigned long gmc_src_clipping : 1; + unsigned long gmc_dst_clipping : 1; + unsigned long gmc_brush_datatype : 4; + unsigned long gmc_dst_datatype : 4; + unsigned long gmc_src_datatype : 3; + unsigned long gmc_byte_pix_order : 1; + unsigned long gmc_default_sel : 1; + unsigned long gmc_rop3 : 8; + unsigned long gmc_dp_src_source : 3; + unsigned long gmc_clr_cmp_fcn_dis : 1; + unsigned long : 1; + unsigned long gmc_wr_msk_dis : 1; + unsigned long gmc_dp_op : 1; + } dp_gui_master_cntl_t; + +typedef union { + unsigned long val : 32; + dp_gui_master_cntl_t f; +} dp_gui_master_cntl_u; + +typedef struct _sc_top_left_t { + unsigned long sc_left : 14; + unsigned long : 2; + unsigned long sc_top : 14; + unsigned long : 2; + } sc_top_left_t; + +typedef union { + unsigned long val : 32; + sc_top_left_t f; +} sc_top_left_u; + +typedef struct _sc_bottom_right_t { + unsigned long sc_right : 14; + unsigned long : 2; + unsigned long sc_bottom : 14; + unsigned long : 2; + } sc_bottom_right_t; + +typedef union { + unsigned long val : 32; + sc_bottom_right_t f; +} sc_bottom_right_u; + +typedef struct _src_sc_bottom_right_t { + unsigned long sc_right : 14; + unsigned long : 2; + unsigned long sc_bottom : 14; + unsigned long : 2; + } src_sc_bottom_right_t; + +typedef union { + unsigned long val : 32; + src_sc_bottom_right_t f; +} src_sc_bottom_right_u; + +typedef struct _global_alpha_t { + unsigned long alpha_r : 8; + unsigned long alpha_g : 8; + unsigned long alpha_b : 8; + unsigned long alpha_a : 8; + } global_alpha_t; + +typedef union { + unsigned long val : 32; + global_alpha_t f; +} global_alpha_u; + +typedef struct _filter_coef_t { + unsigned long c_4 : 4; + unsigned long c_3 : 4; + unsigned long c_2 : 
4; + unsigned long c_1 : 4; + unsigned long c1 : 4; + unsigned long c2 : 4; + unsigned long c3 : 4; + unsigned long c4 : 4; + } filter_coef_t; + +typedef union { + unsigned long val : 32; + filter_coef_t f; +} filter_coef_u; + +typedef struct _mvc_cntl_start_t { + unsigned long mc_cntl_src_1_index : 4; + unsigned long mc_cntl_dst_offset : 20; + unsigned long mc_dst_pitch_mul : 2; + unsigned long mc_cntl_src_2_index : 3; + unsigned long mc_cntl_width_height_sel : 3; + } mvc_cntl_start_t; + +typedef union { + unsigned long val : 32; + mvc_cntl_start_t f; +} mvc_cntl_start_u; + +typedef struct _e2_arithmetic_cntl_t { + unsigned long opcode : 5; + unsigned long shiftright : 4; + unsigned long clamp : 1; + unsigned long rounding : 2; + unsigned long filter_n : 3; + unsigned long : 1; + unsigned long srcblend_inv : 1; + unsigned long srcblend : 4; + unsigned long : 3; + unsigned long dstblend_inv : 1; + unsigned long dstblend : 4; + unsigned long dst_signed : 1; + unsigned long autoinc : 1; + unsigned long : 1; + } e2_arithmetic_cntl_t; + +typedef union { + unsigned long val : 32; + e2_arithmetic_cntl_t f; +} e2_arithmetic_cntl_u; + +typedef struct _debug0_t { + unsigned long debug0_r : 8; + unsigned long : 8; + unsigned long debug0_rw : 8; + unsigned long : 8; + } debug0_t; + +typedef union { + unsigned long val : 32; + debug0_t f; +} debug0_u; + +typedef struct _debug1_t { + unsigned long debug1_r : 8; + unsigned long : 8; + unsigned long debug1_rw : 8; + unsigned long : 8; + } debug1_t; + +typedef union { + unsigned long val : 32; + debug1_t f; +} debug1_u; + +typedef struct _debug2_t { + unsigned long debug2_r : 8; + unsigned long : 8; + unsigned long debug2_rw : 8; + unsigned long : 8; + } debug2_t; + +typedef union { + unsigned long val : 32; + debug2_t f; +} debug2_u; + +typedef struct _debug3_t {/* debug3 through debug15 expose no named fields, only 32 unnamed bits */ + unsigned long : 32; + } debug3_t; + +typedef union { + unsigned long val : 32; + debug3_t f; +} debug3_u; + +typedef struct _debug4_t { + unsigned long : 32; + } 
debug4_t; + +typedef union { + unsigned long val : 32; + debug4_t f; +} debug4_u; + +typedef struct _debug5_t { + unsigned long : 32; + } debug5_t; + +typedef union { + unsigned long val : 32; + debug5_t f; +} debug5_u; + +typedef struct _debug6_t { + unsigned long : 32; + } debug6_t; + +typedef union { + unsigned long val : 32; + debug6_t f; +} debug6_u; + +typedef struct _debug7_t { + unsigned long : 32; + } debug7_t; + +typedef union { + unsigned long val : 32; + debug7_t f; +} debug7_u; + +typedef struct _debug8_t { + unsigned long : 32; + } debug8_t; + +typedef union { + unsigned long val : 32; + debug8_t f; +} debug8_u; + +typedef struct _debug9_t { + unsigned long : 32; + } debug9_t; + +typedef union { + unsigned long val : 32; + debug9_t f; +} debug9_u; + +typedef struct _debug10_t { + unsigned long : 32; + } debug10_t; + +typedef union { + unsigned long val : 32; + debug10_t f; +} debug10_u; + +typedef struct _debug11_t { + unsigned long : 32; + } debug11_t; + +typedef union { + unsigned long val : 32; + debug11_t f; +} debug11_u; + +typedef struct _debug12_t { + unsigned long : 32; + } debug12_t; + +typedef union { + unsigned long val : 32; + debug12_t f; +} debug12_u; + +typedef struct _debug13_t { + unsigned long : 32; + } debug13_t; + +typedef union { + unsigned long val : 32; + debug13_t f; +} debug13_u; + +typedef struct _debug14_t { + unsigned long : 32; + } debug14_t; + +typedef union { + unsigned long val : 32; + debug14_t f; +} debug14_u; + +typedef struct _debug15_t { + unsigned long : 32; + } debug15_t; + +typedef union { + unsigned long val : 32; + debug15_t f; +} debug15_u; + +typedef struct _eng_cntl_t {/* widths: six 1-bit flags + 6 pad + 1 + 19 pad = 32 */ + unsigned long erc_reg_rd_ws : 1; + unsigned long erc_reg_wr_ws : 1; + unsigned long erc_idle_reg_wr : 1; + unsigned long dis_engine_triggers : 1; + unsigned long dis_rop_src_uses_dst_w_h : 1; + unsigned long dis_src_uses_dst_dirmaj : 1; + unsigned long : 6; + unsigned long force_3dclk_when_2dclk : 1; + unsigned long : 19; + } eng_cntl_t; + 
+typedef union {/* raw 32-bit val overlaid with the eng_cntl_t bitfield view f; the other *_u unions in this section follow the same pattern */ + unsigned long val : 32; + eng_cntl_t f; +} eng_cntl_u; + +typedef struct _eng_perf_cnt_t { + unsigned long perf_cnt : 20; + unsigned long perf_sel : 4; + unsigned long perf_en : 1; + unsigned long : 3; + unsigned long perf_clr : 1; + unsigned long : 3; + } eng_perf_cnt_t; + +typedef union { + unsigned long val : 32; + eng_perf_cnt_t f; +} eng_perf_cnt_u; + +typedef struct _idct_runs_t {/* four 8-bit fields, declared from idct_runs_3 down to idct_runs_0 */ + unsigned long idct_runs_3 : 8; + unsigned long idct_runs_2 : 8; + unsigned long idct_runs_1 : 8; + unsigned long idct_runs_0 : 8; + } idct_runs_t; + +typedef union { + unsigned long val : 32; + idct_runs_t f; +} idct_runs_u; + +typedef struct _idct_levels_t { + unsigned long idct_level_hi : 16; + unsigned long idct_level_lo : 16; + } idct_levels_t; + +typedef union { + unsigned long val : 32; + idct_levels_t f; +} idct_levels_u; + +typedef struct _idct_control_t { + unsigned long idct_ctl_luma_rd_format : 2; + unsigned long idct_ctl_chroma_rd_format : 2; + unsigned long idct_ctl_scan_pattern : 1; + unsigned long idct_ctl_intra : 1; + unsigned long idct_ctl_flush : 1; + unsigned long idct_ctl_passthru : 1; + unsigned long idct_ctl_sw_reset : 1; + unsigned long idct_ctl_constreq : 1; + unsigned long idct_ctl_scramble : 1; + unsigned long idct_ctl_alt_scan : 1; + unsigned long : 20; + } idct_control_t; + +typedef union { + unsigned long val : 32; + idct_control_t f; +} idct_control_u; + +typedef struct _idct_auth_control_t { + unsigned long control_bits : 32; + } idct_auth_control_t; + +typedef union { + unsigned long val : 32; + idct_auth_control_t f; +} idct_auth_control_u; + +typedef struct _idct_auth_t { + unsigned long auth : 32; + } idct_auth_t; + +typedef union { + unsigned long val : 32; + idct_auth_t f; +} idct_auth_u; + +typedef struct _mem_cntl_t { + unsigned long : 1; + unsigned long en_mem_ch1 : 1; + unsigned long en_mem_ch2 : 1; + unsigned long int_mem_mapping : 1; + unsigned long : 28; + } mem_cntl_t; + +typedef union { + unsigned long val : 32; + 
mem_cntl_t f; +} mem_cntl_u; + +typedef struct _mem_arb_t { + unsigned long disp_time_slot : 4; + unsigned long disp_timer : 4; + unsigned long arb_option : 1; + unsigned long : 23; + } mem_arb_t; + +typedef union { + unsigned long val : 32; + mem_arb_t f; +} mem_arb_u; + +typedef struct _mc_fb_location_t { + unsigned long mc_fb_start : 16; + unsigned long mc_fb_top : 16; + } mc_fb_location_t; + +typedef union { + unsigned long val : 32; + mc_fb_location_t f; +} mc_fb_location_u; + +typedef struct _mem_ext_cntl_t {/* member widths total 32 bits; two unnamed padding groups of 1 and 3 bits */ + unsigned long mem_ext_enable : 1; + unsigned long mem_ap_enable : 1; + unsigned long mem_addr_mapping : 2; + unsigned long mem_wdoe_cntl : 2; + unsigned long mem_wdoe_extend : 1; + unsigned long : 1; + unsigned long mem_page_timer : 8; + unsigned long mem_dynamic_cke : 1; + unsigned long mem_sdram_tri_en : 1; + unsigned long mem_self_refresh_en : 1; + unsigned long mem_power_down : 1; + unsigned long mem_hw_power_down_en : 1; + unsigned long mem_power_down_stat : 1; + unsigned long : 3; + unsigned long mem_pd_mck : 1; + unsigned long mem_pd_ma : 1; + unsigned long mem_pd_mdq : 1; + unsigned long mem_tristate_mck : 1; + unsigned long mem_tristate_ma : 1; + unsigned long mem_tristate_mcke : 1; + unsigned long mem_invert_mck : 1; + } mem_ext_cntl_t; + +typedef union { + unsigned long val : 32; + mem_ext_cntl_t f; +} mem_ext_cntl_u; + +typedef struct _mc_ext_mem_location_t { + unsigned long mc_ext_mem_start : 16; + unsigned long mc_ext_mem_top : 16; + } mc_ext_mem_location_t; + +typedef union { + unsigned long val : 32; + mc_ext_mem_location_t f; +} mc_ext_mem_location_u; + +typedef struct _mem_ext_timing_cntl_t { + unsigned long mem_trp : 2; + unsigned long mem_trcd : 2; + unsigned long mem_tras : 3; + unsigned long : 1; + unsigned long mem_trrd : 2; + unsigned long mem_tr2w : 2; + unsigned long mem_twr : 2; + unsigned long : 4; + unsigned long mem_twr_mode : 1; + unsigned long : 1; + unsigned long mem_refresh_dis : 1; + unsigned long : 3; + unsigned long 
mem_refresh_rate : 8; + } mem_ext_timing_cntl_t; + +typedef union { + unsigned long val : 32; + mem_ext_timing_cntl_t f; +} mem_ext_timing_cntl_u; + +typedef struct _mem_sdram_mode_reg_t { + unsigned long mem_mode_reg : 14; + unsigned long : 2; + unsigned long mem_read_latency : 2; + unsigned long mem_schmen_latency : 2; + unsigned long mem_cas_latency : 2; + unsigned long mem_schmen_extend : 1; + unsigned long : 8; + unsigned long mem_sdram_reset : 1; + } mem_sdram_mode_reg_t; + +typedef union { + unsigned long val : 32; + mem_sdram_mode_reg_t f; +} mem_sdram_mode_reg_u; + +typedef struct _mem_io_cntl_t {/* two 16-bit halves of identical shape: the mem_sn_ and mem_srn_ fields, then the mem_sp_ and mem_srp_ fields */ + unsigned long mem_sn_mck : 4; + unsigned long mem_sn_ma : 4; + unsigned long mem_sn_mdq : 4; + unsigned long mem_srn_mck : 1; + unsigned long mem_srn_ma : 1; + unsigned long mem_srn_mdq : 1; + unsigned long : 1; + unsigned long mem_sp_mck : 4; + unsigned long mem_sp_ma : 4; + unsigned long mem_sp_mdq : 4; + unsigned long mem_srp_mck : 1; + unsigned long mem_srp_ma : 1; + unsigned long mem_srp_mdq : 1; + unsigned long : 1; + } mem_io_cntl_t; + +typedef union { + unsigned long val : 32; + mem_io_cntl_t f; +} mem_io_cntl_u; + +typedef struct _mc_debug_t { + unsigned long mc_debug : 32; + } mc_debug_t; + +typedef union { + unsigned long val : 32; + mc_debug_t f; +} mc_debug_u; + +typedef struct _mc_bist_ctrl_t { + unsigned long mc_bist_ctrl : 32; + } mc_bist_ctrl_t; + +typedef union { + unsigned long val : 32; + mc_bist_ctrl_t f; +} mc_bist_ctrl_u; + +typedef struct _mc_bist_collar_read_t { + unsigned long mc_bist_collar_read : 32; + } mc_bist_collar_read_t; + +typedef union { + unsigned long val : 32; + mc_bist_collar_read_t f; +} mc_bist_collar_read_u; + +typedef struct _tc_mismatch_t { + unsigned long tc_mismatch : 24; + unsigned long : 8; + } tc_mismatch_t; + +typedef union { + unsigned long val : 32; + tc_mismatch_t f; +} tc_mismatch_u; + +typedef struct _mc_perf_mon_cntl_t { + unsigned long clr_perf : 1; + unsigned long en_perf : 1; + unsigned long : 2; + 
unsigned long perf_op_a : 2; + unsigned long perf_op_b : 2; + unsigned long : 8; + unsigned long monitor_period : 8; + unsigned long perf_count_a_overflow : 1; + unsigned long perf_count_b_overflow : 1; + unsigned long : 6; + } mc_perf_mon_cntl_t; + +typedef union { + unsigned long val : 32; + mc_perf_mon_cntl_t f; +} mc_perf_mon_cntl_u; + +typedef struct _mc_perf_counters_t { + unsigned long mc_perf_counter_a : 16; + unsigned long mc_perf_counter_b : 16; + } mc_perf_counters_t; + +typedef union { + unsigned long val : 32; + mc_perf_counters_t f; +} mc_perf_counters_u; + +typedef struct _wait_until_t {/* mostly 1-bit wait_ flags plus the 7-bit cmdfifo_entries field; widths total 32 bits */ + unsigned long wait_crtc_pflip : 1; + unsigned long wait_re_crtc_vline : 1; + unsigned long wait_fe_crtc_vline : 1; + unsigned long wait_crtc_vline : 1; + unsigned long wait_dma_viph0_idle : 1; + unsigned long wait_dma_viph1_idle : 1; + unsigned long wait_dma_viph2_idle : 1; + unsigned long wait_dma_viph3_idle : 1; + unsigned long wait_dma_vid_idle : 1; + unsigned long wait_dma_gui_idle : 1; + unsigned long wait_cmdfifo : 1; + unsigned long wait_ov0_flip : 1; + unsigned long wait_ov0_slicedone : 1; + unsigned long : 1; + unsigned long wait_2d_idle : 1; + unsigned long wait_3d_idle : 1; + unsigned long wait_2d_idleclean : 1; + unsigned long wait_3d_idleclean : 1; + unsigned long wait_host_idleclean : 1; + unsigned long wait_extern_sig : 1; + unsigned long cmdfifo_entries : 7; + unsigned long : 3; + unsigned long wait_both_crtc_pflip : 1; + unsigned long eng_display_select : 1; + } wait_until_t; + +typedef union { + unsigned long val : 32; + wait_until_t f; +} wait_until_u; + +typedef struct _isync_cntl_t { + unsigned long isync_any2d_idle3d : 1; + unsigned long isync_any3d_idle2d : 1; + unsigned long isync_trig2d_idle3d : 1; + unsigned long isync_trig3d_idle2d : 1; + unsigned long isync_wait_idlegui : 1; + unsigned long isync_cpscratch_idlegui : 1; + unsigned long : 26; + } isync_cntl_t; + +typedef union { + unsigned long val : 32; + isync_cntl_t f; +} isync_cntl_u; 
+ +typedef struct _rbbm_guicntl_t {/* Register layouts continue: each *_t struct is the bitfield view, each *_u union pairs it with a raw 32-bit val. Here: 2-bit host_data_swap, remaining 30 bits unnamed. */ + unsigned long host_data_swap : 2; + unsigned long : 30; + } rbbm_guicntl_t; + +typedef union { + unsigned long val : 32; + rbbm_guicntl_t f; +} rbbm_guicntl_u; + +typedef struct _rbbm_status_t {/* 7-bit cmdfifo_avail, then 1-bit status flags; gui_active is declared last after 6 padding bits */ + unsigned long cmdfifo_avail : 7; + unsigned long : 1; + unsigned long hirq_on_rbb : 1; + unsigned long cprq_on_rbb : 1; + unsigned long cfrq_on_rbb : 1; + unsigned long hirq_in_rtbuf : 1; + unsigned long cprq_in_rtbuf : 1; + unsigned long cfrq_in_rtbuf : 1; + unsigned long cf_pipe_busy : 1; + unsigned long eng_ev_busy : 1; + unsigned long cp_cmdstrm_busy : 1; + unsigned long e2_busy : 1; + unsigned long rb2d_busy : 1; + unsigned long rb3d_busy : 1; + unsigned long se_busy : 1; + unsigned long re_busy : 1; + unsigned long tam_busy : 1; + unsigned long tdm_busy : 1; + unsigned long pb_busy : 1; + unsigned long : 6; + unsigned long gui_active : 1; + } rbbm_status_t; + +typedef union { + unsigned long val : 32; + rbbm_status_t f; +} rbbm_status_u; + +typedef struct _rbbm_cntl_t { + unsigned long rb_settle : 4; + unsigned long abortclks_hi : 3; + unsigned long : 1; + unsigned long abortclks_cp : 3; + unsigned long : 1; + unsigned long abortclks_cfifo : 3; + unsigned long : 2; + unsigned long cpq_data_swap : 1; + unsigned long : 3; + unsigned long no_abort_idct : 1; + unsigned long no_abort_bios : 1; + unsigned long no_abort_fb : 1; + unsigned long no_abort_cp : 1; + unsigned long no_abort_hi : 1; + unsigned long no_abort_hdp : 1; + unsigned long no_abort_mc : 1; + unsigned long no_abort_aic : 1; + unsigned long no_abort_vip : 1; + unsigned long no_abort_disp : 1; + unsigned long no_abort_cg : 1; + } rbbm_cntl_t; + +typedef union { + unsigned long val : 32; + rbbm_cntl_t f; +} rbbm_cntl_u; + +typedef struct _rbbm_soft_reset_t {/* unlike most structs here, three gaps are named (reserved3, reserved2, reserved1) instead of left unnamed */ + unsigned long soft_reset_cp : 1; + unsigned long soft_reset_hi : 1; + unsigned long reserved3 : 3; + unsigned long soft_reset_e2 : 1; + unsigned long reserved2 : 2; + unsigned long soft_reset_mc : 1; + unsigned 
long reserved1 : 2; + unsigned long soft_reset_disp : 1; + unsigned long soft_reset_cg : 1; + unsigned long : 19; + } rbbm_soft_reset_t; + +typedef union { + unsigned long val : 32; + rbbm_soft_reset_t f; +} rbbm_soft_reset_u; + +typedef struct _nqwait_until_t { + unsigned long wait_gui_idle : 1; + unsigned long : 31; + } nqwait_until_t; + +typedef union { + unsigned long val : 32; + nqwait_until_t f; +} nqwait_until_u; + +typedef struct _rbbm_debug_t { + unsigned long rbbm_debug : 32; + } rbbm_debug_t; + +typedef union { + unsigned long val : 32; + rbbm_debug_t f; +} rbbm_debug_u; + +typedef struct _rbbm_cmdfifo_addr_t { + unsigned long cmdfifo_addr : 6; + unsigned long : 26; + } rbbm_cmdfifo_addr_t; + +typedef union { + unsigned long val : 32; + rbbm_cmdfifo_addr_t f; +} rbbm_cmdfifo_addr_u; + +typedef struct _rbbm_cmdfifo_datal_t { + unsigned long cmdfifo_datal : 32; + } rbbm_cmdfifo_datal_t; + +typedef union { + unsigned long val : 32; + rbbm_cmdfifo_datal_t f; +} rbbm_cmdfifo_datal_u; + +typedef struct _rbbm_cmdfifo_datah_t {/* only 12 bits are named here, whereas rbbm_cmdfifo_datal_t names all 32 */ + unsigned long cmdfifo_datah : 12; + unsigned long : 20; + } rbbm_cmdfifo_datah_t; + +typedef union { + unsigned long val : 32; + rbbm_cmdfifo_datah_t f; +} rbbm_cmdfifo_datah_u; + +typedef struct _rbbm_cmdfifo_stat_t { + unsigned long cmdfifo_rptr : 6; + unsigned long : 2; + unsigned long cmdfifo_wptr : 6; + unsigned long : 18; + } rbbm_cmdfifo_stat_t; + +typedef union { + unsigned long val : 32; + rbbm_cmdfifo_stat_t f; +} rbbm_cmdfifo_stat_u; + +typedef struct _clk_pin_cntl_t { + unsigned long osc_en : 1; + unsigned long osc_gain : 5; + unsigned long dont_use_xtalin : 1; + unsigned long xtalin_pm_en : 1; + unsigned long xtalin_dbl_en : 1; + unsigned long : 7; + unsigned long cg_debug : 16; + } clk_pin_cntl_t; + +typedef union { + unsigned long val : 32; + clk_pin_cntl_t f; +} clk_pin_cntl_u; + +typedef struct _pll_ref_fb_div_t { + unsigned long pll_ref_div : 4; + unsigned long : 4; + unsigned long pll_fb_div_int : 6; + unsigned long : 
2; + unsigned long pll_fb_div_frac : 3; + unsigned long : 1; + unsigned long pll_reset_time : 4; + unsigned long pll_lock_time : 8; + } pll_ref_fb_div_t; + +typedef union { + unsigned long val : 32; + pll_ref_fb_div_t f; +} pll_ref_fb_div_u; + +typedef struct _pll_cntl_t {/* member widths total 32 bits with no padding members */ + unsigned long pll_pwdn : 1; + unsigned long pll_reset : 1; + unsigned long pll_pm_en : 1; + unsigned long pll_mode : 1; + unsigned long pll_refclk_sel : 1; + unsigned long pll_fbclk_sel : 1; + unsigned long pll_tcpoff : 1; + unsigned long pll_pcp : 3; + unsigned long pll_pvg : 3; + unsigned long pll_vcofr : 1; + unsigned long pll_ioffset : 2; + unsigned long pll_pecc_mode : 2; + unsigned long pll_pecc_scon : 2; + unsigned long pll_dactal : 4; + unsigned long pll_cp_clip : 2; + unsigned long pll_conf : 3; + unsigned long pll_mbctrl : 2; + unsigned long pll_ring_off : 1; + } pll_cntl_t; + +typedef union { + unsigned long val : 32; + pll_cntl_t f; +} pll_cntl_u; + +typedef struct _sclk_cntl_t {/* widths: 2 + 2 + 4 + 3 + 4 + 1, nine sclk_force_ flags, four busy_extend_ flags, 3 pad = 32 */ + unsigned long sclk_src_sel : 2; + unsigned long : 2; + unsigned long sclk_post_div_fast : 4; + unsigned long sclk_clkon_hys : 3; + unsigned long sclk_post_div_slow : 4; + unsigned long disp_cg_ok2switch_en : 1; + unsigned long sclk_force_reg : 1; + unsigned long sclk_force_disp : 1; + unsigned long sclk_force_mc : 1; + unsigned long sclk_force_extmc : 1; + unsigned long sclk_force_cp : 1; + unsigned long sclk_force_e2 : 1; + unsigned long sclk_force_e3 : 1; + unsigned long sclk_force_idct : 1; + unsigned long sclk_force_bist : 1; + unsigned long busy_extend_cp : 1; + unsigned long busy_extend_e2 : 1; + unsigned long busy_extend_e3 : 1; + unsigned long busy_extend_idct : 1; + unsigned long : 3; + } sclk_cntl_t; + +typedef union { + unsigned long val : 32; + sclk_cntl_t f; +} sclk_cntl_u; + +typedef struct _pclk_cntl_t { + unsigned long pclk_src_sel : 2; + unsigned long : 2; + unsigned long pclk_post_div : 4; + unsigned long : 8; + unsigned long pclk_force_disp : 1; + unsigned long : 15; + } pclk_cntl_t; + 
+typedef union {/* raw 32-bit val overlaid with the pclk_cntl_t bitfield view f */ + unsigned long val : 32; + pclk_cntl_t f; +} pclk_cntl_u; + +typedef struct _clk_test_cntl_t { + unsigned long testclk_sel : 4; + unsigned long : 3; + unsigned long start_check_freq : 1; + unsigned long tstcount_rst : 1; + unsigned long : 15; + unsigned long test_count : 8; + } clk_test_cntl_t; + +typedef union { + unsigned long val : 32; + clk_test_cntl_t f; +} clk_test_cntl_u; + +typedef struct _pwrmgt_cntl_t { + unsigned long pwm_enable : 1; + unsigned long : 1; + unsigned long pwm_mode_req : 2; + unsigned long pwm_wakeup_cond : 2; + unsigned long pwm_fast_noml_hw_en : 1; + unsigned long pwm_noml_fast_hw_en : 1; + unsigned long pwm_fast_noml_cond : 4; + unsigned long pwm_noml_fast_cond : 4; + unsigned long pwm_idle_timer : 8; + unsigned long pwm_busy_timer : 8; + } pwrmgt_cntl_t; + +typedef union { + unsigned long val : 32; + pwrmgt_cntl_t f; +} pwrmgt_cntl_u; + +typedef struct _pwrmgt_status_t { + unsigned long pwm_mode : 2; + unsigned long : 30; + } pwrmgt_status_t; + +typedef union { + unsigned long val : 32; + pwrmgt_status_t f; +} pwrmgt_status_u; + +typedef struct tagDISPLAYSTATE {/* aggregate of register unions; their *_u types are declared outside this part of the file */ +lcd_format_u LcdFormat; +crtc_total_u CrtcTotal; +active_h_disp_u ActiveHDisp; +active_v_disp_u ActiveVDisp; +crtc_ss_u CrtcSS; +crtc_ls_u CrtcLS; +crtc_gs_u CrtcGS; +crtc_vpos_gs_u CrtcVPosGS; +crtc_gclk_u CrtcGClk; +crtc_goe_u CrtcGOE; +crtc_rev_u CrtcRev; +crtc_dclk_u CrtcDClk; +crtc_default_count_u CrtcDefaultCount; +crtc_frame_u CrtcFrame; +crtc_frame_vpos_u CrtcFrameVPos; +lcdd_cntl1_u LcddCntl1; +lcdd_cntl2_u LcddCntl2; +genlcd_cntl1_u GenlcdCntl1; +genlcd_cntl2_u GenlcdCntl2; +lcd_background_color_u LcdBackgroundColor; +brightness_cntl_u Brightness_Cntl; +} DISPLAYSTATE; + +typedef struct {/* clip rectangle given as two signed 16-bit corner coordinates */ +s16 X_Top_Left; // x coordinate of top left corner +s16 Y_Top_Left; // y coordinate of top left corner +s16 X_Bottom_Right; // x coordinate of bottom right corner +s16 Y_Bottom_Right; // y coordinate of bottom right corner +} ATI_CLIPRECT; + +typedef struct 
tagGUISTATE {/* aggregate mixing register unions with plain u32, u16 and s8 members and two ATI_CLIPRECT values */ +dp_cntl_u DpCntl; +dp_gui_master_cntl_u GMC; +e2_arithmetic_cntl_u E2AC; +global_alpha_u GlobalAlpha; +dst_pitch_u dstPitch; +dst_offset_u dstOffset; +src_pitch_u srcPitch; +src_offset_u srcOffset; +u32 FrgrdColour; +u32 BkgrdColour; +ATI_CLIPRECT SrcClipRect; +ATI_CLIPRECT DstClipRect; +u32 BrushOffset; +u16 BrushHandle; +// for 16bpp, SRC must be the same type as DST, can't go from 1555->565 +s8 TurnOnDst565ForNon2D; +} GUISTATE; + +typedef struct tagGFXWINSTATE { +graphic_ctrl_u GraphicCtrl; +graphic_offset_u GraphicOffset; +graphic_pitch_u GraphicPitch; // byte-based +graphic_h_disp_u GraphicHDisp; +graphic_v_disp_u GraphicVDisp; +s8 TurnOnDisp565; +// These memory offsets need to be translated before writing to registers +u32 Grp_Offset; +u32 Grp_W; +u32 Grp_H; +u32 Grp_Src_X; +u32 Grp_Src_Y; +u32 Grp_Src_W; // pixel-based +} GFXWINSTATE; + +typedef struct tagPREVSTATE { +u16 PrevOverlayX; +u16 PrevOverlayY; +u8 bOverlayWasOn; +u16 PrevGfxWinX; +u16 PrevGfxWinY; +u8 bGfxWinWasOn; +} PREVSTATE; + +typedef struct tagPOWERSTATE { +clk_pin_cntl_u ClkPinCntl; +pll_ref_fb_div_u PllRefFbDiv; +pll_cntl_u PllCntl; +sclk_cntl_u SclkCntl; +pclk_cntl_u PclkCntl; +clk_test_cntl_u ClkTestCntl; +pwrmgt_cntl_u PwrmgtCntl; +u32 Freq; +u8 tf100; +u8 tf80; +u8 tf20; +u8 M; +u8 N_int; +u8 N_fac; +u8 lock_time; +u8 tfgoal; +u8 AutoMode; +u8 PWMMode; +u16 FastSclk; +u16 NormSclk; +PREVSTATE PrevState; +} POWERSTATE; + +typedef struct tagAPERTURE { +u32 MMRegBase; +u32 CfgRegBase; +u32 McFbStart; +u32 McFbTop; +u32 McExtMemStart; +u32 McExtMemTop; +u32 WrapStart; +u32 WrapTop; +} APERTURE; + +#endif + diff --git a/recipes/mplayer/files/w100-Makefile.patch b/recipes/mplayer/files/w100-Makefile.patch new file mode 100644 index 0000000000..01899556d3 --- /dev/null +++ b/recipes/mplayer/files/w100-Makefile.patch @@ -0,0 +1,10 @@ +--- mplayer_20060519/Makefile.orig 2006-05-30 10:29:18.000000000 +0100 ++++ mplayer_20060519/Makefile 2006-05-30 10:29:53.000000000 +0100 +@@ 
-74,6 +74,7 @@ + $(DIRECTFB_LIB) \ + $(CACA_LIB) \ + $(VESA_LIB) \ ++ $(W100_LIB) + + ifeq ($(EXTERNAL_VIDIX),yes) + VO_LIBS += $(EXTERNAL_VIDIX_LIB) diff --git a/recipes/mplayer/files/w100-configure-svn.patch b/recipes/mplayer/files/w100-configure-svn.patch new file mode 100644 index 0000000000..e3067a4724 --- /dev/null +++ b/recipes/mplayer/files/w100-configure-svn.patch @@ -0,0 +1,48 @@ +Index: trunk/configure +=================================================================== +--- trunk.orig/configure ++++ trunk/configure +@@ -1623,6 +1623,7 @@ _caca=auto + _svga=auto + _vesa=auto + _fbdev=auto ++_w100=no + _dvb=auto + _dvbhead=auto + _dxr2=auto +@@ -1822,6 +1823,8 @@ for ac_option do + --disable-vesa) _vesa=no ;; + --enable-fbdev) _fbdev=yes ;; + --disable-fbdev) _fbdev=no ;; ++ --enable-w100) _w100=yes ;; ++ --disable-w100) _w100=no ;; + --enable-dvb) _dvb=yes ;; + --disable-dvb) _dvb=no ;; + --enable-dvbhead) _dvbhead=yes ;; +@@ -4280,6 +4283,18 @@ else + fi + echores "$_fbdev" + ++echocheck "ATI Imageon 100 (w100)" ++if test "$_w100" = yes ; then ++ _def_w100='#define HAVE_W100 1' ++ _ld_w100='-laticore' ++ _libs_mplayer="$_libs_mplayer $_ld_w100" ++ _vosrc="$_vosrc vo_w100.c" ++ _vomodules="w100 $_vomodules" ++else ++ _def_w100='#undef HAVE_W100' ++ _novomodules="w100 $_novomodules" ++fi ++echores "$_w100" + + + echocheck "DVB" +@@ -8227,6 +8242,7 @@ $_def_mga + $_def_xmga + $_def_syncfb + $_def_fbdev ++$_def_w100 + $_def_dxr2 + $_def_dxr3 + $_def_ivtv diff --git a/recipes/mplayer/files/w100-configure.patch b/recipes/mplayer/files/w100-configure.patch new file mode 100644 index 0000000000..03610610e4 --- /dev/null +++ b/recipes/mplayer/files/w100-configure.patch @@ -0,0 +1,53 @@ +--- mplayer_20060519/configure.orig 2006-05-30 10:23:24.000000000 +0100 ++++ mplayer_20060519/configure 2006-05-30 10:27:24.000000000 +0100 +@@ -1585,6 +1585,7 @@ + _svga=auto + _vesa=auto + _fbdev=auto ++_w100=no + _dvb=auto + _dvbhead=auto + _dxr2=auto +@@ -1767,6 +1768,8 @@ + 
--disable-vesa) _vesa=no ;; + --enable-fbdev) _fbdev=yes ;; + --disable-fbdev) _fbdev=no ;; ++ --enable-w100) _w100=yes ;; ++ --disable-w100) _w100=no ;; + --enable-dvb) _dvb=yes ;; + --disable-dvb) _dvb=no ;; + --enable-dvbhead) _dvbhead=yes ;; +@@ -4200,6 +4203,17 @@ + fi + echores "$_fbdev" + ++echocheck "ATI Imageon 100 (w100)" ++if test "$_w100" = yes ; then ++ _def_w100='#define HAVE_W100 1' ++ _ld_w100='-laticore' ++ _vosrc="$_vosrc vo_w100.c" ++ _vomodules="w100 $_vomodules" ++else ++ _def_w100='#undef HAVE_W100' ++ _novomodules="w100 $_novomodules" ++fi ++echores "$_w100" + + + echocheck "DVB" +@@ -7441,6 +7455,7 @@ + AA_LIB = $_ld_aa + CACA_INC = $_inc_caca + CACA_LIB = $_ld_caca ++W100_LIB = $_ld_w100 + + # audio output + ALSA_LIB = $_ld_alsa +@@ -8238,6 +8253,7 @@ + $_def_xmga + $_def_syncfb + $_def_fbdev ++$_def_w100 + $_def_dxr2 + $_def_dxr3 + $_def_dvb diff --git a/recipes/mplayer/files/w100-mplayer.patch b/recipes/mplayer/files/w100-mplayer.patch new file mode 100644 index 0000000000..8ce37d014c --- /dev/null +++ b/recipes/mplayer/files/w100-mplayer.patch @@ -0,0 +1,32 @@ +Index: MPlayer-1.0rc1/mplayer.c +=================================================================== +--- MPlayer-1.0rc1.orig/mplayer.c ++++ MPlayer-1.0rc1/mplayer.c +@@ -807,6 +807,17 @@ static void exit_sighandler(int x){ + exit_player(NULL); + } + ++//w100 driver additions ++int g_sigcont = 0;/* NOTE(review): incremented inside misc_sighandler; volatile sig_atomic_t is the portable type for a handler-written flag - confirm how the w100 driver declares it before changing */ ++ ++static void misc_sighandler(int x){ ++ switch(x){ ++ case SIGCONT: ++ ++ g_sigcont; ++ break; ++ } ++} ++ + extern void mp_input_register_options(m_config_t* cfg); + + #include "mixer.h" +@@ -3216,6 +3227,9 @@ current_module = NULL; + #endif + #endif + ++// w100 driver additions ++ signal(SIGCONT,misc_sighandler); ++ + #ifdef HAVE_NEW_GUI + if(use_gui){ + guiInit(); diff --git a/recipes/mplayer/files/w100-video_out.patch b/recipes/mplayer/files/w100-video_out.patch new file mode 100644 index 0000000000..9855853fe6 --- /dev/null +++ b/recipes/mplayer/files/w100-video_out.patch @@ -0,0 
+1,20 @@ +--- mplayer_20060519/libvo/video_out.c.orig 2006-05-30 11:25:57.000000000 +0100 ++++ mplayer_20060519/libvo/video_out.c 2006-05-30 11:26:49.000000000 +0100 +@@ -86,6 +86,7 @@ + extern vo_functions_t video_out_syncfb; + extern vo_functions_t video_out_fbdev; + extern vo_functions_t video_out_fbdev2; ++extern vo_functions_t video_out_w100; + extern vo_functions_t video_out_svga; + extern vo_functions_t video_out_png; + extern vo_functions_t video_out_ggi; +@@ -196,6 +197,9 @@ + &video_out_fbdev, + &video_out_fbdev2, + #endif ++#ifdef HAVE_W100 ++ &video_out_w100, ++#endif + #ifdef HAVE_SVGALIB + &video_out_svga, + #endif diff --git a/recipes/mplayer/files/yuv.S b/recipes/mplayer/files/yuv.S new file mode 100644 index 0000000000..3eaf284a61 --- /dev/null +++ b/recipes/mplayer/files/yuv.S @@ -0,0 +1,119 @@ +/* + Copyright (C) 2008 Mans Rullgard + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ */ + + .fpu neon + .text + +@ yuv420_to_yuv422(uint8_t *yuv, uint8_t *y, uint8_t *u, uint8_t *v, +@ int w, int h, int yw, int cw, int dw) + +#define yuv r0 +#define y r1 +#define u r2 +#define v r3 +#define w r4 +#define h r5 +#define yw r6 +#define cw r7 +#define dw r8 + +#define tyuv r9 +#define ty r10 +#define tu r11 +#define tv r12 +#define i lr + + .global yuv420_to_yuv422 + .func yuv420_to_yuv422 +yuv420_to_yuv422: + push {r4-r11,lr}/* nine registers saved = 36 bytes, so the stack-passed arguments start at sp + 36 */ + add r4, sp, #36 + ldm r4, {r4-r8}/* loads w, h, yw, cw, dw into r4-r8, matching the register aliases defined above */ + dmb +1: + mov tu, u + mov tv, v + vld1.64 {d2}, [u,:64], cw @ u0 + vld1.64 {d3}, [v,:64], cw @ v0 + mov tyuv, yuv + mov ty, y + vzip.8 d2, d3 @ u0v0 + mov i, #16/* row counter: the loop at label 2 loads four y rows and subtracts 4 from i per pass */ +2: + pld [y, #64] + vld1.64 {d0, d1}, [y,:128], yw @ y0 + pld [u, #64] + subs i, i, #4 + vld1.64 {d6}, [u,:64], cw @ u2 + pld [y, #64] + vld1.64 {d4, d5}, [y,:128], yw @ y1 + pld [v, #64] + vld1.64 {d7}, [v,:64], cw @ v2 + pld [y, #64] + vld1.64 {d16,d17}, [y,:128], yw @ y2 + vzip.8 d6, d7 @ u2v2 + pld [u, #64] + vld1.64 {d22}, [u,:64], cw @ u4 + pld [v, #64] + vld1.64 {d23}, [v,:64], cw @ v4 + pld [y, #64] + vld1.64 {d20,d21}, [y,:128], yw @ y3 + vmov q9, q3 @ u2v2 + vzip.8 d22, d23 @ u4v4 + vrhadd.u8 q3, q1, q3 @ u1v1 + vzip.8 q0, q1 @ y0u0y0v0 + vmov q12, q11 @ u4v4 + vzip.8 q2, q3 @ y1u1y1v1 + vrhadd.u8 q11, q9, q11 @ u3v3 + vst1.64 {d0-d3}, [yuv,:128], dw @ y0u0y0v0 + vzip.8 q8, q9 @ y2u2y2v2 + vst1.64 {d4-d7}, [yuv,:128], dw @ y1u1y1v1 + vzip.8 q10, q11 @ y3u3y3v3 + vst1.64 {d16-d19}, [yuv,:128], dw @ y2u2y2v2 + vmov q1, q12 + vst1.64 {d20-d23}, [yuv,:128], dw @ y3u3y3v3 + bgt 2b + + subs w, w, #16 + add yuv, tyuv, #32 + add y, ty, #16 + add u, tu, #8 + add v, tv, #8 + bgt 1b + + ldr w, [sp, #36]/* w was counted down by the column loop; reload the original value from the stack argument */ + subs h, h, #16 + add yuv, yuv, dw, lsl #4 + sub yuv, yuv, w, lsl #1 + add y, y, yw, lsl #4 + sub y, y, w + add u, u, cw, lsl #3 + sub u, u, w, asr #1 + add v, v, cw, lsl #3 + sub v, v, w, asr #1 + bgt 1b + + pop {r4-r11,pc} + .endfunc + |