diff options
Diffstat (limited to 'recipes/xorg-lib/pixman-0.17.8')
7 files changed, 756 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman-0.17.8/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman-0.17.8/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch new file mode 100644 index 0000000000..25ce7ee3b8 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.17.8/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch @@ -0,0 +1,114 @@ +From c29c9fa826b7112156fd6150b5f1564227935c05 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu, 22 Oct 2009 05:27:33 +0300 +Subject: [PATCH 3/6] Generic C implementation of pixman_blt with overlapping support + +Uses memcpy/memmove functions to copy pixels, can handle the +case when both source and destination areas are in the same +image (this is useful for scrolling). + +It is assumed that copying direction is only important when +using the same image for both source and destination (and +src_stride == dst_stride). Copying direction is undefined +for the images with different source and destination stride +which happen to be in the overlapped areas (but this is an +unrealistic case anyway). +--- + pixman/pixman-general.c | 21 ++++++++++++++++++--- + pixman/pixman-private.h | 43 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 61 insertions(+), 3 deletions(-) + +diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c +index c96a3f9..d71a299 100644 +--- a/pixman/pixman-general.c ++++ b/pixman/pixman-general.c +@@ -300,9 +300,24 @@ general_blt (pixman_implementation_t *imp, + int width, + int height) + { +- /* We can't blit unless we have sse2 or mmx */ +- +- return FALSE; ++ uint8_t *dst_bytes = (uint8_t *)dst_bits; ++ uint8_t *src_bytes = (uint8_t *)src_bits; ++ int bpp; ++ ++ if (src_bpp != dst_bpp || src_bpp & 7) ++ return FALSE; ++ ++ bpp = src_bpp >> 3; ++ width *= bpp; ++ src_stride *= 4; ++ dst_stride *= 4; ++ pixman_blt_helper (src_bytes + src_y * src_stride + src_x * bpp, ++ dst_bytes + dst_y * dst_stride + dst_x * bpp, ++ src_stride, ++ dst_stride, ++ width, ++ height); ++ return TRUE; + } + + static pixman_bool_t +diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h +index 5000f91..8c5d4fd 100644 +--- a/pixman/pixman-private.h ++++ b/pixman/pixman-private.h +@@ -10,6 +10,7 @@ + + #include "pixman.h" + #include <time.h> ++#include <string.h> + #include <assert.h> + + #include "pixman-compiler.h" +@@ -794,4 +795,46 @@ void pixman_timer_register (pixman_timer_t *timer); + + #endif /* PIXMAN_TIMERS */ + ++/* a helper function, can blit 8-bit images with src/dst overlapping support */ ++static inline void ++pixman_blt_helper (uint8_t *src_bytes, ++ uint8_t *dst_bytes, ++ int src_stride, ++ int dst_stride, ++ int width, ++ int height) ++{ ++ /* ++ * The second part of this check is not strictly needed, but it prevents ++ * unnecessary upside-down processing of areas which belong to different ++ * images. Upside-down processing can be slower with fixed-distance-ahead ++ * prefetch and perceived as having more tearing. ++ */ ++ if (src_bytes < dst_bytes + width && ++ src_bytes + src_stride * height > dst_bytes) ++ { ++ src_bytes += src_stride * height - src_stride; ++ dst_bytes += dst_stride * height - dst_stride; ++ dst_stride = -dst_stride; ++ src_stride = -src_stride; ++ /* Horizontal scrolling to the left needs memmove */ ++ if (src_bytes + width > dst_bytes) ++ { ++ while (--height >= 0) ++ { ++ memmove (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++ return; ++ } ++ } ++ while (--height >= 0) ++ { ++ memcpy (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++} ++ + #endif /* PIXMAN_PRIVATE_H */ +-- +1.6.2.4 + diff --git a/recipes/xorg-lib/pixman-0.17.8/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman-0.17.8/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch new file mode 100644 index 0000000000..74c7b45bc4 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.17.8/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch @@ -0,0 +1,91 @@ +From 7ca32542c957ff308a6ca7e3715e6552a65ae395 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu, 22 Oct 2009 05:45:47 +0300 +Subject: [PATCH 4/6] Support of overlapping src/dst for pixman_blt_mmx + +--- + pixman/pixman-mmx.c | 55 +++++++++++++++++++++++++++++--------------------- + 1 files changed, 32 insertions(+), 23 deletions(-) + +diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c +index 819e3a0..dcccadb 100644 +--- a/pixman/pixman-mmx.c ++++ b/pixman/pixman-mmx.c +@@ -3002,34 +3002,43 @@ pixman_blt_mmx (uint32_t *src_bits, + { + uint8_t * src_bytes; + uint8_t * dst_bytes; +- int byte_width; ++ int bpp; + +- if (src_bpp != dst_bpp) ++ if (src_bpp != dst_bpp || src_bpp & 7) + return FALSE; + +- if (src_bpp == 16) +- { +- src_stride = src_stride * (int) sizeof (uint32_t) / 2; +- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; +- src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); +- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); +- byte_width = 2 * width; +- src_stride *= 2; +- dst_stride *= 2; +- } +- else if (src_bpp == 32) ++ bpp = src_bpp >> 3; ++ width *= bpp; ++ src_stride *= 4; ++ dst_stride *= 4; ++ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; ++ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; ++ ++ if (src_bpp != 16 && src_bpp != 32) + { +- src_stride = src_stride * (int) sizeof (uint32_t) / 4; +- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; +- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); +- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); +- byte_width = 4 * width; +- src_stride *= 4; +- dst_stride *= 4; ++ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, ++ width, height); ++ return TRUE; + } +- else ++ ++ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) + { +- return FALSE; ++ src_bytes += src_stride * height - src_stride; ++ dst_bytes += dst_stride * height - dst_stride; ++ dst_stride = -dst_stride; ++ src_stride = -src_stride; ++ ++ if (src_bytes + width > dst_bytes) ++ { ++ /* TODO: reverse scanline copy using MMX */ ++ while (--height >= 0) ++ { ++ memmove (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++ return TRUE; ++ } + } + + while (height--) +@@ -3039,7 +3048,7 @@ pixman_blt_mmx (uint32_t *src_bits, + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; +- w = byte_width; ++ w = width; + + while (w >= 2 && ((unsigned long)d & 3)) + { +-- +1.6.2.4 + diff --git a/recipes/xorg-lib/pixman-0.17.8/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman-0.17.8/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch new file mode 100644 index 0000000000..3704fbf1cf --- /dev/null +++ b/recipes/xorg-lib/pixman-0.17.8/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch @@ -0,0 +1,91 @@ +From edc80b41c6480b7c80ec5f7c835c92b2debb3774 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu, 22 Oct 2009 05:45:54 +0300 +Subject: [PATCH 5/6] Support of overlapping src/dst for pixman_blt_sse2 + +--- + pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++-------------------- + 1 files changed, 32 insertions(+), 23 deletions(-) + +diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c +index 78b0ad1..b84636b 100644 +--- a/pixman/pixman-sse2.c ++++ b/pixman/pixman-sse2.c +@@ -5300,34 +5300,43 @@ pixman_blt_sse2 (uint32_t *src_bits, + { + uint8_t * src_bytes; + uint8_t * dst_bytes; +- int byte_width; ++ int bpp; + +- if (src_bpp != dst_bpp) ++ if (src_bpp != dst_bpp || src_bpp & 7) + return FALSE; + +- if (src_bpp == 16) +- { +- src_stride = src_stride * (int) sizeof (uint32_t) / 2; +- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; +- src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); +- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); +- byte_width = 2 * width; +- src_stride *= 2; +- dst_stride *= 2; +- } +- else if (src_bpp == 32) ++ bpp = src_bpp >> 3; ++ width *= bpp; ++ src_stride *= 4; ++ dst_stride *= 4; ++ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; ++ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; ++ ++ if (src_bpp != 16 && src_bpp != 32) + { +- src_stride = src_stride * (int) sizeof (uint32_t) / 4; +- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; +- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); +- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); +- byte_width = 4 * width; +- src_stride *= 4; +- dst_stride *= 4; ++ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, ++ width, height); ++ return TRUE; + } +- else ++ ++ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) + { +- return FALSE; ++ src_bytes += src_stride * height - src_stride; ++ dst_bytes += dst_stride * height - dst_stride; ++ dst_stride = -dst_stride; ++ src_stride = -src_stride; ++ ++ if (src_bytes + width > dst_bytes) ++ { ++ /* TODO: reverse scanline copy using SSE2 */ ++ while (--height >= 0) ++ { ++ memmove (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++ return TRUE; ++ } + } + + cache_prefetch ((__m128i*)src_bytes); +@@ -5340,7 +5349,7 @@ pixman_blt_sse2 (uint32_t *src_bits, + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; +- w = byte_width; ++ w = width; + + cache_prefetch_next ((__m128i*)s); + cache_prefetch_next ((__m128i*)d); +-- +1.6.2.4 + diff --git a/recipes/xorg-lib/pixman-0.17.8/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman-0.17.8/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch new file mode 100644 index 0000000000..7c22483a2e --- /dev/null +++ b/recipes/xorg-lib/pixman-0.17.8/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch @@ -0,0 +1,94 @@ +From 86870ff530b5e435034bd80207e5758466d96cff Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed, 18 Nov 2009 06:08:48 +0200 +Subject: [PATCH 6/6] Support of overlapping src/dst for pixman_blt_neon + +--- + pixman/pixman-arm-neon.c | 63 ++++++++++++++++++++++++++++++++++++++------- + 1 files changed, 53 insertions(+), 10 deletions(-) + +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 495fda4..c632ff5 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -357,26 +357,66 @@ + int width, + int height) + { +- if (src_bpp != dst_bpp) ++ uint8_t * src_bytes; ++ uint8_t * dst_bytes; ++ int bpp; ++ ++ if (src_bpp != dst_bpp || src_bpp & 7) + return FALSE; + ++ bpp = src_bpp >> 3; ++ width *= bpp; ++ src_stride *= 4; ++ dst_stride *= 4; ++ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; ++ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; ++ ++ if (src_bpp != 16 && src_bpp != 32) ++ { ++ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, ++ width, height); ++ return TRUE; ++ } ++ ++ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) ++ { ++ src_bytes += src_stride * height - src_stride; ++ dst_bytes += dst_stride * height - dst_stride; ++ dst_stride = -dst_stride; ++ src_stride = -src_stride; ++ ++ if (src_bytes + width > dst_bytes) ++ { ++ /* TODO: reverse scanline copy using NEON */ ++ while (--height >= 0) ++ { ++ memmove (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++ return TRUE; ++ } ++ } ++ + switch (src_bpp) + { + case 16: + pixman_composite_src_0565_0565_asm_neon ( +- width, height, +- (uint16_t *)(((char *) dst_bits) + +- dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, +- (uint16_t *)(((char *) src_bits) + +- src_y * src_stride * 4 + src_x * 2), src_stride * 2); ++ width >> 1, ++ height, ++ (uint16_t *) dst_bytes, ++ dst_stride >> 1, ++ (uint16_t *) src_bytes, ++ src_stride >> 1); + return TRUE; + case 32: + pixman_composite_src_8888_8888_asm_neon ( +- width, height, +- (uint32_t *)(((char *) dst_bits) + +- dst_y * dst_stride * 4 + dst_x * 4), dst_stride, +- (uint32_t *)(((char *) src_bits) + +- src_y * src_stride * 4 + src_x * 4), src_stride); ++ width >> 2, ++ height, ++ (uint32_t *) dst_bytes, ++ dst_stride >> 2, ++ (uint32_t *) src_bytes, ++ src_stride >> 2); + return TRUE; + default: + return FALSE; +-- +1.6.2.4 + diff --git a/recipes/xorg-lib/pixman-0.17.8/1-composite.patch b/recipes/xorg-lib/pixman-0.17.8/1-composite.patch new file mode 100644 index 0000000000..761a2b937a --- /dev/null +++ b/recipes/xorg-lib/pixman-0.17.8/1-composite.patch @@ -0,0 +1,161 @@ +From 138d38f68bb2f955ca209f7412002a983a32a2fd Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue, 23 Feb 2010 23:44:00 +0000 +Subject: ARM: added 'neon_composite_over_n_8888_8888_ca' fast path + +This fast path function improves performance of 'firefox-talos-gfx' +cairo-perf trace. + +Benchmark from ARM Cortex-A8 @720MHz + +before: + +[ # ] backend test min(s) median(s) stddev. count +[ 0] image firefox-talos-gfx 139.969 141.176 0.35% 6/6 + +after: + +[ # ] backend test min(s) median(s) stddev. count +[ 0] image firefox-talos-gfx 111.810 112.196 0.23% 6/6 +--- +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 2986884..2db4da8 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1026,6 +1026,113 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head ++ /* ++ * 'combine_mask_ca' replacement ++ * ++ * input: solid src (n) in {d8, d9, d10, d11} ++ * dest in {d4, d5, d6, d7 } ++ * mask in {d24, d25, d26, d27} ++ * output: updated src in {d0, d1, d2, d3 } ++ * updated mask in {d24, d25, d26, d27} ++ */ ++ vmull.u8 q0, d24, d8 ++ vmull.u8 q1, d25, d9 ++ vmull.u8 q6, d26, d10 ++ vmull.u8 q7, d27, d11 ++ vmull.u8 q9, d11, d24 ++ vmull.u8 q12, d11, d25 ++ vmull.u8 q13, d11, d26 ++ vrshr.u16 q10, q0, #8 ++ vrshr.u16 q11, q1, #8 ++ vrshr.u16 q8, q6, #8 ++ vraddhn.u16 d0, q0, q10 ++ vraddhn.u16 d1, q1, q11 ++ vraddhn.u16 d2, q6, q8 ++ vrshr.u16 q10, q7, #8 ++ vrshr.u16 q11, q9, #8 ++ vrshr.u16 q8, q12, #8 ++ vraddhn.u16 d3, q7, q10 ++ vrshr.u16 q10, q13, #8 ++ vraddhn.u16 d25, q12, q8 ++ vrshr.u16 q8, q7, #8 ++ vraddhn.u16 d24, q9, q11 ++ vraddhn.u16 d26, q13, q10 ++ vraddhn.u16 d27, q7, q8 ++ /* ++ * 'combine_over_ca' replacement ++ * ++ * output: updated dest in {d28, d29, d30, d31} ++ */ ++ vmvn.8 d24, d24 ++ vmvn.8 d25, d25 ++ vmull.u8 q8, d24, d4 ++ vmull.u8 q9, d25, d5 ++ vmvn.8 d26, d26 ++ vmvn.8 d27, d27 ++ vmull.u8 q10, d26, d6 ++ vmull.u8 q11, d27, d7 ++.endm ++ ++.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail ++ /* ... continue 'combine_over_ca' replacement */ ++ vrshr.u16 q14, q8, #8 ++ vrshr.u16 q15, q9, #8 ++ vrshr.u16 q6, q10, #8 ++ vrshr.u16 q7, q11, #8 ++ vraddhn.u16 d28, q14, q8 ++ vraddhn.u16 d29, q15, q9 ++ vraddhn.u16 d30, q6, q10 ++ vraddhn.u16 d31, q7, q11 ++ vqadd.u8 q14, q0, q14 ++ vqadd.u8 q15, q1, q15 ++.endm ++ ++.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head ++ vrshr.u16 q14, q8, #8 ++ vrshr.u16 q15, q9, #8 ++ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! ++ vrshr.u16 q6, q10, #8 ++ vrshr.u16 q7, q11, #8 ++ vraddhn.u16 d28, q14, q8 ++ vraddhn.u16 d29, q15, q9 ++ vraddhn.u16 d30, q6, q10 ++ vraddhn.u16 d31, q7, q11 ++ vld4.8 {d24, d25, d26, d27}, [MASK]! ++ vqadd.u8 q14, q0, q14 ++ vqadd.u8 q15, q1, q15 ++ cache_preload 8, 8 ++ pixman_composite_over_n_8888_8888_ca_process_pixblock_head ++ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! ++.endm ++ ++.macro pixman_composite_over_n_8888_8888_ca_init ++ add DUMMY, sp, #ARGS_STACK_OFFSET ++ vpush {d8-d15} ++ vld1.32 {d11[0]}, [DUMMY] ++ vdup.8 d8, d11[0] ++ vdup.8 d9, d11[1] ++ vdup.8 d10, d11[2] ++ vdup.8 d11, d11[3] ++.endm ++ ++.macro pixman_composite_over_n_8888_8888_ca_cleanup ++ vpop {d8-d15} ++.endm ++ ++generate_composite_function \ ++ pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \ ++ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ pixman_composite_over_n_8888_8888_ca_init, \ ++ pixman_composite_over_n_8888_8888_ca_cleanup, \ ++ pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \ ++ pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \ ++ pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head ++ ++/******************************************************************************/ ++ + .macro pixman_composite_add_n_8_8_process_pixblock_head + /* expecting source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 557301e..00b5c35 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -269,6 +269,7 @@ BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1) + + BIND_N_MASK_DST(over_n_8_0565, uint8_t, 1, uint16_t, 1) + BIND_N_MASK_DST(over_n_8_8888, uint8_t, 1, uint32_t, 1) ++BIND_N_MASK_DST(over_n_8888_8888_ca, uint32_t, 1, uint32_t, 1) + BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1) + + BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1) +@@ -412,6 +413,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), +-- +cgit v0.8.3-6-g21f6 diff --git a/recipes/xorg-lib/pixman-0.17.8/2-composite.patch b/recipes/xorg-lib/pixman-0.17.8/2-composite.patch new file mode 100644 index 0000000000..96c87e2ac6 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.17.8/2-composite.patch @@ -0,0 +1,100 @@ +From fa7f7b05fbb08b23678cf0d8928f1511e5a20ecc Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed, 24 Feb 2010 00:26:57 +0000 +Subject: ARM: added 'neon_composite_src_x888_8888' fast path + +This fast path function improves performance of 'gnome-system-monitor' +cairo-perf trace. + +Benchmark from ARM Cortex-A8 @720MHz + +before: + +[ # ] backend test min(s) median(s) stddev. count +[ 0] image gnome-system-monitor 68.838 68.899 0.05% 5/6 + +after: + +[ # ] backend test min(s) median(s) stddev. count +[ 0] image gnome-system-monitor 53.336 53.384 0.09% 6/6 +--- +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 2db4da8..42ac1cb 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -947,6 +947,44 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_src_x888_8888_process_pixblock_head ++ vorr q0, q0, q2 ++ vorr q1, q1, q2 ++.endm ++ ++.macro pixman_composite_src_x888_8888_process_pixblock_tail ++.endm ++ ++.macro pixman_composite_src_x888_8888_process_pixblock_tail_head ++ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! ++ vld1.32 {d0, d1, d2, d3}, [SRC]! ++ vorr q0, q0, q2 ++ vorr q1, q1, q2 ++ cache_preload 8, 8 ++.endm ++ ++.macro pixman_composite_src_x888_8888_init ++ vmov.u8 q2, #0xFF ++ vshl.u32 q2, q2, #24 ++.endm ++ ++generate_composite_function \ ++ pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \ ++ FLAG_DST_WRITEONLY, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 10, /* prefetch distance */ \ ++ pixman_composite_src_x888_8888_init, \ ++ default_cleanup, \ ++ pixman_composite_src_x888_8888_process_pixblock_head, \ ++ pixman_composite_src_x888_8888_process_pixblock_tail, \ ++ pixman_composite_src_x888_8888_process_pixblock_tail_head, \ ++ 0, /* dst_w_basereg */ \ ++ 0, /* dst_r_basereg */ \ ++ 0, /* src_basereg */ \ ++ 0 /* mask_basereg */ ++ ++/******************************************************************************/ ++ + .macro pixman_composite_over_n_8_8888_process_pixblock_head + /* expecting deinterleaved source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 00b5c35..12d92a2 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -251,6 +251,7 @@ neon_composite_##name (pixman_implementation_t *imp, \ + + + BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1) ++BIND_SRC_NULL_DST(src_x888_8888, uint32_t, 1, uint32_t, 1) + BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1) + BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3) + BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1) +@@ -400,6 +401,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), +@@ -428,6 +431,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), +-- +cgit v0.8.3-6-g21f6 diff --git a/recipes/xorg-lib/pixman-0.17.8/3-composite.patch b/recipes/xorg-lib/pixman-0.17.8/3-composite.patch new file mode 100644 index 0000000000..81f3ff162b --- /dev/null +++ b/recipes/xorg-lib/pixman-0.17.8/3-composite.patch @@ -0,0 +1,105 @@ +From 80b75405277bacc0df0ef7d91f1a2eabefb97901 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed, 24 Feb 2010 02:14:45 +0000 +Subject: ARM: added 'neon_composite_over_reverse_n_8888' fast path + +This fast path function improves performance of 'poppler' cairo-perf trace. + +Benchmark from ARM Cortex-A8 @720MHz + +[ # ] backend test min(s) median(s) stddev. count +[ 0] image poppler 38.986 39.158 0.23% 6/6 + +after: + +[ # ] backend test min(s) median(s) stddev. count +[ 0] image poppler 24.981 25.136 0.28% 6/6 +--- +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 42ac1cb..272da27 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -680,6 +680,61 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head ++ vrshr.u16 q14, q8, #8 ++ PF add PF_X, PF_X, #8 ++ PF tst PF_CTL, #0xF ++ vrshr.u16 q15, q9, #8 ++ vrshr.u16 q12, q10, #8 ++ vrshr.u16 q13, q11, #8 ++ PF addne PF_X, PF_X, #8 ++ PF subne PF_CTL, PF_CTL, #1 ++ vraddhn.u16 d28, q14, q8 ++ vraddhn.u16 d29, q15, q9 ++ PF cmp PF_X, ORIG_W ++ vraddhn.u16 d30, q12, q10 ++ vraddhn.u16 d31, q13, q11 ++ vqadd.u8 q14, q0, q14 ++ vqadd.u8 q15, q1, q15 ++ vld4.8 {d0, d1, d2, d3}, [DST_R, :128]! ++ vmvn.8 d22, d3 ++ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] ++ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! ++ PF subge PF_X, PF_X, ORIG_W ++ vmull.u8 q8, d22, d4 ++ PF subges PF_CTL, PF_CTL, #0x10 ++ vmull.u8 q9, d22, d5 ++ vmull.u8 q10, d22, d6 ++ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! ++ vmull.u8 q11, d22, d7 ++.endm ++ ++.macro pixman_composite_over_reverse_n_8888_init ++ add DUMMY, sp, #ARGS_STACK_OFFSET ++ vld1.32 {d7[0]}, [DUMMY] ++ vdup.8 d4, d7[0] ++ vdup.8 d5, d7[1] ++ vdup.8 d6, d7[2] ++ vdup.8 d7, d7[3] ++.endm ++ ++generate_composite_function \ ++ pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \ ++ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ pixman_composite_over_reverse_n_8888_init, \ ++ default_cleanup, \ ++ pixman_composite_over_8888_8888_process_pixblock_head, \ ++ pixman_composite_over_8888_8888_process_pixblock_tail, \ ++ pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \ ++ 28, /* dst_w_basereg */ \ ++ 0, /* dst_r_basereg */ \ ++ 4, /* src_basereg */ \ ++ 24 /* mask_basereg */ ++ ++/******************************************************************************/ ++ + .macro pixman_composite_over_n_8_0565_process_pixblock_head + /* in */ + vmull.u8 q0, d24, d8 +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 12d92a2..417ce5a 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -264,6 +264,7 @@ BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1) + + BIND_N_NULL_DST(over_n_0565, uint16_t, 1) + BIND_N_NULL_DST(over_n_8888, uint32_t, 1) ++BIND_N_NULL_DST(over_reverse_n_8888, uint32_t, 1) + + BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1) + BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1) +@@ -438,6 +439,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8000_8000), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), + + { PIXMAN_OP_NONE }, + }; +-- +cgit v0.8.3-6-g21f6 |