diff options
Diffstat (limited to 'recipes/xorg-lib/pixman/over-8888-0565.patch')
-rw-r--r-- | recipes/xorg-lib/pixman/over-8888-0565.patch | 296 |
1 files changed, 0 insertions, 296 deletions
diff --git a/recipes/xorg-lib/pixman/over-8888-0565.patch b/recipes/xorg-lib/pixman/over-8888-0565.patch deleted file mode 100644 index 3e27094022..0000000000 --- a/recipes/xorg-lib/pixman/over-8888-0565.patch +++ /dev/null @@ -1,296 +0,0 @@ -From: Siarhei Siamashka <siarhei.siamashka@nokia.com> -Date: Mon, 27 Jul 2009 04:48:04 +0000 (+0300) -Subject: ARM: NEON optimized version of composite_over_8888_0565 -X-Git-Url: http://siarhei.siamashka.name/gitweb/?p=pixman.git;a=commitdiff_plain;h=17d8ab82858511f212dfb30c347255393eb12b0c - -ARM: NEON optimized version of composite_over_8888_0565 ---- - -diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index 9404c70..f1dcf1f 100644 ---- a/pixman/pixman-arm-neon.c -+++ b/pixman/pixman-arm-neon.c -@@ -1447,6 +1447,274 @@ neon_composite_src_16_16 (pixman_implementation_t * impl, - } - } - -+static inline void -+neon_composite_over_8888_0565_internal (uint32_t *src, -+ uint16_t *dst, -+ int32_t w, -+ int32_t h, -+ int32_t src_stride, -+ int32_t dst_stride) -+{ -+ int32_t dst_newline_delta = (dst_stride - w) * 2; -+ int32_t src_newline_delta = (src_stride - w) * 4; -+ asm volatile ( -+ -+ ".macro process_pixblock_head size\n" -+ /* load pixel data from memory */ -+ " .if \\size == 8\n" -+ " vld1.32 {d0, d1, d2, d3}, [%[src]]!\n" -+ " vld1.16 {d4, d5}, [%[dst_r]]!\n" -+ " .elseif \\size == 4\n" -+ " vld1.32 {d0, d1}, [%[src]]!\n" -+ " vld1.16 {d4}, [%[dst_r]]!\n" -+ " .elseif \\size == 2\n" -+ " vld1.32 {d0}, [%[src]]!\n" -+ " vld1.16 {d4[0]}, [%[dst_r]]!\n" -+ " vld1.16 {d4[1]}, [%[dst_r]]!\n" -+ " .elseif \\size == 1\n" -+ " vld1.32 {d0[0]}, [%[src]]!\n" -+ " vld1.16 {d4[0]}, [%[dst_r]]!\n" -+ " .endif\n" -+ /* deinterleave and convert both source and destination -+ to "planar" 8-bit format */ -+ " vshrn.u16 d16, q2, #8\n" -+ " vuzp.8 d0, d1\n" -+ " vshrn.u16 d17, q2, #3\n" -+ " vuzp.8 d2, d3\n" -+ " vsli.u16 q2, q2, #5\n" -+ " vuzp.8 d1, d3\n" -+ " vsri.u8 d16, d16, #5\n" -+ " vuzp.8 d0, d2\n" -+ " vmvn.8 d3, d3\n" -+ " vsri.u8 d17, d17, #6\n" -+ " vshrn.u16 d18, q2, #2\n" -+ /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */ -+ /* destination: d16 - red, d17 - green, d18 - blue */ -+ /* now do alpha blending */ -+ " vmull.u8 q10, d3, d16\n" -+ "pld [%[src], #128]\n" -+ " vmull.u8 q11, d3, d17\n" -+ "pld [%[dst_r], #64]\n" -+ " vmull.u8 q12, d3, d18\n" -+ " vrshr.u16 q13, q10, #8\n" -+ " vrshr.u16 q8, q11, #8\n" -+ " vrshr.u16 q9, q12, #8\n" -+ " vraddhn.u16 d20, q10, q13\n" -+ " vraddhn.u16 d21, q11, q8\n" -+ " vraddhn.u16 d22, q12, q9\n" -+ ".endm\n" -+ -+ ".macro process_pixblock_tail size\n" -+ /* result is ready in d28, d29, d30 (R, G, B) */ -+ " vqadd.u8 d28, d2, d20\n" -+ " vqadd.u8 d29, d1, d21\n" -+ " vqadd.u8 d30, d0, d22\n" -+ /* convert it to r5g6b5 */ -+ " vshll.u8 q3, d28, #8\n" -+ " vshll.u8 q14, d29, #8\n" -+ " vshll.u8 q15, d30, #8\n" -+ " vsri.u16 q3, q14, #5\n" -+ " vsri.u16 q3, q15, #11\n" -+ /* store pixel data to memory */ -+ " .if \\size == 8\n" -+ " vst1.16 {d6, d7}, [%[dst_w], :128]!\n" -+ " .elseif \\size == 4\n" -+ " vst1.16 {d6}, [%[dst_w]]!\n" -+ " .elseif \\size == 2\n" -+ " vst1.16 {d6[0]}, [%[dst_w]]!\n" -+ " vst1.16 {d6[1]}, [%[dst_w]]!\n" -+ " .elseif \\size == 1\n" -+ " vst1.16 {d6[0]}, [%[dst_w]]!\n" -+ " .endif\n" -+ ".endm\n" -+ -+ /* "tail" of the previous block and "head" of the next block -+ are merged and interleaved for better instructions scheduling */ -+ ".macro process_pixblock_tail_head_8\n" -+ " vqadd.u8 d28, d2, d20\n" -+ " vld1.16 {d4, d5}, [%[dst_r], :128]!\n" -+ " vqadd.u8 d29, d1, d21\n" /* TODO: try to join these into a */ -+ " vqadd.u8 d30, d0, d22\n" /* single 128-bit operation */ -+ " vshrn.u16 d16, q2, #8\n" -+ " vld1.32 {d0, d1, d2, d3}, [%[src]]!\n" /* TODO: maybe split */ -+ " vshrn.u16 d17, q2, #3\n" -+ " vsli.u16 q2, q2, #5\n" -+ " vuzp.8 d0, d1\n" -+ " vshll.u8 q3, d28, #8\n" -+ " vuzp.8 d2, d3\n" -+ " vshll.u8 q14, d29, #8\n" -+ " vuzp.8 d1, d3\n" -+ " vsri.u8 d16, d16, #5\n" -+ " vuzp.8 d0, d2\n" -+ " vmvn.8 d3, d3\n" -+ " vsri.u8 d17, d17, #6\n" -+ " vshrn.u16 d18, q2, #2\n" -+ " vmull.u8 q10, d3, d16\n" -+ "pld [%[src], #128]\n" -+ " vmull.u8 q11, d3, d17\n" -+ "pld [%[dst_r], #64]\n" -+ " vmull.u8 q12, d3, d18\n" -+ " vsri.u16 d6, d28, #5\n" -+ " vsri.u16 d7, d29, #5\n" -+ " vshll.u8 q15, d30, #8\n" -+ " vrshr.u16 q13, q10, #8\n" -+ " vrshr.u16 q8, q11, #8\n" -+ " vrshr.u16 q9, q12, #8\n" -+ " vsri.u16 d6, d30, #11\n" -+ " vsri.u16 d7, d31, #11\n" -+ " vraddhn.u16 d20, q10, q13\n" -+ " vraddhn.u16 d21, q11, q8\n" -+ " vraddhn.u16 d22, q12, q9\n" -+ " vst1.16 {d6, d7}, [%[dst_w], :128]!\n" -+ ".endm\n" -+ -+ "subs %[h], %[h], #1\n" -+ "blt 9f\n" -+ "0:\n" -+ "cmp %[w], #8\n" -+ "blt 8f\n" -+ -+ /* ensure 16 byte alignment of the destination buffer */ -+ "tst %[dst_r], #0xF\n" -+ "beq 2f\n" -+ "tst %[dst_r], #2\n" -+ "beq 1f\n" -+ "vld1.32 {d3[0]}, [%[src]]!\n" -+ "vld1.16 {d5[2]}, [%[dst_r]]!\n" -+ "sub %[w], %[w], #1\n" -+ "1:\n" -+ "tst %[dst_r], #4\n" -+ "beq 1f\n" -+ "vld1.32 {d2}, [%[src]]!\n" -+ "vld1.16 {d5[0]}, [%[dst_r]]!\n" -+ "vld1.16 {d5[1]}, [%[dst_r]]!\n" -+ "sub %[w], %[w], #2\n" -+ "1:\n" -+ "tst %[dst_r], #8\n" -+ "beq 1f\n" -+ "vld1.32 {d0, d1}, [%[src]]!\n" -+ "vld1.16 {d4}, [%[dst_r]]!\n" -+ "sub %[w], %[w], #4\n" -+ "1:\n" -+ "process_pixblock_head -1\n" -+ "process_pixblock_tail -1\n" -+ "tst %[dst_w], #2\n" -+ "beq 1f\n" -+ "vst1.16 {d7[2]}, [%[dst_w]]!\n" -+ "1:\n" -+ "tst %[dst_w], #4\n" -+ "beq 1f\n" -+ "vst1.16 {d7[0]}, [%[dst_w]]!\n" -+ "vst1.16 {d7[1]}, [%[dst_w]]!\n" -+ "1:\n" -+ "tst %[dst_w], #8\n" -+ "beq 2f\n" -+ "vst1.16 {d6}, [%[dst_w]]!\n" -+ "2:\n" -+ -+ "subs %[w], %[w], #8\n" -+ "blt 8f\n" -+ "process_pixblock_head 8\n" -+ "subs %[w], %[w], #8\n" -+ "blt 2f\n" -+ "1:\n" /* innermost pipelined loop */ -+ "process_pixblock_tail_head_8\n" -+ "subs %[w], %[w], #8\n" -+ "bge 1b\n" -+ "2:\n" -+ "process_pixblock_tail 8\n" -+ -+ "8:\n" -+ /* process up to 7 remaining pixels */ -+ "tst %[w], #7\n" -+ "beq 2f\n" -+ "tst %[w], #4\n" -+ "beq 1f\n" -+ "vld1.32 {d0, d1}, [%[src]]!\n" -+ "vld1.16 {d4}, [%[dst_r]]!\n" -+ "1:\n" -+ "tst %[w], #2\n" -+ "beq 1f\n" -+ "vld1.32 {d2}, [%[src]]!\n" -+ "vld1.16 {d5[0]}, [%[dst_r]]!\n" -+ "vld1.16 {d5[1]}, [%[dst_r]]!\n" -+ "1:\n" -+ "tst %[w], #1\n" -+ "beq 1f\n" -+ "vld1.32 {d3[0]}, [%[src]]!\n" -+ "vld1.16 {d5[2]}, [%[dst_r]]!\n" -+ "1:\n" -+ -+ "process_pixblock_head -1\n" -+ "process_pixblock_tail -1\n" -+ -+ "tst %[w], #4\n" -+ "beq 1f\n" -+ "vst1.16 {d6}, [%[dst_w]]!\n" -+ "1:\n" -+ "tst %[w], #2\n" -+ "beq 1f\n" -+ "vst1.16 {d7[0]}, [%[dst_w]]!\n" -+ "vst1.16 {d7[1]}, [%[dst_w]]!\n" -+ "1:\n" -+ "tst %[w], #1\n" -+ "beq 2f\n" -+ "vst1.16 {d7[2]}, [%[dst_w]]!\n" -+ "2:\n" -+ -+ "add %[src], %[src], %[src_newline_delta]\n" -+ "add %[dst_r], %[dst_r], %[dst_newline_delta]\n" -+ "add %[dst_w], %[dst_w], %[dst_newline_delta]\n" -+ "mov %[w], %[orig_w]\n" -+ "subs %[h], %[h], #1\n" -+ "bge 0b\n" -+ "9:\n" -+ ".purgem process_pixblock_head\n" -+ ".purgem process_pixblock_tail\n" -+ ".purgem process_pixblock_tail_head_8\n" -+ -+ : [src] "+&r" (src), [dst_r] "+&r" (dst), [dst_w] "+&r" (dst), -+ [w] "+&r" (w), [h] "+&r" (h) -+ : [dst_newline_delta] "r" (dst_newline_delta), -+ [src_newline_delta] "r" (src_newline_delta), [orig_w] "r" (w) -+ : "cc", "memory", -+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", -+ /* "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", */ -+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", -+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" -+ ); -+} -+ -+static void -+neon_composite_over_8888_0565 (pixman_implementation_t *imp, -+ pixman_op_t op, -+ pixman_image_t * src_image, -+ pixman_image_t * mask_image, -+ pixman_image_t * dst_image, -+ int32_t src_x, -+ int32_t src_y, -+ int32_t mask_x, -+ int32_t mask_y, -+ int32_t dest_x, -+ int32_t dest_y, -+ int32_t width, -+ int32_t height) -+{ -+ uint16_t *dst_line; -+ uint32_t *src_line; -+ int32_t dst_stride, src_stride; -+ -+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); -+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); -+ -+ neon_composite_over_8888_0565_internal (src_line, -+ dst_line, -+ width, -+ height, -+ src_stride, -+ dst_stride); -+} -+ - #endif /* USE_GCC_INLINE_ASM */ - - static void -@@ -1908,6 +2176,8 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] = - #ifdef USE_GCC_INLINE_ASM - { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_16_16, 0 }, - { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_16_16, 0 }, -+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_8888_0565, 0 }, -+ { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_8888_0565, 0 }, - #endif - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_over_8888_8888, 0 }, |