summary refs log tree commit diff
path: root/recipes/xorg-lib/pixman/over-8888-0565.patch
diff options
context:
space:
mode:
Diffstat (limited to 'recipes/xorg-lib/pixman/over-8888-0565.patch')
-rw-r--r-- recipes/xorg-lib/pixman/over-8888-0565.patch 296
1 files changed, 0 insertions, 296 deletions
diff --git a/recipes/xorg-lib/pixman/over-8888-0565.patch b/recipes/xorg-lib/pixman/over-8888-0565.patch
deleted file mode 100644
index 3e27094022..0000000000
--- a/recipes/xorg-lib/pixman/over-8888-0565.patch
+++ /dev/null
@@ -1,296 +0,0 @@
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Mon, 27 Jul 2009 04:48:04 +0000 (+0300)
-Subject: ARM: NEON optimized version of composite_over_8888_0565
-X-Git-Url: http://siarhei.siamashka.name/gitweb/?p=pixman.git;a=commitdiff_plain;h=17d8ab82858511f212dfb30c347255393eb12b0c
-
-ARM: NEON optimized version of composite_over_8888_0565
----
-
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 9404c70..f1dcf1f 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -1447,6 +1447,274 @@ neon_composite_src_16_16 (pixman_implementation_t * impl,
- }
- }
-
-+static inline void
-+neon_composite_over_8888_0565_internal (uint32_t *src,
-+ uint16_t *dst,
-+ int32_t w,
-+ int32_t h,
-+ int32_t src_stride,
-+ int32_t dst_stride)
-+{
-+ int32_t dst_newline_delta = (dst_stride - w) * 2;
-+ int32_t src_newline_delta = (src_stride - w) * 4;
-+ asm volatile (
-+
-+ ".macro process_pixblock_head size\n"
-+ /* load pixel data from memory */
-+ " .if \\size == 8\n"
-+ " vld1.32 {d0, d1, d2, d3}, [%[src]]!\n"
-+ " vld1.16 {d4, d5}, [%[dst_r]]!\n"
-+ " .elseif \\size == 4\n"
-+ " vld1.32 {d0, d1}, [%[src]]!\n"
-+ " vld1.16 {d4}, [%[dst_r]]!\n"
-+ " .elseif \\size == 2\n"
-+ " vld1.32 {d0}, [%[src]]!\n"
-+ " vld1.16 {d4[0]}, [%[dst_r]]!\n"
-+ " vld1.16 {d4[1]}, [%[dst_r]]!\n"
-+ " .elseif \\size == 1\n"
-+ " vld1.32 {d0[0]}, [%[src]]!\n"
-+ " vld1.16 {d4[0]}, [%[dst_r]]!\n"
-+ " .endif\n"
-+ /* deinterleave and convert both source and destination
-+ to "planar" 8-bit format */
-+ " vshrn.u16 d16, q2, #8\n"
-+ " vuzp.8 d0, d1\n"
-+ " vshrn.u16 d17, q2, #3\n"
-+ " vuzp.8 d2, d3\n"
-+ " vsli.u16 q2, q2, #5\n"
-+ " vuzp.8 d1, d3\n"
-+ " vsri.u8 d16, d16, #5\n"
-+ " vuzp.8 d0, d2\n"
-+ " vmvn.8 d3, d3\n"
-+ " vsri.u8 d17, d17, #6\n"
-+ " vshrn.u16 d18, q2, #2\n"
-+ /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */
-+ /* destination: d16 - red, d17 - green, d18 - blue */
-+ /* now do alpha blending */
-+ " vmull.u8 q10, d3, d16\n"
-+ "pld [%[src], #128]\n"
-+ " vmull.u8 q11, d3, d17\n"
-+ "pld [%[dst_r], #64]\n"
-+ " vmull.u8 q12, d3, d18\n"
-+ " vrshr.u16 q13, q10, #8\n"
-+ " vrshr.u16 q8, q11, #8\n"
-+ " vrshr.u16 q9, q12, #8\n"
-+ " vraddhn.u16 d20, q10, q13\n"
-+ " vraddhn.u16 d21, q11, q8\n"
-+ " vraddhn.u16 d22, q12, q9\n"
-+ ".endm\n"
-+
-+ ".macro process_pixblock_tail size\n"
-+ /* result is ready in d28, d29, d30 (R, G, B) */
-+ " vqadd.u8 d28, d2, d20\n"
-+ " vqadd.u8 d29, d1, d21\n"
-+ " vqadd.u8 d30, d0, d22\n"
-+ /* convert it to r5g6b5 */
-+ " vshll.u8 q3, d28, #8\n"
-+ " vshll.u8 q14, d29, #8\n"
-+ " vshll.u8 q15, d30, #8\n"
-+ " vsri.u16 q3, q14, #5\n"
-+ " vsri.u16 q3, q15, #11\n"
-+ /* store pixel data to memory */
-+ " .if \\size == 8\n"
-+ " vst1.16 {d6, d7}, [%[dst_w], :128]!\n"
-+ " .elseif \\size == 4\n"
-+ " vst1.16 {d6}, [%[dst_w]]!\n"
-+ " .elseif \\size == 2\n"
-+ " vst1.16 {d6[0]}, [%[dst_w]]!\n"
-+ " vst1.16 {d6[1]}, [%[dst_w]]!\n"
-+ " .elseif \\size == 1\n"
-+ " vst1.16 {d6[0]}, [%[dst_w]]!\n"
-+ " .endif\n"
-+ ".endm\n"
-+
-+ /* "tail" of the previous block and "head" of the next block
-+ are merged and interleaved for better instructions scheduling */
-+ ".macro process_pixblock_tail_head_8\n"
-+ " vqadd.u8 d28, d2, d20\n"
-+ " vld1.16 {d4, d5}, [%[dst_r], :128]!\n"
-+ " vqadd.u8 d29, d1, d21\n" /* TODO: try to join these into a */
-+ " vqadd.u8 d30, d0, d22\n" /* single 128-bit operation */
-+ " vshrn.u16 d16, q2, #8\n"
-+ " vld1.32 {d0, d1, d2, d3}, [%[src]]!\n" /* TODO: maybe split */
-+ " vshrn.u16 d17, q2, #3\n"
-+ " vsli.u16 q2, q2, #5\n"
-+ " vuzp.8 d0, d1\n"
-+ " vshll.u8 q3, d28, #8\n"
-+ " vuzp.8 d2, d3\n"
-+ " vshll.u8 q14, d29, #8\n"
-+ " vuzp.8 d1, d3\n"
-+ " vsri.u8 d16, d16, #5\n"
-+ " vuzp.8 d0, d2\n"
-+ " vmvn.8 d3, d3\n"
-+ " vsri.u8 d17, d17, #6\n"
-+ " vshrn.u16 d18, q2, #2\n"
-+ " vmull.u8 q10, d3, d16\n"
-+ "pld [%[src], #128]\n"
-+ " vmull.u8 q11, d3, d17\n"
-+ "pld [%[dst_r], #64]\n"
-+ " vmull.u8 q12, d3, d18\n"
-+ " vsri.u16 d6, d28, #5\n"
-+ " vsri.u16 d7, d29, #5\n"
-+ " vshll.u8 q15, d30, #8\n"
-+ " vrshr.u16 q13, q10, #8\n"
-+ " vrshr.u16 q8, q11, #8\n"
-+ " vrshr.u16 q9, q12, #8\n"
-+ " vsri.u16 d6, d30, #11\n"
-+ " vsri.u16 d7, d31, #11\n"
-+ " vraddhn.u16 d20, q10, q13\n"
-+ " vraddhn.u16 d21, q11, q8\n"
-+ " vraddhn.u16 d22, q12, q9\n"
-+ " vst1.16 {d6, d7}, [%[dst_w], :128]!\n"
-+ ".endm\n"
-+
-+ "subs %[h], %[h], #1\n"
-+ "blt 9f\n"
-+ "0:\n"
-+ "cmp %[w], #8\n"
-+ "blt 8f\n"
-+
-+ /* ensure 16 byte alignment of the destination buffer */
-+ "tst %[dst_r], #0xF\n"
-+ "beq 2f\n"
-+ "tst %[dst_r], #2\n"
-+ "beq 1f\n"
-+ "vld1.32 {d3[0]}, [%[src]]!\n"
-+ "vld1.16 {d5[2]}, [%[dst_r]]!\n"
-+ "sub %[w], %[w], #1\n"
-+ "1:\n"
-+ "tst %[dst_r], #4\n"
-+ "beq 1f\n"
-+ "vld1.32 {d2}, [%[src]]!\n"
-+ "vld1.16 {d5[0]}, [%[dst_r]]!\n"
-+ "vld1.16 {d5[1]}, [%[dst_r]]!\n"
-+ "sub %[w], %[w], #2\n"
-+ "1:\n"
-+ "tst %[dst_r], #8\n"
-+ "beq 1f\n"
-+ "vld1.32 {d0, d1}, [%[src]]!\n"
-+ "vld1.16 {d4}, [%[dst_r]]!\n"
-+ "sub %[w], %[w], #4\n"
-+ "1:\n"
-+ "process_pixblock_head -1\n"
-+ "process_pixblock_tail -1\n"
-+ "tst %[dst_w], #2\n"
-+ "beq 1f\n"
-+ "vst1.16 {d7[2]}, [%[dst_w]]!\n"
-+ "1:\n"
-+ "tst %[dst_w], #4\n"
-+ "beq 1f\n"
-+ "vst1.16 {d7[0]}, [%[dst_w]]!\n"
-+ "vst1.16 {d7[1]}, [%[dst_w]]!\n"
-+ "1:\n"
-+ "tst %[dst_w], #8\n"
-+ "beq 2f\n"
-+ "vst1.16 {d6}, [%[dst_w]]!\n"
-+ "2:\n"
-+
-+ "subs %[w], %[w], #8\n"
-+ "blt 8f\n"
-+ "process_pixblock_head 8\n"
-+ "subs %[w], %[w], #8\n"
-+ "blt 2f\n"
-+ "1:\n" /* innermost pipelined loop */
-+ "process_pixblock_tail_head_8\n"
-+ "subs %[w], %[w], #8\n"
-+ "bge 1b\n"
-+ "2:\n"
-+ "process_pixblock_tail 8\n"
-+
-+ "8:\n"
-+ /* process up to 7 remaining pixels */
-+ "tst %[w], #7\n"
-+ "beq 2f\n"
-+ "tst %[w], #4\n"
-+ "beq 1f\n"
-+ "vld1.32 {d0, d1}, [%[src]]!\n"
-+ "vld1.16 {d4}, [%[dst_r]]!\n"
-+ "1:\n"
-+ "tst %[w], #2\n"
-+ "beq 1f\n"
-+ "vld1.32 {d2}, [%[src]]!\n"
-+ "vld1.16 {d5[0]}, [%[dst_r]]!\n"
-+ "vld1.16 {d5[1]}, [%[dst_r]]!\n"
-+ "1:\n"
-+ "tst %[w], #1\n"
-+ "beq 1f\n"
-+ "vld1.32 {d3[0]}, [%[src]]!\n"
-+ "vld1.16 {d5[2]}, [%[dst_r]]!\n"
-+ "1:\n"
-+
-+ "process_pixblock_head -1\n"
-+ "process_pixblock_tail -1\n"
-+
-+ "tst %[w], #4\n"
-+ "beq 1f\n"
-+ "vst1.16 {d6}, [%[dst_w]]!\n"
-+ "1:\n"
-+ "tst %[w], #2\n"
-+ "beq 1f\n"
-+ "vst1.16 {d7[0]}, [%[dst_w]]!\n"
-+ "vst1.16 {d7[1]}, [%[dst_w]]!\n"
-+ "1:\n"
-+ "tst %[w], #1\n"
-+ "beq 2f\n"
-+ "vst1.16 {d7[2]}, [%[dst_w]]!\n"
-+ "2:\n"
-+
-+ "add %[src], %[src], %[src_newline_delta]\n"
-+ "add %[dst_r], %[dst_r], %[dst_newline_delta]\n"
-+ "add %[dst_w], %[dst_w], %[dst_newline_delta]\n"
-+ "mov %[w], %[orig_w]\n"
-+ "subs %[h], %[h], #1\n"
-+ "bge 0b\n"
-+ "9:\n"
-+ ".purgem process_pixblock_head\n"
-+ ".purgem process_pixblock_tail\n"
-+ ".purgem process_pixblock_tail_head_8\n"
-+
-+ : [src] "+&r" (src), [dst_r] "+&r" (dst), [dst_w] "+&r" (dst),
-+ [w] "+&r" (w), [h] "+&r" (h)
-+ : [dst_newline_delta] "r" (dst_newline_delta),
-+ [src_newline_delta] "r" (src_newline_delta), [orig_w] "r" (w)
-+ : "cc", "memory",
-+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
-+ /* "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", */
-+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
-+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
-+ );
-+}
-+
-+static void
-+neon_composite_over_8888_0565 (pixman_implementation_t *imp,
-+ pixman_op_t op,
-+ pixman_image_t * src_image,
-+ pixman_image_t * mask_image,
-+ pixman_image_t * dst_image,
-+ int32_t src_x,
-+ int32_t src_y,
-+ int32_t mask_x,
-+ int32_t mask_y,
-+ int32_t dest_x,
-+ int32_t dest_y,
-+ int32_t width,
-+ int32_t height)
-+{
-+ uint16_t *dst_line;
-+ uint32_t *src_line;
-+ int32_t dst_stride, src_stride;
-+
-+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-+
-+ neon_composite_over_8888_0565_internal (src_line,
-+ dst_line,
-+ width,
-+ height,
-+ src_stride,
-+ dst_stride);
-+}
-+
- #endif /* USE_GCC_INLINE_ASM */
-
- static void
-@@ -1908,6 +2176,8 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
- #ifdef USE_GCC_INLINE_ASM
- { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_16_16, 0 },
- { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_16_16, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_8888_0565, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_8888_0565, 0 },
- #endif
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_over_8888_8888, 0 },