diff options
author | Koen Kooi <koen@openembedded.org> | 2009-11-01 20:01:27 +0100 |
---|---|---|
committer | Koen Kooi <koen@openembedded.org> | 2009-11-01 20:01:27 +0100 |
commit | f1a20d6191e191a1ac74f5760e8931ed1ee8736e (patch) | |
tree | 4460863f7118e106a123f0e2f53e6cd826de245c /recipes/xorg-lib/pixman/neon-24bpp.patch | |
parent | 24c8f2b9e012595b15a2ffa3e042a558f736cc69 (diff) |
pixman git: add more NEON patches, bump SRCREV
Diffstat (limited to 'recipes/xorg-lib/pixman/neon-24bpp.patch')
-rw-r--r-- | recipes/xorg-lib/pixman/neon-24bpp.patch | 264 |
1 files changed, 264 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman/neon-24bpp.patch b/recipes/xorg-lib/pixman/neon-24bpp.patch new file mode 100644 index 0000000000..edfd367626 --- /dev/null +++ b/recipes/xorg-lib/pixman/neon-24bpp.patch @@ -0,0 +1,264 @@ +From b101c115102b83bb1fc4e28de6136dd4940796bc Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri, 30 Oct 2009 17:02:14 +0000 +Subject: ARM: initial 24bpp support + +--- +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 35e6a7e..7f91ced 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -977,3 +977,32 @@ generate_composite_function \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail_head ++ ++/******************************************************************************/ ++ ++.macro pixman_composite_src_0888_0888_process_pixblock_head ++.endm ++ ++.macro pixman_composite_src_0888_0888_process_pixblock_tail ++.endm ++ ++.macro pixman_composite_src_0888_0888_process_pixblock_tail_head ++ vst3.8 {d0, d1, d2}, [DST_W]! ++ vld3.8 {d0, d1, d2}, [SRC]! 
++ cache_preload 8, 8 ++.endm ++ ++generate_composite_function \ ++ pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \ ++ FLAG_DST_WRITEONLY, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 10, /* prefetch distance */ \ ++ default_init, \ ++ default_cleanup, \ ++ pixman_composite_src_0888_0888_process_pixblock_head, \ ++ pixman_composite_src_0888_0888_process_pixblock_tail, \ ++ pixman_composite_src_0888_0888_process_pixblock_tail_head, \ ++ 0, /* dst_w_basereg */ \ ++ 0, /* dst_r_basereg */ \ ++ 0, /* src_basereg */ \ ++ 0 /* mask_basereg */ +diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h +index a2941ae..1653ef4 100644 +--- a/pixman/pixman-arm-neon-asm.h ++++ b/pixman/pixman-arm-neon-asm.h +@@ -95,6 +95,14 @@ + op&.&elem_size {d&reg1[idx]}, [&mem_operand&]! + .endm + ++.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand ++ op&.&elem_size {d&reg1, d&reg2, d&reg3}, [&mem_operand&]! ++.endm ++ ++.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand ++ op&.&elem_size {d&reg1[idx], d&reg2[idx], d&reg3[idx]}, [&mem_operand&]! 
++.endm ++ + .macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits + .if numbytes == 32 + pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \ +@@ -134,6 +142,18 @@ + .if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits ++.elseif (bpp == 24) && (numpix == 8) ++ pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand ++.elseif (bpp == 24) && (numpix == 4) ++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand ++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand ++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand ++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand ++.elseif (bpp == 24) && (numpix == 2) ++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand ++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand ++.elseif (bpp == 24) && (numpix == 1) ++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand + .else + pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits + .endif +@@ -145,6 +165,18 @@ + .if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits ++.elseif (bpp == 24) && (numpix == 8) ++ pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand ++.elseif (bpp == 24) && (numpix == 4) ++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand ++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand ++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand ++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand ++.elseif (bpp == 24) && (numpix == 2) ++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, 
mem_operand ++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand ++.elseif (bpp == 24) && (numpix == 1) ++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand + .else + pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits + .endif +@@ -334,6 +366,8 @@ fname: + + .if src_bpp == 32 + .set src_bpp_shift, 2 ++.elseif src_bpp == 24 ++ .set src_bpp_shift, 0 + .elseif src_bpp == 16 + .set src_bpp_shift, 1 + .elseif src_bpp == 8 +@@ -345,6 +379,8 @@ fname: + .endif + .if mask_bpp == 32 + .set mask_bpp_shift, 2 ++.elseif mask_bpp == 24 ++ .set mask_bpp_shift, 0 + .elseif mask_bpp == 8 + .set mask_bpp_shift, 0 + .elseif mask_bpp == 0 +@@ -354,6 +390,8 @@ fname: + .endif + .if dst_w_bpp == 32 + .set dst_bpp_shift, 2 ++.elseif dst_w_bpp == 24 ++ .set dst_bpp_shift, 0 + .elseif dst_w_bpp == 16 + .set dst_bpp_shift, 1 + .elseif dst_w_bpp == 8 +@@ -398,6 +436,19 @@ fname: + PF mov PF_CTL, H, lsl #4 + PF add PF_CTL, #(prefetch_distance - 0x10) + ++.if src_bpp == 24 ++ sub SRC_STRIDE, SRC_STRIDE, W ++ sub SRC_STRIDE, SRC_STRIDE, W, lsl #1 ++.endif ++.if mask_bpp == 24 ++ sub MASK_STRIDE, MASK_STRIDE, W ++ sub MASK_STRIDE, MASK_STRIDE, W, lsl #1 ++.endif ++.if dst_w_bpp == 24 ++ sub DST_STRIDE, DST_STRIDE, W ++ sub DST_STRIDE, DST_STRIDE, W, lsl #1 ++.endif ++ + init + .if regs_shortage + push {r0, r1} +@@ -412,7 +463,8 @@ fname: + cmp W, #(pixblock_size * 2) + blt 8f + 0: +- /* ensure 16 byte alignment of the destination buffer */ ++ /* ensure 16 byte alignment of the destination buffer, except for 24bpp */ ++.if dst_w_bpp != 24 + tst DST_R, #0xF + beq 2f + +@@ -454,6 +506,7 @@ fname: + .endif + .endr + 2: ++.endif + + pixld_a pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R +@@ -520,11 +573,13 @@ fname: + .if mask_bpp != 0 + add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift + .endif ++.if (dst_w_bpp != 24) + sub DST_W, DST_W, W, lsl #dst_bpp_shift +-.if src_bpp != 0 ++.endif 
++.if (src_bpp != 24) && (src_bpp != 0) + sub SRC, SRC, W, lsl #src_bpp_shift + .endif +-.if mask_bpp != 0 ++.if (mask_bpp != 24) && (mask_bpp != 0) + sub MASK, MASK, W, lsl #mask_bpp_shift + .endif + subs H, H, #1 +@@ -539,7 +594,7 @@ fname: + cleanup + pop {r4-r12, pc} /* exit */ + +-8: /* handle small rectangle, width up to 15 pixels */ ++8: /* handle small rectangle, width up to (pixblock_size * 2 - 1) pixels */ + tst W, #pixblock_size + beq 1f + pixld pixblock_size, dst_r_bpp, \ +@@ -592,11 +647,13 @@ fname: + .if mask_bpp != 0 + add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift + .endif ++.if (dst_w_bpp != 24) + sub DST_W, DST_W, W, lsl #dst_bpp_shift +-.if src_bpp != 0 ++.endif ++.if (src_bpp != 24) && (src_bpp != 0) + sub SRC, SRC, W, lsl #src_bpp_shift + .endif +-.if mask_bpp != 0 ++.if (mask_bpp != 24) && (mask_bpp != 0) + sub MASK, MASK, W, lsl #mask_bpp_shift + .endif + subs H, H, #1 +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 2811099..f3f38a9 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -2065,6 +2065,43 @@ neon_composite_src_8888_8888 (pixman_implementation_t *imp, + } + + void ++pixman_composite_src_0888_0888_asm_neon (int32_t w, ++ int32_t h, ++ uint8_t *dst, ++ int32_t dst_stride, ++ uint8_t *src, ++ int32_t src_stride); ++ ++static void ++neon_composite_src_0888_0888 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint8_t *dst_line; ++ uint8_t *src_line; ++ int32_t dst_stride, src_stride; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, ++ src_stride, src_line, 3); ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, ++ dst_stride, dst_line, 3); ++ ++ pixman_composite_src_0888_0888_asm_neon (width, height, ++ dst_line, 
dst_stride, ++ src_line, src_stride); ++} ++ ++void + pixman_composite_over_8888_8888_asm_neon (int32_t w, + int32_t h, + uint32_t *dst, +@@ -2449,6 +2486,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] = + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_8888_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_8888_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_0565_0565, 0 }, ++ { PIXMAN_OP_SRC, PIXMAN_r8g8b8, PIXMAN_null, PIXMAN_r8g8b8, neon_composite_src_0888_0888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_8888_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_8888_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888, 0 }, +-- +cgit v0.8.2 |