summary | refs | log | tree | commit | diff
path: root/recipes/xorg-lib/pixman/neon-24bpp.patch
diff options
context:
space:
mode:
author Koen Kooi <koen@openembedded.org> 2009-11-01 20:01:27 +0100
committer Koen Kooi <koen@openembedded.org> 2009-11-01 20:01:27 +0100
commit f1a20d6191e191a1ac74f5760e8931ed1ee8736e (patch)
tree 4460863f7118e106a123f0e2f53e6cd826de245c /recipes/xorg-lib/pixman/neon-24bpp.patch
parent 24c8f2b9e012595b15a2ffa3e042a558f736cc69 (diff)
pixman git: add more NEON patches, bump SRCREV
Diffstat (limited to 'recipes/xorg-lib/pixman/neon-24bpp.patch')
-rw-r--r-- recipes/xorg-lib/pixman/neon-24bpp.patch 264
1 files changed, 264 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman/neon-24bpp.patch b/recipes/xorg-lib/pixman/neon-24bpp.patch
new file mode 100644
index 0000000000..edfd367626
--- /dev/null
+++ b/recipes/xorg-lib/pixman/neon-24bpp.patch
@@ -0,0 +1,264 @@
+From b101c115102b83bb1fc4e28de6136dd4940796bc Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Fri, 30 Oct 2009 17:02:14 +0000
+Subject: ARM: initial 24bpp support
+
+---
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 35e6a7e..7f91ced 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -977,3 +977,32 @@ generate_composite_function \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail_head
++
++/******************************************************************************/
++
++.macro pixman_composite_src_0888_0888_process_pixblock_head
++.endm /* head stage has an empty body: it expands to no instructions */
++
++.macro pixman_composite_src_0888_0888_process_pixblock_tail
++.endm /* tail stage has an empty body: it expands to no instructions */
++
++.macro pixman_composite_src_0888_0888_process_pixblock_tail_head
++ vst3.8 {d0, d1, d2}, [DST_W]! /* store 8 three-byte pixels held in d0-d2 (bytes re-interleaved); DST_W is post-incremented */
++ vld3.8 {d0, d1, d2}, [SRC]! /* reload the same registers with the next 8 pixels, so this must follow the store; SRC is post-incremented */
++ cache_preload 8, 8 /* NOTE(review): helper defined outside this patch; presumably issues prefetches - confirm */
++.endm
++
++generate_composite_function \
++ pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, /* NOTE(review): presumably src_bpp, mask_bpp, dst_w_bpp - confirm against the macro definition */ \
++ FLAG_DST_WRITEONLY, \
++ 8, /* number of pixels, processed in a single block */ \
++ 10, /* prefetch distance */ \
++ default_init, \
++ default_cleanup, \
++ pixman_composite_src_0888_0888_process_pixblock_head, /* empty macro */ \
++ pixman_composite_src_0888_0888_process_pixblock_tail, /* empty macro */ \
++ pixman_composite_src_0888_0888_process_pixblock_tail_head, /* the only stage that emits instructions */ \
++ 0, /* dst_w_basereg */ \
++ 0, /* dst_r_basereg */ \
++ 0, /* src_basereg */ \
++ 0 /* mask_basereg */
+diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
+index a2941ae..1653ef4 100644
+--- a/pixman/pixman-arm-neon-asm.h
++++ b/pixman/pixman-arm-neon-asm.h
+@@ -95,6 +95,14 @@
+ op&.&elem_size {d&reg1[idx]}, [&mem_operand&]!
+ .endm
+
++.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand
++ op&.&elem_size {d&reg1, d&reg2, d&reg3}, [&mem_operand&]! /* emits one 3-register op on whole D registers, with address writeback */
++.endm
++
++.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand
++ op&.&elem_size {d&reg1[idx], d&reg2[idx], d&reg3[idx]}, [&mem_operand&]! /* single-lane variant: touches only lane idx of each D register, with address writeback */
++.endm
++
+ .macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits
+ .if numbytes == 32
+ pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \
+@@ -134,6 +142,18 @@
+ .if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \
+ %(basereg+6), %(basereg+7), mem_operand, abits
++.elseif (bpp == 24) && (numpix == 8)
++ pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
++.elseif (bpp == 24) && (numpix == 4)
++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
++.elseif (bpp == 24) && (numpix == 2)
++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
++.elseif (bpp == 24) && (numpix == 1)
++ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
+ .else
+ pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits
+ .endif
+@@ -145,6 +165,18 @@
+ .if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \
+ %(basereg+6), %(basereg+7), mem_operand, abits
++.elseif (bpp == 24) && (numpix == 8)
++ pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
++.elseif (bpp == 24) && (numpix == 4)
++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
++.elseif (bpp == 24) && (numpix == 2)
++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
++.elseif (bpp == 24) && (numpix == 1)
++ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
+ .else
+ pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits
+ .endif
+@@ -334,6 +366,8 @@ fname:
+
+ .if src_bpp == 32
+ .set src_bpp_shift, 2
++.elseif src_bpp == 24
++ .set src_bpp_shift, 0
+ .elseif src_bpp == 16
+ .set src_bpp_shift, 1
+ .elseif src_bpp == 8
+@@ -345,6 +379,8 @@ fname:
+ .endif
+ .if mask_bpp == 32
+ .set mask_bpp_shift, 2
++.elseif mask_bpp == 24
++ .set mask_bpp_shift, 0
+ .elseif mask_bpp == 8
+ .set mask_bpp_shift, 0
+ .elseif mask_bpp == 0
+@@ -354,6 +390,8 @@ fname:
+ .endif
+ .if dst_w_bpp == 32
+ .set dst_bpp_shift, 2
++.elseif dst_w_bpp == 24
++ .set dst_bpp_shift, 0
+ .elseif dst_w_bpp == 16
+ .set dst_bpp_shift, 1
+ .elseif dst_w_bpp == 8
+@@ -398,6 +436,19 @@ fname:
+ PF mov PF_CTL, H, lsl #4
+ PF add PF_CTL, #(prefetch_distance - 0x10)
+
++.if src_bpp == 24
++ sub SRC_STRIDE, SRC_STRIDE, W
++ sub SRC_STRIDE, SRC_STRIDE, W, lsl #1
++.endif
++.if mask_bpp == 24
++ sub MASK_STRIDE, MASK_STRIDE, W
++ sub MASK_STRIDE, MASK_STRIDE, W, lsl #1
++.endif
++.if dst_w_bpp == 24
++ sub DST_STRIDE, DST_STRIDE, W
++ sub DST_STRIDE, DST_STRIDE, W, lsl #1
++.endif
++
+ init
+ .if regs_shortage
+ push {r0, r1}
+@@ -412,7 +463,8 @@ fname:
+ cmp W, #(pixblock_size * 2)
+ blt 8f
+ 0:
+- /* ensure 16 byte alignment of the destination buffer */
++ /* ensure 16 byte alignment of the destination buffer, except for 24bpp */
++.if dst_w_bpp != 24
+ tst DST_R, #0xF
+ beq 2f
+
+@@ -454,6 +506,7 @@ fname:
+ .endif
+ .endr
+ 2:
++.endif
+
+ pixld_a pixblock_size, dst_r_bpp, \
+ (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
+@@ -520,11 +573,13 @@ fname:
+ .if mask_bpp != 0
+ add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift
+ .endif
++.if (dst_w_bpp != 24)
+ sub DST_W, DST_W, W, lsl #dst_bpp_shift
+-.if src_bpp != 0
++.endif
++.if (src_bpp != 24) && (src_bpp != 0)
+ sub SRC, SRC, W, lsl #src_bpp_shift
+ .endif
+-.if mask_bpp != 0
++.if (mask_bpp != 24) && (mask_bpp != 0)
+ sub MASK, MASK, W, lsl #mask_bpp_shift
+ .endif
+ subs H, H, #1
+@@ -539,7 +594,7 @@ fname:
+ cleanup
+ pop {r4-r12, pc} /* exit */
+
+-8: /* handle small rectangle, width up to 15 pixels */
++8: /* handle small rectangle, width up to (pixblock_size * 2 - 1) pixels */
+ tst W, #pixblock_size
+ beq 1f
+ pixld pixblock_size, dst_r_bpp, \
+@@ -592,11 +647,13 @@ fname:
+ .if mask_bpp != 0
+ add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift
+ .endif
++.if (dst_w_bpp != 24)
+ sub DST_W, DST_W, W, lsl #dst_bpp_shift
+-.if src_bpp != 0
++.endif
++.if (src_bpp != 24) && (src_bpp != 0)
+ sub SRC, SRC, W, lsl #src_bpp_shift
+ .endif
+-.if mask_bpp != 0
++.if (mask_bpp != 24) && (mask_bpp != 0)
+ sub MASK, MASK, W, lsl #mask_bpp_shift
+ .endif
+ subs H, H, #1
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 2811099..f3f38a9 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -2065,6 +2065,43 @@ neon_composite_src_8888_8888 (pixman_implementation_t *imp,
+ }
+
+ void
++pixman_composite_src_0888_0888_asm_neon (int32_t w, /* prototype for the routine generated in pixman-arm-neon-asm.S; w receives the wrapper's width */
++ int32_t h, /* receives the wrapper's height */
++ uint8_t *dst, /* receives dst_line from the wrapper */
++ int32_t dst_stride, /* NOTE(review): unit not visible here; the wrapper obtains it via PIXMAN_IMAGE_GET_LINE with uint8_t - confirm */
++ uint8_t *src, /* receives src_line from the wrapper */
++ int32_t src_stride); /* same convention as dst_stride */
++
++static void /* C wrapper that resolves the source/destination line pointers and hands the copy to the NEON assembly routine */
++neon_composite_src_0888_0888 (pixman_implementation_t *imp, /* not referenced in the body */
++ pixman_op_t op, /* not referenced in the body */
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image, /* not referenced in the body */
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x, /* not referenced in the body */
++ int32_t mask_y, /* not referenced in the body */
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint8_t *dst_line;
++ uint8_t *src_line;
++ int32_t dst_stride, src_stride;
++
++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t,
++ src_stride, src_line, 3); /* NOTE(review): macro defined elsewhere; presumably sets src_line/src_stride for (src_x, src_y) with 3 = bytes per pixel - confirm */
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t,
++ dst_stride, dst_line, 3); /* same lookup for the destination at (dest_x, dest_y) */
++
++ pixman_composite_src_0888_0888_asm_neon (width, height, /* all pixel work happens in the assembly routine */
++ dst_line, dst_stride,
++ src_line, src_stride);
++}
++
++void
+ pixman_composite_over_8888_8888_asm_neon (int32_t w,
+ int32_t h,
+ uint32_t *dst,
+@@ -2449,6 +2486,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
+ { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_8888_8888, 0 },
+ { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_8888_8888, 0 },
+ { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_0565_0565, 0 },
++ { PIXMAN_OP_SRC, PIXMAN_r8g8b8, PIXMAN_null, PIXMAN_r8g8b8, neon_composite_src_0888_0888, 0 },
+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_8888_0565, 0 },
+ { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_8888_0565, 0 },
+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888, 0 },
+--
+cgit v0.8.2