summary | refs | log | tree | commit | diff
path: root/recipes/xorg-lib/pixman/prefetch.patch
diff options
context:
space:
mode:
Diffstat (limited to 'recipes/xorg-lib/pixman/prefetch.patch')
-rw-r--r-- recipes/xorg-lib/pixman/prefetch.patch 298
1 files changed, 298 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman/prefetch.patch b/recipes/xorg-lib/pixman/prefetch.patch
new file mode 100644
index 0000000000..c2e856ec25
--- /dev/null
+++ b/recipes/xorg-lib/pixman/prefetch.patch
@@ -0,0 +1,298 @@
+From d0044bfbd596f22ed1560579ea6537b39f3dc1af Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 29 Oct 2009 19:06:42 +0000
+Subject: ARM: Don't emit prefetch code if prefetch distance is set to 0
+
+It is also now possible to disable prefetch globally by setting the
+PREFETCH_GLOBALLY_DISABLED configuration macro to a nonzero value.
+---
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index bca499a..35e6a7e 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -219,33 +219,33 @@
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vshll.u8 q14, d16, #8
+- add PF_X, PF_X, #8
++ PF add PF_X, PF_X, #8
+ vshll.u8 q8, d19, #8
+- tst PF_CTL, #0xF
++ PF tst PF_CTL, #0xF
+ vsri.u8 d6, d6, #5
+- addne PF_X, PF_X, #8
++ PF addne PF_X, PF_X, #8
+ vmvn.8 d3, d3
+- subne PF_CTL, PF_CTL, #1
++ PF subne PF_CTL, PF_CTL, #1
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ vmull.u8 q10, d3, d6
+- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
++ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+- pld [PF_DST, PF_X, lsl #dst_bpp_shift]
++ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vsri.u16 q14, q8, #5
+- cmp PF_X, ORIG_W
++ PF cmp PF_X, ORIG_W
+ vshll.u8 q9, d18, #8
+ vrshr.u16 q13, q10, #8
+- subge PF_X, PF_X, ORIG_W
++ PF subge PF_X, PF_X, ORIG_W
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+- subges PF_CTL, PF_CTL, #0x10
++ PF subges PF_CTL, PF_CTL, #0x10
+ vsri.u16 q14, q9, #11
+- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+- ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vraddhn.u16 d22, q12, q15
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ .endm
+@@ -323,20 +323,20 @@ generate_composite_function \
+
+ .macro pixman_composite_src_8888_0565_process_pixblock_tail_head
+ vsri.u16 q14, q8, #5
+- add PF_X, PF_X, #8
+- tst PF_CTL, #0xF
++ PF add PF_X, PF_X, #8
++ PF tst PF_CTL, #0xF
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+- addne PF_X, PF_X, #8
+- subne PF_CTL, PF_CTL, #1
++ PF addne PF_X, PF_X, #8
++ PF subne PF_CTL, PF_CTL, #1
+ vsri.u16 q14, q9, #11
+- cmp PF_X, ORIG_W
+- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
++ PF cmp PF_X, ORIG_W
++ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vshll.u8 q8, d1, #8
+ vst1.16 {d28, d29}, [DST_W, :128]!
+- subge PF_X, PF_X, ORIG_W
+- subges PF_CTL, PF_CTL, #0x10
++ PF subge PF_X, PF_X, ORIG_W
++ PF subges PF_CTL, PF_CTL, #0x10
+ vshll.u8 q14, d2, #8
+- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vshll.u8 q9, d0, #8
+ .endm
+
+@@ -363,20 +363,20 @@ generate_composite_function \
+
+ .macro pixman_composite_add_8000_8000_process_pixblock_tail_head
+ vld1.8 {d0, d1, d2, d3}, [SRC]!
+- add PF_X, PF_X, #32
+- tst PF_CTL, #0xF
++ PF add PF_X, PF_X, #32
++ PF tst PF_CTL, #0xF
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+- addne PF_X, PF_X, #32
+- subne PF_CTL, PF_CTL, #1
++ PF addne PF_X, PF_X, #32
++ PF subne PF_CTL, PF_CTL, #1
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+- cmp PF_X, ORIG_W
+- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
+- pld [PF_DST, PF_X, lsl #dst_bpp_shift]
+- subge PF_X, PF_X, ORIG_W
+- subges PF_CTL, PF_CTL, #0x10
++ PF cmp PF_X, ORIG_W
++ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
++ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
++ PF subge PF_X, PF_X, ORIG_W
++ PF subges PF_CTL, PF_CTL, #0x10
+ vqadd.u8 q14, q0, q2
+- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+- ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vqadd.u8 q15, q1, q3
+ .endm
+
+@@ -418,32 +418,32 @@ generate_composite_function \
+ .macro pixman_composite_over_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vrshr.u16 q14, q8, #8
+- add PF_X, PF_X, #8
+- tst PF_CTL, #0xF
++ PF add PF_X, PF_X, #8
++ PF tst PF_CTL, #0xF
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+- addne PF_X, PF_X, #8
+- subne PF_CTL, PF_CTL, #1
++ PF addne PF_X, PF_X, #8
++ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+- cmp PF_X, ORIG_W
++ PF cmp PF_X, ORIG_W
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
++ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vmvn.8 d22, d3
+- pld [PF_DST, PF_X, lsl #dst_bpp_shift]
++ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+- subge PF_X, PF_X, ORIG_W
++ PF subge PF_X, PF_X, ORIG_W
+ vmull.u8 q8, d22, d4
+- subges PF_CTL, PF_CTL, #0x10
++ PF subges PF_CTL, PF_CTL, #0x10
+ vmull.u8 q9, d22, d5
+- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vmull.u8 q10, d22, d6
+- ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vmull.u8 q11, d22, d7
+ .endm
+
+diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
+index d276ab9..a2941ae 100644
+--- a/pixman/pixman-arm-neon-asm.h
++++ b/pixman/pixman-arm-neon-asm.h
+@@ -58,6 +58,11 @@
+ #define RESPECT_STRICT_ALIGNMENT 1
+
+ /*
++ * If set to nonzero value, prefetch is globally disabled
++ */
++#define PREFETCH_GLOBALLY_DISABLED 0
++
++/*
+ * Definitions of supplementary pixld/pixst macros (for partial load/store of
+ * pixel data)
+ */
+@@ -218,37 +223,43 @@
+ * pixels processing like simple copy. Anyway, having prefetch is a must
+ * when working with graphics data.
+ */
++.macro PF a, x:vararg
++.if (ADVANCED_PREFETCH_ENABLED != 0) && (PREFETCH_GLOBALLY_DISABLED == 0)
++ a x
++.endif
++.endm
++
+ .macro cache_preload std_increment, boost_increment
+ .if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
+ .if regs_shortage
+- ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
++ PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
+ .endif
+ .if std_increment != 0
+- add PF_X, PF_X, #std_increment
++ PF add PF_X, PF_X, #std_increment
+ .endif
+- tst PF_CTL, #0xF
+- addne PF_X, PF_X, #boost_increment
+- subne PF_CTL, PF_CTL, #1
+- cmp PF_X, ORIG_W
++ PF tst PF_CTL, #0xF
++ PF addne PF_X, PF_X, #boost_increment
++ PF subne PF_CTL, PF_CTL, #1
++ PF cmp PF_X, ORIG_W
+ .if src_bpp_shift >= 0
+- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
++ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ .endif
+ .if dst_r_bpp != 0
+- pld [PF_DST, PF_X, lsl #dst_bpp_shift]
++ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ .endif
+ .if mask_bpp_shift >= 0
+- pld [PF_MASK, PF_X, lsl #mask_bpp_shift]
++ PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
+ .endif
+- subge PF_X, PF_X, ORIG_W
+- subges PF_CTL, PF_CTL, #0x10
++ PF subge PF_X, PF_X, ORIG_W
++ PF subges PF_CTL, PF_CTL, #0x10
+ .if src_bpp_shift >= 0
+- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ .endif
+ .if dst_r_bpp != 0
+- ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ .endif
+ .if mask_bpp_shift >= 0
+- ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
++ PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
+ .endif
+ .endif
+ .endm
+@@ -297,6 +308,12 @@ fname:
+ PF_DST .req r12
+ PF_MASK .req r14
+
++.if prefetch_distance == 0
++ .set ADVANCED_PREFETCH_ENABLED, 0
++.else
++ .set ADVANCED_PREFETCH_ENABLED, 1
++.endif
++
+ .if mask_bpp == 0
+ ORIG_W .req r7 /* saved original width */
+ DUMMY .req r8 /* temporary register */
+@@ -374,12 +391,12 @@ fname:
+ ldr MASK_STRIDE, [sp, #52]
+ .endif
+ mov DST_R, DST_W
+- mov PF_SRC, SRC
+- mov PF_DST, DST_R
+- mov PF_MASK, MASK
+- mov PF_CTL, H, lsl #4
+- /* pf_ctl = 10 | ((h - 1) << 4) */
+- add PF_CTL, #(prefetch_distance - 0x10)
++ PF mov PF_SRC, SRC
++ PF mov PF_DST, DST_R
++ PF mov PF_MASK, MASK
++ /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
++ PF mov PF_CTL, H, lsl #4
++ PF add PF_CTL, #(prefetch_distance - 0x10)
+
+ init
+ .if regs_shortage
+@@ -412,7 +429,7 @@ fname:
+ .else
+ add DST_R, DST_R, #lowbit
+ .endif
+- add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
++ PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
+ sub W, W, #(lowbit * 8 / dst_w_bpp)
+ 1:
+ .endif
+@@ -444,7 +461,7 @@ fname:
+ (src_basereg - pixblock_size * src_bpp / 64), SRC
+ pixld pixblock_size, mask_bpp, \
+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+- add PF_X, PF_X, #pixblock_size
++ PF add PF_X, PF_X, #pixblock_size
+ process_pixblock_head
+ cache_preload 0, pixblock_size
+ subs W, W, #(pixblock_size * 2)
+@@ -468,7 +485,7 @@ fname:
+ pixld chunk_size, src_bpp, src_basereg, SRC
+ pixld chunk_size, mask_bpp, mask_basereg, MASK
+ pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R
+- add PF_X, PF_X, #chunk_size
++ PF add PF_X, PF_X, #chunk_size
+ 1:
+ .endif
+ .endr
+--
+cgit v0.8.2