1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
From edc80b41c6480b7c80ec5f7c835c92b2debb3774 Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date: Thu, 22 Oct 2009 05:45:54 +0300
Subject: [PATCH 5/6] Support of overlapping src/dst for pixman_blt_sse2
---
pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++--------------------
1 files changed, 32 insertions(+), 23 deletions(-)
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 78b0ad1..b84636b 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5300,34 +5300,43 @@ pixman_blt_sse2 (uint32_t *src_bits,
{
uint8_t * src_bytes;
uint8_t * dst_bytes;
- int byte_width;
+ int bpp;
- if (src_bpp != dst_bpp)
+ if (src_bpp != dst_bpp || src_bpp & 7)
return FALSE;
- if (src_bpp == 16)
- {
- src_stride = src_stride * (int) sizeof (uint32_t) / 2;
- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
- src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
- byte_width = 2 * width;
- src_stride *= 2;
- dst_stride *= 2;
- }
- else if (src_bpp == 32)
+ bpp = src_bpp >> 3;
+ width *= bpp;
+ src_stride *= 4;
+ dst_stride *= 4;
+ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
+ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
+
+ if (src_bpp != 16 && src_bpp != 32)
{
- src_stride = src_stride * (int) sizeof (uint32_t) / 4;
- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
- byte_width = 4 * width;
- src_stride *= 4;
- dst_stride *= 4;
+ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
+ width, height);
+ return TRUE;
}
- else
+
+ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
{
- return FALSE;
+ src_bytes += src_stride * height - src_stride;
+ dst_bytes += dst_stride * height - dst_stride;
+ dst_stride = -dst_stride;
+ src_stride = -src_stride;
+
+ if (src_bytes + width > dst_bytes)
+ {
+ /* TODO: reverse scanline copy using SSE2 */
+ while (--height >= 0)
+ {
+ memmove (dst_bytes, src_bytes, width);
+ dst_bytes += dst_stride;
+ src_bytes += src_stride;
+ }
+ return TRUE;
+ }
}
cache_prefetch ((__m128i*)src_bytes);
@@ -5340,7 +5349,7 @@ pixman_blt_sse2 (uint32_t *src_bits,
uint8_t *d = dst_bytes;
src_bytes += src_stride;
dst_bytes += dst_stride;
- w = byte_width;
+ w = width;
cache_prefetch_next ((__m128i*)s);
cache_prefetch_next ((__m128i*)d);
--
1.6.2.4
|