summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKoen Kooi <koen@openembedded.org>2009-09-03 21:33:55 +0200
committerKoen Kooi <koen@openembedded.org>2009-09-03 21:33:55 +0200
commit5366c510565553f17b28ed936b34dfa4d3026b21 (patch)
tree6e43d1f714e3fd9bea76e97588cc0cd0278970d6
parent19e6e8e30aaf2a791a657d752649865754963ce6 (diff)
pixman git: add some more NEON and fastpath patches
-rw-r--r--recipes/xorg-lib/pixman/nearest-neighbour.patch1040
-rw-r--r--recipes/xorg-lib/pixman/over-8888-0565.patch296
-rw-r--r--recipes/xorg-lib/pixman/pixman-28986.patch32
-rw-r--r--recipes/xorg-lib/pixman/remove-broken.patch826
-rw-r--r--recipes/xorg-lib/pixman_git.bb10
5 files changed, 2201 insertions, 3 deletions
diff --git a/recipes/xorg-lib/pixman/nearest-neighbour.patch b/recipes/xorg-lib/pixman/nearest-neighbour.patch
new file mode 100644
index 0000000000..29b140faf9
--- /dev/null
+++ b/recipes/xorg-lib/pixman/nearest-neighbour.patch
@@ -0,0 +1,1040 @@
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Fri, 17 Jul 2009 10:22:23 +0000 (+0300)
+Subject: Fastpath for nearest neighbour scaled compositing operations.
+X-Git-Url: http://siarhei.siamashka.name/gitweb/?p=pixman.git;a=commitdiff_plain;h=247531c6978725a88fd3706129b9d3e339026f54
+
+Fastpath for nearest neighbour scaled compositing operations.
+
+OVER 8888x8888, OVER 8888x0565, SRC 8888x8888, SRC 8888x0565
+and SRC 0565x0565 cases are supported.
+---
+
+diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
+index 7f80578..7f3a6ad 100644
+--- a/pixman/pixman-fast-path.c
++++ b/pixman/pixman-fast-path.c
+@@ -1261,6 +1261,993 @@ fast_composite_src_scale_nearest (pixman_implementation_t *imp,
+ }
+ }
+
++/*
++ * Functions, which implement the core inner loops for the nearest neighbour
++ * scaled fastpath compositing operations. The do not need to do clipping
++ * checks, also the loops are unrolled to process two pixels per iteration
++ * for better performance on most CPU architectures (superscalar processors
++ * can issue several operations simultaneously, other processors can hide
++ * instructions latencies by pipelining operations). Unrolling more
++ * does not make much sense because the compiler will start running out
++ * of spare registers soon.
++ */
++
++#undef READ
++#undef WRITE
++#define READ(img,x) (*(x))
++#define WRITE(img,ptr,v) ((*(ptr)) = (v))
++
++#define UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(x, a, y) do { \
++ UN8x4_MUL_UN8_ADD_UN8x4(x, a, y); \
++ x = CONVERT_8888_TO_0565(x); \
++ } while (0)
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x0565 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint16_t *dstLine;
++ uint32_t *srcFirstLine;
++ uint32_t d;
++ uint32_t s1, s2;
++ uint8_t a1, a2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++
++ uint32_t *src;
++ uint16_t *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++
++ if ((y < 0) || (y >= pSrc->bits.height)) {
++ continue;
++ }
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ s2 = READ(pSrc, src + x2);
++
++ a1 = s1 >> 24;
++ a2 = s2 >> 24;
++
++ if (a1 == 0xff)
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
++ else if (s1) {
++ d = CONVERT_0565_TO_0888(READ(pDst, dst));
++ a1 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++
++ if (a2 == 0xff)
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
++ else if (s2) {
++ d = CONVERT_0565_TO_0888(READ(pDst, dst));
++ a2 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a2, s2);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++
++ a1 = s1 >> 24;
++ if (a1 == 0xff)
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
++ else if (s1) {
++ d = CONVERT_0565_TO_0888(READ(pDst, dst));
++ a1 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++ }
++ }
++}
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x0565 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint16_t *dstLine;
++ uint32_t *srcFirstLine;
++ uint32_t d;
++ uint32_t s1, s2;
++ uint8_t a1, a2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++ int32_t max_vx, max_vy;
++
++ uint32_t *src;
++ uint16_t *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
++
++ max_vx = pSrc->bits.width << 16;
++ max_vy = pSrc->bits.height << 16;
++
++ while (orig_vx < 0) orig_vx += max_vx;
++ while (vy < 0) vy += max_vy;
++ while (orig_vx >= max_vx) orig_vx -= max_vx;
++ while (vy >= max_vy) vy -= max_vy;
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++ while (vy >= max_vy) vy -= max_vy;
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s2 = READ(pSrc, src + x2);
++
++ a1 = s1 >> 24;
++ a2 = s2 >> 24;
++
++ if (a1 == 0xff)
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
++ else if (s1) {
++ d = CONVERT_0565_TO_0888(READ(pDst, dst));
++ a1 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++
++ if (a2 == 0xff)
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
++ else if (s2) {
++ d = CONVERT_0565_TO_0888(READ(pDst, dst));
++ a2 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a2, s2);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ a1 = s1 >> 24;
++ if (a1 == 0xff)
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
++ else if (s1) {
++ d = CONVERT_0565_TO_0888(READ(pDst, dst));
++ a1 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++ }
++ }
++}
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x8888 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint32_t *dstLine;
++ uint32_t *srcFirstLine;
++ uint32_t d;
++ uint32_t s1, s2;
++ uint8_t a1, a2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++
++ uint32_t *src, *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++
++ if ((y < 0) || (y >= pSrc->bits.height)) {
++ continue;
++ }
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ s2 = READ(pSrc, src + x2);
++
++ a1 = s1 >> 24;
++ a2 = s2 >> 24;
++
++ if (a1 == 0xff)
++ WRITE(pDst, dst, s1);
++ else if (s1) {
++ d = READ(pDst, dst);
++ a1 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++
++ if (a2 == 0xff)
++ WRITE(pDst, dst, s2);
++ else if (s2) {
++ d = READ(pDst, dst);
++ a2 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4(d, a2, s2);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++
++ a1 = s1 >> 24;
++ if (a1 == 0xff)
++ WRITE(pDst, dst, s1);
++ else if (s1) {
++ d = READ(pDst, dst);
++ a1 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++ }
++ }
++}
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x8888 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint32_t *dstLine;
++ uint32_t *srcFirstLine;
++ uint32_t d;
++ uint32_t s1, s2;
++ uint8_t a1, a2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++ int32_t max_vx, max_vy;
++
++ uint32_t *src, *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
++
++ max_vx = pSrc->bits.width << 16;
++ max_vy = pSrc->bits.height << 16;
++
++ while (orig_vx < 0) orig_vx += max_vx;
++ while (vy < 0) vy += max_vy;
++ while (orig_vx >= max_vx) orig_vx -= max_vx;
++ while (vy >= max_vy) vy -= max_vy;
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++ while (vy >= max_vy) vy -= max_vy;
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s2 = READ(pSrc, src + x2);
++
++ a1 = s1 >> 24;
++ a2 = s2 >> 24;
++
++ if (a1 == 0xff)
++ WRITE(pDst, dst, s1);
++ else if (s1) {
++ d = READ(pDst, dst);
++ a1 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++
++ if (a2 == 0xff)
++ WRITE(pDst, dst, s2);
++ else if (s2) {
++ d = READ(pDst, dst);
++ a2 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4(d, a2, s2);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ a1 = s1 >> 24;
++ if (a1 == 0xff)
++ WRITE(pDst, dst, s1);
++ else if (s1) {
++ d = READ(pDst, dst);
++ a1 ^= 0xff;
++ UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
++ WRITE(pDst, dst, d);
++ }
++ dst++;
++ }
++ }
++}
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x8888 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint32_t *dstLine;
++ uint32_t *srcFirstLine;
++ uint32_t s1, s2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++
++ uint32_t *src, *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++
++ if ((y < 0) || (y >= pSrc->bits.height)) {
++ memset(dst, 0, width * sizeof(*dst));
++ continue;
++ }
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ s2 = READ(pSrc, src + x2);
++
++ WRITE(pDst, dst, s1);
++ dst++;
++ WRITE(pDst, dst, s2);
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++ WRITE(pDst, dst, s1);
++ dst++;
++ }
++ }
++}
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x8888 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint32_t *dstLine;
++ uint32_t *srcFirstLine;
++ uint32_t s1, s2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++ int32_t max_vx, max_vy;
++
++ uint32_t *src, *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
++
++ max_vx = pSrc->bits.width << 16;
++ max_vy = pSrc->bits.height << 16;
++
++ while (orig_vx < 0) orig_vx += max_vx;
++ while (vy < 0) vy += max_vy;
++ while (orig_vx >= max_vx) orig_vx -= max_vx;
++ while (vy >= max_vy) vy -= max_vy;
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++ while (vy >= max_vy) vy -= max_vy;
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s2 = READ(pSrc, src + x2);
++
++ WRITE(pDst, dst, s1);
++ dst++;
++ WRITE(pDst, dst, s2);
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ WRITE(pDst, dst, s1);
++ dst++;
++ }
++ }
++}
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_0565x0565 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint16_t *dstLine;
++ uint16_t *srcFirstLine;
++ uint16_t s1, s2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++
++ uint16_t *src, *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint16_t, srcStride, srcFirstLine, 1);
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++
++ if ((y < 0) || (y >= pSrc->bits.height)) {
++ memset(dst, 0, width * sizeof(*dst));
++ continue;
++ }
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ s2 = READ(pSrc, src + x2);
++
++ WRITE(pDst, dst, s1);
++ dst++;
++ WRITE(pDst, dst, s2);
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++ WRITE(pDst, dst, s1);
++ dst++;
++ }
++ }
++}
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_0565x0565 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint16_t *dstLine;
++ uint16_t *srcFirstLine;
++ uint16_t s1, s2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++ int32_t max_vx, max_vy;
++
++ uint16_t *src, *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint16_t, srcStride, srcFirstLine, 1);
++
++ max_vx = pSrc->bits.width << 16;
++ max_vy = pSrc->bits.height << 16;
++
++ while (orig_vx < 0) orig_vx += max_vx;
++ while (vy < 0) vy += max_vy;
++ while (orig_vx >= max_vx) orig_vx -= max_vx;
++ while (vy >= max_vy) vy -= max_vy;
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++ while (vy >= max_vy) vy -= max_vy;
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s2 = READ(pSrc, src + x2);
++
++ WRITE(pDst, dst, s1);
++ dst++;
++ WRITE(pDst, dst, s2);
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ WRITE(pDst, dst, s1);
++ dst++;
++ }
++ }
++}
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x0565 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint16_t *dstLine;
++ uint32_t *srcFirstLine;
++ uint32_t s1, s2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++
++ uint32_t *src;
++ uint16_t *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++
++ if ((y < 0) || (y >= pSrc->bits.height)) {
++ memset(dst, 0, width * sizeof(*dst));
++ continue;
++ }
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ s2 = READ(pSrc, src + x2);
++
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
++ dst++;
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ s1 = READ(pSrc, src + x1);
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
++ dst++;
++ }
++ }
++}
++
++static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x0565 (
++ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
++ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
++{
++ uint16_t *dstLine;
++ uint32_t *srcFirstLine;
++ uint32_t s1, s2;
++ int w;
++ int x1, x2, y;
++ int32_t orig_vx = vx;
++ int32_t max_vx, max_vy;
++
++ uint32_t *src;
++ uint16_t *dst;
++ int srcStride, dstStride;
++ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
++ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
++ * transformed from destination space to source space */
++ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
++
++ max_vx = pSrc->bits.width << 16;
++ max_vy = pSrc->bits.height << 16;
++
++ while (orig_vx < 0) orig_vx += max_vx;
++ while (vy < 0) vy += max_vy;
++ while (orig_vx >= max_vx) orig_vx -= max_vx;
++ while (vy >= max_vy) vy -= max_vy;
++
++ while (--height >= 0)
++ {
++ dst = dstLine;
++ dstLine += dstStride;
++
++ y = vy >> 16;
++ vy += unit_y;
++ while (vy >= max_vy) vy -= max_vy;
++
++ src = srcFirstLine + srcStride * y;
++
++ w = width;
++ vx = orig_vx;
++ while ((w -= 2) >= 0)
++ {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ x2 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s2 = READ(pSrc, src + x2);
++
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
++ dst++;
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
++ dst++;
++ }
++ if (w & 1) {
++ x1 = vx >> 16;
++ vx += unit_x;
++ while (vx >= max_vx) vx -= max_vx;
++ s1 = READ(pSrc, src + x1);
++
++ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
++ dst++;
++ }
++ }
++}
++
++/*
++ * Check if the clipping boundary is crossed on horizontal scaling
++ */
++static inline pixman_bool_t
++fbTransformVerifyHorizontalClipping(pixman_image_t *pict, int width, int32_t vx, int32_t unit_x)
++{
++ while (--width >= 0) {
++ int x = vx >> 16;
++ if ((x < 0) || (x >= pict->bits.width)) return 1;
++ vx += unit_x;
++ }
++ return 0;
++}
++
++/*
++ * Check if the clipping boundary is crossed on vertical scaling
++ */
++static inline pixman_bool_t
++fbTransformVerifyVerticalClipping(pixman_image_t *pict, int height, int32_t vy, int32_t unit_y)
++{
++ while (--height >= 0) {
++ int y = vy >> 16;
++ if ((y < 0) || (y >= pict->bits.height)) return 1;
++ vy += unit_y;
++ }
++ return 0;
++}
++
++/*
++ * Easy case of transform without rotation or complex clipping
++ * Returns 1 in the case if it was able to handle this operation and 0 otherwise
++ */
++static pixman_bool_t
++fbCompositeTransformNonrotatedAffineTrivialclip (
++ pixman_op_t op,
++ pixman_image_t *pSrc,
++ pixman_image_t *pMask,
++ pixman_image_t *pDst,
++ int16_t xSrc,
++ int16_t ySrc,
++ int16_t xMask,
++ int16_t yMask,
++ int16_t xDst,
++ int16_t yDst,
++ uint16_t width,
++ uint16_t height)
++{
++ pixman_vector_t v, unit;
++ int skipdst_x = 0, skipdst_y = 0;
++
++ /* Handle destination clipping */
++ if (xDst < pDst->common.clip_region.extents.x1) {
++ skipdst_x = pDst->common.clip_region.extents.x1 - xDst;
++ if (skipdst_x >= (int)width)
++ return 1;
++ xDst = pDst->common.clip_region.extents.x1;
++ width -= skipdst_x;
++ }
++
++ if (yDst < pDst->common.clip_region.extents.y1) {
++ skipdst_y = pDst->common.clip_region.extents.y1 - yDst;
++ if (skipdst_y >= (int)height)
++ return 1;
++ yDst = pDst->common.clip_region.extents.y1;
++ height -= skipdst_y;
++ }
++
++ if (xDst >= pDst->common.clip_region.extents.x2 ||
++ yDst >= pDst->common.clip_region.extents.y2)
++ {
++ return 1;
++ }
++
++ if (xDst + width > pDst->common.clip_region.extents.x2)
++ width = pDst->common.clip_region.extents.x2 - xDst;
++ if (yDst + height > pDst->common.clip_region.extents.y2)
++ height = pDst->common.clip_region.extents.y2 - yDst;
++
++ /* reference point is the center of the pixel */
++ v.vector[0] = pixman_int_to_fixed(xSrc) + pixman_fixed_1 / 2;
++ v.vector[1] = pixman_int_to_fixed(ySrc) + pixman_fixed_1 / 2;
++ v.vector[2] = pixman_fixed_1;
++
++ if (!pixman_transform_point_3d (pSrc->common.transform, &v))
++ return 0;
++
++ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
++ v.vector[0] -= pixman_fixed_e;
++ v.vector[1] -= pixman_fixed_e;
++
++ unit.vector[0] = pSrc->common.transform->matrix[0][0];
++ unit.vector[1] = pSrc->common.transform->matrix[1][1];
++
++ v.vector[0] += unit.vector[0] * skipdst_x;
++ v.vector[1] += unit.vector[1] * skipdst_y;
++
++ /* Check for possible fixed point arithmetics problems/overflows */
++ if (unit.vector[0] <= 0 || unit.vector[1] <= 0)
++ return 0;
++ if (width == 0 || height == 0)
++ return 0;
++ if ((uint32_t)width + (unit.vector[0] >> 16) >= 0x7FFF)
++ return 0;
++ if ((uint32_t)height + (unit.vector[1] >> 16) >= 0x7FFF)
++ return 0;
++
++ /* Horizontal source clipping is only supported for NORMAL repeat */
++ if (pSrc->common.repeat != PIXMAN_REPEAT_NORMAL
++ && fbTransformVerifyHorizontalClipping(pSrc, width, v.vector[0], unit.vector[0])) {
++ return 0;
++ }
++
++ /* Vertical source clipping is only supported for NONE and NORMAL repeat */
++ if (pSrc->common.repeat != PIXMAN_REPEAT_NONE && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL
++ && fbTransformVerifyVerticalClipping(pSrc, height, v.vector[1], unit.vector[1])) {
++ return 0;
++ }
++
++ if (op == PIXMAN_OP_OVER && pSrc->bits.format == PIXMAN_a8r8g8b8
++ && (pDst->bits.format == PIXMAN_x8r8g8b8 || pDst->bits.format == PIXMAN_a8r8g8b8))
++ {
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x8888(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x8888(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ }
++
++ if (op == PIXMAN_OP_SRC && (pSrc->bits.format == PIXMAN_x8r8g8b8 || pSrc->bits.format == PIXMAN_a8r8g8b8)
++ && (pDst->bits.format == PIXMAN_x8r8g8b8 || pDst->bits.format == pSrc->bits.format))
++ {
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x8888(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x8888(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ }
++
++ if (op == PIXMAN_OP_OVER && pSrc->bits.format == PIXMAN_a8r8g8b8 && pDst->bits.format == PIXMAN_r5g6b5)
++ {
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x0565(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x0565(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ }
++
++ if (op == PIXMAN_OP_SRC && pSrc->bits.format == PIXMAN_r5g6b5 && pDst->bits.format == PIXMAN_r5g6b5)
++ {
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_0565x0565(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_0565x0565(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ }
++
++ if (op == PIXMAN_OP_SRC && (pSrc->bits.format == PIXMAN_x8r8g8b8 || pSrc->bits.format == PIXMAN_a8r8g8b8)
++ && pDst->bits.format == PIXMAN_r5g6b5)
++ {
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x0565(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
++ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x0565(
++ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
++ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
++ return 1;
++ }
++ }
++
++ /* No fastpath scaling implemented for this case */
++ return 0;
++}
++
+ static void
+ fast_path_composite (pixman_implementation_t *imp,
+ pixman_op_t op,
+@@ -1279,6 +2266,30 @@ fast_path_composite (pixman_implementation_t *imp,
+ if (src->type == BITS