From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Fri, 17 Jul 2009 10:22:23 +0000 (+0300) Subject: Fastpath for nearest neighbour scaled compositing operations. X-Git-Url: http://siarhei.siamashka.name/gitweb/?p=pixman.git;a=commitdiff_plain;h=247531c6978725a88fd3706129b9d3e339026f54 Fastpath for nearest neighbour scaled compositing operations. OVER 8888x8888, OVER 8888x0565, SRC 8888x8888, SRC 8888x0565 and SRC 0565x0565 cases are supported. --- diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index 7f80578..7f3a6ad 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -1261,6 +1261,993 @@ fast_composite_src_scale_nearest (pixman_implementation_t *imp, } } +/* + * Functions, which implement the core inner loops for the nearest neighbour + * scaled fastpath compositing operations. The do not need to do clipping + * checks, also the loops are unrolled to process two pixels per iteration + * for better performance on most CPU architectures (superscalar processors + * can issue several operations simultaneously, other processors can hide + * instructions latencies by pipelining operations). Unrolling more + * does not make much sense because the compiler will start running out + * of spare registers soon. + */ + +#undef READ +#undef WRITE +#define READ(img,x) (*(x)) +#define WRITE(img,ptr,v) ((*(ptr)) = (v)) + +#define UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(x, a, y) do { \ + UN8x4_MUL_UN8_ADD_UN8x4(x, a, y); \ + x = CONVERT_8888_TO_0565(x); \ + } while (0) + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x0565 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint16_t *dstLine; + uint32_t *srcFirstLine; + uint32_t d; + uint32_t s1, s2; + uint8_t a1, a2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + + uint32_t *src; + uint16_t *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1); + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + + if ((y < 0) || (y >= pSrc->bits.height)) { + continue; + } + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + s2 = READ(pSrc, src + x2); + + a1 = s1 >> 24; + a2 = s2 >> 24; + + if (a1 == 0xff) + WRITE(pDst, dst, CONVERT_8888_TO_0565(s1)); + else if (s1) { + d = CONVERT_0565_TO_0888(READ(pDst, dst)); + a1 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1); + WRITE(pDst, dst, d); + } + dst++; + + if (a2 == 0xff) + WRITE(pDst, dst, CONVERT_8888_TO_0565(s2)); + else if (s2) { + d = CONVERT_0565_TO_0888(READ(pDst, dst)); + a2 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a2, s2); + WRITE(pDst, dst, d); + } + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + + a1 = s1 >> 24; + if (a1 == 0xff) + WRITE(pDst, dst, CONVERT_8888_TO_0565(s1)); + else if (s1) { + d = CONVERT_0565_TO_0888(READ(pDst, dst)); + a1 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1); + WRITE(pDst, dst, d); + } + dst++; + } + } +} + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x0565 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint16_t *dstLine; + uint32_t *srcFirstLine; + uint32_t d; + uint32_t s1, s2; + uint8_t a1, a2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + int32_t max_vx, max_vy; + + uint32_t *src; + uint16_t *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1); + + max_vx = pSrc->bits.width << 16; + max_vy = pSrc->bits.height << 16; + + while (orig_vx < 0) orig_vx += max_vx; + while (vy < 0) vy += max_vy; + while (orig_vx >= max_vx) orig_vx -= max_vx; + while (vy >= max_vy) vy -= max_vy; + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + while (vy >= max_vy) vy -= max_vy; + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s2 = READ(pSrc, src + x2); + + a1 = s1 >> 24; + a2 = s2 >> 24; + + if (a1 == 0xff) + WRITE(pDst, dst, CONVERT_8888_TO_0565(s1)); + else if (s1) { + d = CONVERT_0565_TO_0888(READ(pDst, dst)); + a1 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1); + WRITE(pDst, dst, d); + } + dst++; + + if (a2 == 0xff) + WRITE(pDst, dst, CONVERT_8888_TO_0565(s2)); + else if (s2) { + d = CONVERT_0565_TO_0888(READ(pDst, dst)); + a2 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a2, s2); + WRITE(pDst, dst, d); + } + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + a1 = s1 >> 24; + if (a1 == 0xff) + WRITE(pDst, dst, CONVERT_8888_TO_0565(s1)); + else if (s1) { + d = CONVERT_0565_TO_0888(READ(pDst, dst)); + a1 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1); + WRITE(pDst, dst, d); + } + dst++; + } + } +} + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x8888 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint32_t *dstLine; + uint32_t *srcFirstLine; + uint32_t d; + uint32_t s1, s2; + uint8_t a1, a2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + + uint32_t *src, *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1); + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + + if ((y < 0) || (y >= pSrc->bits.height)) { + continue; + } + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + s2 = READ(pSrc, src + x2); + + a1 = s1 >> 24; + a2 = s2 >> 24; + + if (a1 == 0xff) + WRITE(pDst, dst, s1); + else if (s1) { + d = READ(pDst, dst); + a1 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1); + WRITE(pDst, dst, d); + } + dst++; + + if (a2 == 0xff) + WRITE(pDst, dst, s2); + else if (s2) { + d = READ(pDst, dst); + a2 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4(d, a2, s2); + WRITE(pDst, dst, d); + } + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + + a1 = s1 >> 24; + if (a1 == 0xff) + WRITE(pDst, dst, s1); + else if (s1) { + d = READ(pDst, dst); + a1 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1); + WRITE(pDst, dst, d); + } + dst++; + } + } +} + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x8888 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint32_t *dstLine; + uint32_t *srcFirstLine; + uint32_t d; + uint32_t s1, s2; + uint8_t a1, a2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + int32_t max_vx, max_vy; + + uint32_t *src, *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1); + + max_vx = pSrc->bits.width << 16; + max_vy = pSrc->bits.height << 16; + + while (orig_vx < 0) orig_vx += max_vx; + while (vy < 0) vy += max_vy; + while (orig_vx >= max_vx) orig_vx -= max_vx; + while (vy >= max_vy) vy -= max_vy; + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + while (vy >= max_vy) vy -= max_vy; + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s2 = READ(pSrc, src + x2); + + a1 = s1 >> 24; + a2 = s2 >> 24; + + if (a1 == 0xff) + WRITE(pDst, dst, s1); + else if (s1) { + d = READ(pDst, dst); + a1 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1); + WRITE(pDst, dst, d); + } + dst++; + + if (a2 == 0xff) + WRITE(pDst, dst, s2); + else if (s2) { + d = READ(pDst, dst); + a2 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4(d, a2, s2); + WRITE(pDst, dst, d); + } + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + a1 = s1 >> 24; + if (a1 == 0xff) + WRITE(pDst, dst, s1); + else if (s1) { + d = READ(pDst, dst); + a1 ^= 0xff; + UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1); + WRITE(pDst, dst, d); + } + dst++; + } + } +} + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x8888 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint32_t *dstLine; + uint32_t *srcFirstLine; + uint32_t s1, s2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + + uint32_t *src, *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1); + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + + if ((y < 0) || (y >= pSrc->bits.height)) { + memset(dst, 0, width * sizeof(*dst)); + continue; + } + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + s2 = READ(pSrc, src + x2); + + WRITE(pDst, dst, s1); + dst++; + WRITE(pDst, dst, s2); + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + WRITE(pDst, dst, s1); + dst++; + } + } +} + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x8888 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint32_t *dstLine; + uint32_t *srcFirstLine; + uint32_t s1, s2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + int32_t max_vx, max_vy; + + uint32_t *src, *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1); + + max_vx = pSrc->bits.width << 16; + max_vy = pSrc->bits.height << 16; + + while (orig_vx < 0) orig_vx += max_vx; + while (vy < 0) vy += max_vy; + while (orig_vx >= max_vx) orig_vx -= max_vx; + while (vy >= max_vy) vy -= max_vy; + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + while (vy >= max_vy) vy -= max_vy; + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s2 = READ(pSrc, src + x2); + + WRITE(pDst, dst, s1); + dst++; + WRITE(pDst, dst, s2); + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + WRITE(pDst, dst, s1); + dst++; + } + } +} + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_0565x0565 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint16_t *dstLine; + uint16_t *srcFirstLine; + uint16_t s1, s2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + + uint16_t *src, *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint16_t, srcStride, srcFirstLine, 1); + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + + if ((y < 0) || (y >= pSrc->bits.height)) { + memset(dst, 0, width * sizeof(*dst)); + continue; + } + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + s2 = READ(pSrc, src + x2); + + WRITE(pDst, dst, s1); + dst++; + WRITE(pDst, dst, s2); + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + WRITE(pDst, dst, s1); + dst++; + } + } +} + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_0565x0565 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint16_t *dstLine; + uint16_t *srcFirstLine; + uint16_t s1, s2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + int32_t max_vx, max_vy; + + uint16_t *src, *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint16_t, srcStride, srcFirstLine, 1); + + max_vx = pSrc->bits.width << 16; + max_vy = pSrc->bits.height << 16; + + while (orig_vx < 0) orig_vx += max_vx; + while (vy < 0) vy += max_vy; + while (orig_vx >= max_vx) orig_vx -= max_vx; + while (vy >= max_vy) vy -= max_vy; + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + while (vy >= max_vy) vy -= max_vy; + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s2 = READ(pSrc, src + x2); + + WRITE(pDst, dst, s1); + dst++; + WRITE(pDst, dst, s2); + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + WRITE(pDst, dst, s1); + dst++; + } + } +} + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x0565 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint16_t *dstLine; + uint32_t *srcFirstLine; + uint32_t s1, s2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + + uint32_t *src; + uint16_t *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1); + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + + if ((y < 0) || (y >= pSrc->bits.height)) { + memset(dst, 0, width * sizeof(*dst)); + continue; + } + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + s2 = READ(pSrc, src + x2); + + WRITE(pDst, dst, CONVERT_8888_TO_0565(s1)); + dst++; + WRITE(pDst, dst, CONVERT_8888_TO_0565(s2)); + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + s1 = READ(pSrc, src + x1); + WRITE(pDst, dst, CONVERT_8888_TO_0565(s1)); + dst++; + } + } +} + +static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x0565 ( + pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst, + int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y) +{ + uint16_t *dstLine; + uint32_t *srcFirstLine; + uint32_t s1, s2; + int w; + int x1, x2, y; + int32_t orig_vx = vx; + int32_t max_vx, max_vy; + + uint32_t *src; + uint16_t *dst; + int srcStride, dstStride; + PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + * transformed from destination space to source space */ + PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1); + + max_vx = pSrc->bits.width << 16; + max_vy = pSrc->bits.height << 16; + + while (orig_vx < 0) orig_vx += max_vx; + while (vy < 0) vy += max_vy; + while (orig_vx >= max_vx) orig_vx -= max_vx; + while (vy >= max_vy) vy -= max_vy; + + while (--height >= 0) + { + dst = dstLine; + dstLine += dstStride; + + y = vy >> 16; + vy += unit_y; + while (vy >= max_vy) vy -= max_vy; + + src = srcFirstLine + srcStride * y; + + w = width; + vx = orig_vx; + while ((w -= 2) >= 0) + { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + x2 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s2 = READ(pSrc, src + x2); + + WRITE(pDst, dst, CONVERT_8888_TO_0565(s1)); + dst++; + WRITE(pDst, dst, CONVERT_8888_TO_0565(s2)); + dst++; + } + if (w & 1) { + x1 = vx >> 16; + vx += unit_x; + while (vx >= max_vx) vx -= max_vx; + s1 = READ(pSrc, src + x1); + + WRITE(pDst, dst, CONVERT_8888_TO_0565(s1)); + dst++; + } + } +} + +/* + * Check if the clipping boundary is crossed on horizontal scaling + */ +static inline pixman_bool_t +fbTransformVerifyHorizontalClipping(pixman_image_t *pict, int width, int32_t vx, int32_t unit_x) +{ + while (--width >= 0) { + int x = vx >> 16; + if ((x < 0) || (x >= pict->bits.width)) return 1; + vx += unit_x; + } + return 0; +} + +/* + * Check if the clipping boundary is crossed on vertical scaling + */ +static inline pixman_bool_t +fbTransformVerifyVerticalClipping(pixman_image_t *pict, int height, int32_t vy, int32_t unit_y) +{ + while (--height >= 0) { + int y = vy >> 16; + if ((y < 0) || (y >= pict->bits.height)) return 1; + vy += unit_y; + } + return 0; +} + +/* + * Easy case of transform without rotation or complex clipping + * Returns 1 in the case if it was able to handle this operation and 0 otherwise + */ +static pixman_bool_t +fbCompositeTransformNonrotatedAffineTrivialclip ( + pixman_op_t op, + pixman_image_t *pSrc, + pixman_image_t *pMask, + pixman_image_t *pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + pixman_vector_t v, unit; + int skipdst_x = 0, skipdst_y = 0; + + /* Handle destination clipping */ + if (xDst < pDst->common.clip_region.extents.x1) { + skipdst_x = pDst->common.clip_region.extents.x1 - xDst; + if (skipdst_x >= (int)width) + return 1; + xDst = pDst->common.clip_region.extents.x1; + width -= skipdst_x; + } + + if (yDst < pDst->common.clip_region.extents.y1) { + skipdst_y = pDst->common.clip_region.extents.y1 - yDst; + if (skipdst_y >= (int)height) + return 1; + yDst = pDst->common.clip_region.extents.y1; + height -= skipdst_y; + } + + if (xDst >= pDst->common.clip_region.extents.x2 || + yDst >= pDst->common.clip_region.extents.y2) + { + return 1; + } + + if (xDst + width > pDst->common.clip_region.extents.x2) + width = pDst->common.clip_region.extents.x2 - xDst; + if (yDst + height > pDst->common.clip_region.extents.y2) + height = pDst->common.clip_region.extents.y2 - yDst; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed(xSrc) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed(ySrc) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (pSrc->common.transform, &v)) + return 0; + + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ + v.vector[0] -= pixman_fixed_e; + v.vector[1] -= pixman_fixed_e; + + unit.vector[0] = pSrc->common.transform->matrix[0][0]; + unit.vector[1] = pSrc->common.transform->matrix[1][1]; + + v.vector[0] += unit.vector[0] * skipdst_x; + v.vector[1] += unit.vector[1] * skipdst_y; + + /* Check for possible fixed point arithmetics problems/overflows */ + if (unit.vector[0] <= 0 || unit.vector[1] <= 0) + return 0; + if (width == 0 || height == 0) + return 0; + if ((uint32_t)width + (unit.vector[0] >> 16) >= 0x7FFF) + return 0; + if ((uint32_t)height + (unit.vector[1] >> 16) >= 0x7FFF) + return 0; + + /* Horizontal source clipping is only supported for NORMAL repeat */ + if (pSrc->common.repeat != PIXMAN_REPEAT_NORMAL + && fbTransformVerifyHorizontalClipping(pSrc, width, v.vector[0], unit.vector[0])) { + return 0; + } + + /* Vertical source clipping is only supported for NONE and NORMAL repeat */ + if (pSrc->common.repeat != PIXMAN_REPEAT_NONE && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL + && fbTransformVerifyVerticalClipping(pSrc, height, v.vector[1], unit.vector[1])) { + return 0; + } + + if (op == PIXMAN_OP_OVER && pSrc->bits.format == PIXMAN_a8r8g8b8 + && (pDst->bits.format == PIXMAN_x8r8g8b8 || pDst->bits.format == PIXMAN_a8r8g8b8)) + { + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x8888( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x8888( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + } + + if (op == PIXMAN_OP_SRC && (pSrc->bits.format == PIXMAN_x8r8g8b8 || pSrc->bits.format == PIXMAN_a8r8g8b8) + && (pDst->bits.format == PIXMAN_x8r8g8b8 || pDst->bits.format == pSrc->bits.format)) + { + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x8888( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x8888( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + } + + if (op == PIXMAN_OP_OVER && pSrc->bits.format == PIXMAN_a8r8g8b8 && pDst->bits.format == PIXMAN_r5g6b5) + { + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x0565( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x0565( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + } + + if (op == PIXMAN_OP_SRC && pSrc->bits.format == PIXMAN_r5g6b5 && pDst->bits.format == PIXMAN_r5g6b5) + { + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_0565x0565( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_0565x0565( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + } + + if (op == PIXMAN_OP_SRC && (pSrc->bits.format == PIXMAN_x8r8g8b8 || pSrc->bits.format == PIXMAN_a8r8g8b8) + && pDst->bits.format == PIXMAN_r5g6b5) + { + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x0565( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) { + fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x0565( + pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height, + v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]); + return 1; + } + } + + /* No fastpath scaling implemented for this case */ + return 0; +} + static void fast_path_composite (pixman_implementation_t *imp, pixman_op_t op, @@ -1279,6 +2266,30 @@ fast_path_composite (pixman_implementation_t *imp, if (src->type == BITS && src->common.transform && !mask + && !src->common.alpha_map && !dest->common.alpha_map + && (src->common.filter == PIXMAN_FILTER_NEAREST) + && !src->bits.read_func && !src->bits.write_func + && !dest->bits.read_func && !dest->bits.write_func) + { + /* ensure that the transform matrix only has a scale */ + if (src->common.transform->matrix[0][1] == 0 && + src->common.transform->matrix[1][0] == 0 && + src->common.transform->matrix[2][0] == 0 && + src->common.transform->matrix[2][1] == 0 && + src->common.transform->matrix[2][2] == pixman_fixed_1 && + dest->common.clip_region.data == NULL) + { + if (fbCompositeTransformNonrotatedAffineTrivialclip (op, src, mask, dest, + src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height)) + { + return; + } + } + } + + if (src->type == BITS + && src->common.transform + && !mask && op == PIXMAN_OP_SRC && !src->common.alpha_map && !dest->common.alpha_map && (src->common.filter == PIXMAN_FILTER_NEAREST)