#include "include/core/SkColor.h"
#include "include/core/SkColorPriv.h"
#include "include/core/SkColorType.h"
#include "include/core/SkPaint.h"
#include "include/core/SkPixmap.h"
#include "include/core/SkRect.h"
#include "include/core/SkTypes.h"
#include "include/private/SkColorData.h"
#include "include/private/base/SkCPUTypes.h"
#include "include/private/base/SkDebug.h"
#include "include/private/base/SkMalloc.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkUtils.h"
#include "src/base/SkVx.h"
#include "src/core/SkBlitMask.h"
#include "src/core/SkBlitRow.h"
#include "src/core/SkCoreBlitters.h"
#include "src/core/SkMask.h"
#include "src/core/SkMemset.h"
#include "src/shaders/SkShaderBase.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <emmintrin.h>
#include "src/core/SkBlitBWMaskTemplate.h"

Macros
#define	SK_R16x5_R32x5_SHIFT (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5)

#define	SK_G16x5_G32x5_SHIFT (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5)

#define	SK_B16x5_B32x5_SHIFT (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5)

#define	SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_slli_epi32(x, SK_R16x5_R32x5_SHIFT))

#define	SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_slli_epi32(x, SK_G16x5_G32x5_SHIFT))

#define	SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_slli_epi32(x, SK_B16x5_B32x5_SHIFT))

#define	solid_8_pixels(mask, dst, color)

#define	SK_BLITBWMASK_NAME SkARGB32_BlitBW

#define	SK_BLITBWMASK_ARGS , SkPMColor color

#define	SK_BLITBWMASK_BLIT8(mask, dst) solid_8_pixels(mask, dst, color)

#define	SK_BLITBWMASK_GETADDR writable_addr32

#define	SK_BLITBWMASK_DEVTYPE uint32_t

#define	blend_8_pixels(mask, dst, sc, dst_scale)

#define	SK_BLITBWMASK_NAME SkARGB32_BlendBW

#define	SK_BLITBWMASK_ARGS , uint32_t sc, unsigned dst_scale

#define	SK_BLITBWMASK_BLIT8(mask, dst) blend_8_pixels(mask, dst, sc, dst_scale)

#define	SK_BLITBWMASK_GETADDR writable_addr32

#define	SK_BLITBWMASK_DEVTYPE uint32_t

Typedefs
using	U32 = skvx::Vec< 4, uint32_t >

using	U8x4 = skvx::Vec< 16, uint8_t >

using	U8 = skvx::Vec< 4, uint8_t >

Functions
static int	upscale_31_to_32 (int value)

static int	blend_32 (int src, int dst, int scale)

static SkPMColor	blend_lcd16 (int srcA, int srcR, int srcG, int srcB, SkPMColor dst, uint16_t mask)

static SkPMColor	blend_lcd16_opaque (int srcR, int srcG, int srcB, SkPMColor dst, uint16_t mask, SkPMColor opaqueDst)

static __m128i	blend_lcd16_sse2 (__m128i &src, __m128i &dst, __m128i &mask, __m128i &srcA)

static __m128i	blend_lcd16_opaque_sse2 (__m128i &src, __m128i &dst, __m128i &mask)

void	blit_row_lcd16 (SkPMColor dst[], const uint16_t mask[], SkColor src, int width, SkPMColor)

void	blit_row_lcd16_opaque (SkPMColor dst[], const uint16_t mask[], SkColor src, int width, SkPMColor opaqueDst)

static bool	blit_color (const SkPixmap &device, const SkMask &mask, const SkIRect &clip, SkColor color)

static void	SkARGB32_Blit32 (const SkPixmap &device, const SkMask &mask, const SkIRect &clip, SkPMColor srcColor)

static void	drive (SkPMColor *dst, const SkPMColor *src, const uint8_t *cov, int n, U8x4(*kernel)(U8x4, U8x4, U8x4))

static void	blend_row_A8 (SkPMColor *dst, const void *mask, const SkPMColor *src, int n)

static void	blend_row_A8_opaque (SkPMColor *dst, const void *mask, const SkPMColor *src, int n)

static void	blend_row_lcd16 (SkPMColor *dst, const void *vmask, const SkPMColor *src, int n)

static void	blend_row_LCD16_opaque (SkPMColor *dst, const void *vmask, const SkPMColor *src, int n)

Macro Definition Documentation

◆ blend_8_pixels

#define blend_8_pixels	(	mask,
		dst,
		sc,
		dst_scale
	)

Value:

    do {                                                                    \
        /* One mask bit per pixel, most significant bit (0x80) first:    */ \
        /* a set bit replaces dst[i] with sc + dst[i] scaled by          */ \
        /* dst_scale; pixels whose bit is clear are left untouched.      */ \
        if (mask & 0x80) { dst[0] = sc + SkAlphaMulQ(dst[0], dst_scale); }  \
        if (mask & 0x40) { dst[1] = sc + SkAlphaMulQ(dst[1], dst_scale); }  \
        if (mask & 0x20) { dst[2] = sc + SkAlphaMulQ(dst[2], dst_scale); }  \
        if (mask & 0x10) { dst[3] = sc + SkAlphaMulQ(dst[3], dst_scale); }  \
        if (mask & 0x08) { dst[4] = sc + SkAlphaMulQ(dst[4], dst_scale); }  \
        if (mask & 0x04) { dst[5] = sc + SkAlphaMulQ(dst[5], dst_scale); }  \
        if (mask & 0x02) { dst[6] = sc + SkAlphaMulQ(dst[6], dst_scale); }  \
        if (mask & 0x01) { dst[7] = sc + SkAlphaMulQ(dst[7], dst_scale); }  \
    } while (0)

Definition at line 1540 of file SkBlitter_ARGB32.cpp.

◆ SK_B16x5_B32x5_SHIFT

#define SK_B16x5_B32x5_SHIFT (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5)

Definition at line 134 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_ARGS [1/2]

#define SK_BLITBWMASK_ARGS , SkPMColor color

Definition at line 1553 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_ARGS [2/2]

#define SK_BLITBWMASK_ARGS , uint32_t sc, unsigned dst_scale

Definition at line 1553 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_BLIT8 [1/2]

#define SK_BLITBWMASK_BLIT8	(	mask,
		dst
	)	solid_8_pixels(mask, dst, color)

Definition at line 1554 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_BLIT8 [2/2]

#define SK_BLITBWMASK_BLIT8	(	mask,
		dst
	)	blend_8_pixels(mask, dst, sc, dst_scale)

Definition at line 1554 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_DEVTYPE [1/2]

#define SK_BLITBWMASK_DEVTYPE uint32_t

Definition at line 1556 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_DEVTYPE [2/2]

#define SK_BLITBWMASK_DEVTYPE uint32_t

Definition at line 1556 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_GETADDR [1/2]

#define SK_BLITBWMASK_GETADDR writable_addr32

Definition at line 1555 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_GETADDR [2/2]

#define SK_BLITBWMASK_GETADDR writable_addr32

Definition at line 1555 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_NAME [1/2]

#define SK_BLITBWMASK_NAME SkARGB32_BlitBW

Definition at line 1552 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_NAME [2/2]

#define SK_BLITBWMASK_NAME SkARGB32_BlendBW

Definition at line 1552 of file SkBlitter_ARGB32.cpp.

◆ SK_G16x5_G32x5_SHIFT

#define SK_G16x5_G32x5_SHIFT (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5)

Definition at line 133 of file SkBlitter_ARGB32.cpp.

◆ SK_R16x5_R32x5_SHIFT

#define SK_R16x5_R32x5_SHIFT (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5)

Definition at line 132 of file SkBlitter_ARGB32.cpp.

◆ SkPackedB16x5ToUnmaskedB32x5_SSE2

#define SkPackedB16x5ToUnmaskedB32x5_SSE2 ( x ) (_mm_slli_epi32(x, SK_B16x5_B32x5_SHIFT))

Definition at line 155 of file SkBlitter_ARGB32.cpp.

◆ SkPackedG16x5ToUnmaskedG32x5_SSE2

#define SkPackedG16x5ToUnmaskedG32x5_SSE2 ( x ) (_mm_slli_epi32(x, SK_G16x5_G32x5_SHIFT))

Definition at line 147 of file SkBlitter_ARGB32.cpp.

◆ SkPackedR16x5ToUnmaskedR32x5_SSE2

#define SkPackedR16x5ToUnmaskedR32x5_SSE2 ( x ) (_mm_slli_epi32(x, SK_R16x5_R32x5_SHIFT))

Definition at line 139 of file SkBlitter_ARGB32.cpp.

◆ solid_8_pixels

#define solid_8_pixels	(	mask,
		dst,
		color
	)

Value:

    do {                                    \
        /* One mask bit per pixel, most  */ \
        /* significant bit (0x80) first: */ \
        /* a set bit overwrites dst[i]   */ \
        /* with color; clear bits leave  */ \
        /* the pixel untouched.          */ \
        if (mask & 0x80) dst[0] = color;    \
        if (mask & 0x40) dst[1] = color;    \
        if (mask & 0x20) dst[2] = color;    \
        if (mask & 0x10) dst[3] = color;    \
        if (mask & 0x08) dst[4] = color;    \
        if (mask & 0x04) dst[5] = color;    \
        if (mask & 0x02) dst[6] = color;    \
        if (mask & 0x01) dst[7] = color;    \
    } while (0)

Definition at line 1521 of file SkBlitter_ARGB32.cpp.

Typedef Documentation

◆ U32

using U32 = skvx::Vec< 4, uint32_t>

Definition at line 1834 of file SkBlitter_ARGB32.cpp.

◆ U8

using U8 = skvx::Vec< 4, uint8_t>

Definition at line 1836 of file SkBlitter_ARGB32.cpp.

◆ U8x4

using U8x4 = skvx::Vec<16, uint8_t>

Definition at line 1835 of file SkBlitter_ARGB32.cpp.

Function Documentation

◆ blend_32()

static int blend_32	(	int	src,
		int	dst,
		int	scale
	)

inline static

Definition at line 38 of file SkBlitter_ARGB32.cpp.

                                                        {
    // Linearly interpolates one 8-bit channel from dst towards src.
    // scale is a weight in 0..32 (asserted below): 0 returns dst unchanged,
    // 32 returns src exactly.
    SkASSERT((unsigned)src <= 0xFF);
    SkASSERT((unsigned)dst <= 0xFF);
    SkASSERT((unsigned)scale <= 32);
    // (src - dst) may be negative; ">> 5" divides the weighted difference by 32.
    return dst + ((src - dst) * scale >> 5);
}

◆ blend_lcd16()

static SkPMColor blend_lcd16	(	int	srcA,
		int	srcR,
		int	srcG,
		int	srcB,
		SkPMColor	dst,
		uint16_t	mask
	)

inline static

Definition at line 45 of file SkBlitter_ARGB32.cpp.

                                                                   {
    // Blends a source color with alpha srcA into one dst pixel, using a packed
    // 16-bit LCD mask that carries a separate coverage value per color channel.
    // A mask of 0 means no coverage, so dst is returned unchanged.
    if (mask == 0) {
        return dst;
    }
 
    /*  We want all of these in 5bits, hence the shifts in case one of them
     *  (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
 
    // Now upscale them to 0..32, so we can use blend32
    maskR = upscale_31_to_32(maskR);
    maskG = upscale_31_to_32(maskG);
    maskB = upscale_31_to_32(maskB);
 
    // srcA has been upscaled to 256 before passed into this function,
    // so ">> 8" scales each coverage by the source alpha.
    maskR = maskR * srcA >> 8;
    maskG = maskG * srcA >> 8;
    maskB = maskB * srcA >> 8;
 
    int dstA = SkGetPackedA32(dst);
    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);
 
    // Subtract 1 from srcA to bring it back to [0-255] to compare against dstA, alpha needs to
    // use either the min or the max of the LCD coverages. See https://skbug.com/40037823
    int maskA = (srcA-1) < dstA ? std::min(maskR, std::min(maskG, maskB))
                                : std::max(maskR, std::max(maskG, maskB));
 
    // Alpha is blended towards fully opaque (0xFF); each color channel is
    // blended towards the source channel with its own coverage.
    return SkPackARGB32(blend_32(0xFF, dstA, maskA),
                        blend_32(srcR, dstR, maskR),
                        blend_32(srcG, dstG, maskG),
                        blend_32(srcB, dstB, maskB));
}

◆ blend_lcd16_opaque()

static SkPMColor blend_lcd16_opaque	(	int	srcR,
		int	srcG,
		int	srcB,
		SkPMColor	dst,
		uint16_t	mask,
		SkPMColor	opaqueDst
	)

inline static

Definition at line 84 of file SkBlitter_ARGB32.cpp.

                                                                {
    // Blends an opaque source color into one dst pixel using a packed 16-bit
    // LCD mask with per-channel coverage. Two fast paths: a mask of 0 leaves
    // dst unchanged, and a mask of 0xFFFF (full coverage on every channel)
    // returns the caller-supplied opaqueDst directly.
    if (mask == 0) {
        return dst;
    }
 
    if (0xFFFF == mask) {
        return opaqueDst;
    }
 
    /*  We want all of these in 5bits, hence the shifts in case one of them
     *  (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
 
    // Now upscale them to 0..32, so we can use blend32
    maskR = upscale_31_to_32(maskR);
    maskG = upscale_31_to_32(maskG);
    maskB = upscale_31_to_32(maskB);
 
    int dstA = SkGetPackedA32(dst);
    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);
 
    // Opaque src alpha always uses the max of the LCD coverages.
    int maskA = std::max(maskR, std::max(maskG, maskB));
 
    // LCD blitting is only supported if the dst is known/required
    // to be opaque
    // Alpha is still blended towards 0xFF using maskA; when dstA is already
    // 0xFF that blend leaves it at 0xFF.
    return SkPackARGB32(blend_32(0xFF, dstA, maskA),
                        blend_32(srcR, dstR, maskR),
                        blend_32(srcG, dstG, maskG),
                        blend_32(srcB, dstB, maskB));
}

◆ blend_lcd16_opaque_sse2()

static __m128i blend_lcd16_opaque_sse2	(	__m128i &	src,
		__m128i &	dst,
		__m128i &	mask
	)

static

Definition at line 262 of file SkBlitter_ARGB32.cpp.

                                                                                      {
        // SSE2 version of blend_lcd16_opaque for four pixels at once. Note that
        // it has no mask == 0 / mask == 0xFFFF shortcuts: every lane goes through
        // the full blend. The mask argument is overwritten as scratch space.
        //
        // In the following comments, the components of src, dst and mask are
        // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
        // by an R, G, B, or A suffix. Components of one of the four pixels that
        // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
        // example is the blue channel of the second destination pixel. Memory
        // layout is shown for an ARGB byte order in a color value.
 
        // src stores 8-bit values interleaved with zeros.
        // src  = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
        // mask stores 16-bit values (shown as high and low bytes) interleaved with
        // zeros
        // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
        //         m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
 
        // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
        // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
        __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_R32_SHIFT));
 
        // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
        __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_G32_SHIFT));
 
        // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
        __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_B32_SHIFT));
 
        // a = max(r, g, b) since opaque src alpha uses max of LCD coverages
        // Each coverage is first shifted into the alpha byte so the byte-wise
        // max compares like with like.
        __m128i a = _mm_max_epu8(_mm_slli_epi32(r, SK_A32_SHIFT - SK_R32_SHIFT),
                    _mm_max_epu8(_mm_slli_epi32(g, SK_A32_SHIFT - SK_G32_SHIFT),
                                 _mm_slli_epi32(b, SK_A32_SHIFT - SK_B32_SHIFT)));
 
        // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
        // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
        // 8-bit position
        // mask = (m0A, m0R, m0G, m0B, m1A, m1R, m1G, m1B,
        //         m2A, m2R, m2G, m2B, m3A, m3R, m3G, m3B)
        mask = _mm_or_si128(_mm_or_si128(a, r), _mm_or_si128(g, b));
 
        // Interleave R,G,B into the lower byte of word.
        // i.e. split the sixteen 8-bit values from mask into two sets of eight
        // 16-bit values, padded by zero.
        __m128i maskLo, maskHi;
        // maskLo = (m0A, 0, m0R, 0, m0G, 0, m0B, 0, m1A, 0, m1R, 0, m1G, 0, m1B, 0)
        maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
        // maskHi = (m2A, 0, m2R, 0, m2G, 0, m2B, 0, m3A, 0, m3R, 0, m3G, 0, m3B, 0)
        maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());
 
        // Upscale from 0..31 to 0..32
        // (allows to replace division by a right-shift further down)
        // Right-shift each component by 4 and add the result back to that component,
        // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
        maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
        maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
 
        // Interleave R,G,B into the lower byte of the word
        // dstLo = (d0A, 0, d0R, 0, d0G, 0, d0B, 0, d1A, 0, d1R, 0, d1G, 0, d1B, 0)
        __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
        // dstHi = (d2A, 0, d2R, 0, d2G, 0, d2B, 0, d3A, 0, d3R, 0, d3G, 0, d3B, 0)
        __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());
 
        // mask = (src - dst) * mask
        maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
        maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));
 
        // mask = (src - dst) * mask >> 5
        // Arithmetic shift, because (src - dst) can be negative.
        maskLo = _mm_srai_epi16(maskLo, 5);
        maskHi = _mm_srai_epi16(maskHi, 5);
 
        // Add two pixels into result.
        // result = dst + ((src - dst) * mask >> 5)
        __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
        __m128i resultHi = _mm_add_epi16(dstHi, maskHi);
 
        // Merge into one SSE register with sixteen 8-bit values (four pixels),
        // clamping to 255 if necessary.
        return _mm_packus_epi16(resultLo, resultHi);
    }

◆ blend_lcd16_sse2()

static __m128i blend_lcd16_sse2	(	__m128i &	src,
		__m128i &	dst,
		__m128i &	mask,
		__m128i &	srcA
	)

static

Definition at line 160 of file SkBlitter_ARGB32.cpp.

                                                                                              {
        // SSE2 version of blend_lcd16 for four pixels at once. Unlike the scalar
        // routine there is no per-pixel mask == 0 shortcut here; every lane goes
        // through the full blend. The mask argument is overwritten as scratch space.
        //
        // In the following comments, the components of src, dst and mask are
        // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
        // by an R, G, B, or A suffix. Components of one of the four pixels that
        // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
        // example is the blue channel of the second destination pixel. Memory
        // layout is shown for an ARGB byte order in a color value.
 
        // src and srcA store 8-bit values interleaved with zeros.
        // src  = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
        // srcA = (srcA, 0, srcA, 0, srcA, 0, srcA, 0,
        //         srcA, 0, srcA, 0, srcA, 0, srcA, 0)
        // mask stores 16-bit values (compressed three channels) interleaved with zeros.
        // Lo and Hi denote the low and high bytes of a 16-bit value, respectively.
        // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
        //         m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
 
        // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
        // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
        __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_R32_SHIFT));
 
        // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
        __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_G32_SHIFT));
 
        // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
        __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_B32_SHIFT));
 
        // a needs to be either the min or the max of the LCD coverages, depending on srcA < dstA
        // Each coverage is first shifted into the alpha byte so the byte-wise
        // min/max compare like with like.
        __m128i aMin = _mm_min_epu8(_mm_slli_epi32(r, SK_A32_SHIFT - SK_R32_SHIFT),
                       _mm_min_epu8(_mm_slli_epi32(g, SK_A32_SHIFT - SK_G32_SHIFT),
                                    _mm_slli_epi32(b, SK_A32_SHIFT - SK_B32_SHIFT)));
        __m128i aMax = _mm_max_epu8(_mm_slli_epi32(r, SK_A32_SHIFT - SK_R32_SHIFT),
                       _mm_max_epu8(_mm_slli_epi32(g, SK_A32_SHIFT - SK_G32_SHIFT),
                                    _mm_slli_epi32(b, SK_A32_SHIFT - SK_B32_SHIFT)));
        // srcA has been biased to [0-256], so compare srcA against (dstA+1)
        // NOTE(review): this is a 32-bit compare, but srcA is built by the caller
        // (blit_row_lcd16) with _mm_set1_epi16, so each 32-bit lane holds srcA in
        // both 16-bit halves. The right-hand side adds 1 << SK_A32_SHIFT and then
        // masks with SK_A32_MASK without shifting the result back down. Confirm
        // that both operands really reduce to the plain alpha values.
        __m128i a = _mm_cmplt_epi32(srcA,
                                    _mm_and_si128(
                                            _mm_add_epi32(dst, _mm_set1_epi32(1 << SK_A32_SHIFT)),
                                            _mm_set1_epi32(SK_A32_MASK)));
        // a = if_then_else(a, aMin, aMax) == (aMin & a) | (aMax & ~a)
        a = _mm_or_si128(_mm_and_si128(a, aMin), _mm_andnot_si128(a, aMax));
 
        // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
        // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
        // 8-bit position
        // mask = (m0A, m0R, m0G, m0B, m1A, m1R, m1G, m1B,
        //         m2A, m2R, m2G, m2B, m3A, m3R, m3G, m3B)
        mask = _mm_or_si128(_mm_or_si128(a, r), _mm_or_si128(g, b));
 
        // Interleave R,G,B into the lower byte of word.
        // i.e. split the sixteen 8-bit values from mask into two sets of eight
        // 16-bit values, padded by zero.
        __m128i maskLo, maskHi;
        // maskLo = (m0A, 0, m0R, 0, m0G, 0, m0B, 0, m1A, 0, m1R, 0, m1G, 0, m1B, 0)
        maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
        // maskHi = (m2A, 0, m2R, 0, m2G, 0, m2B, 0, m3A, 0, m3R, 0, m3G, 0, m3B, 0)
        maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());
 
        // Upscale from 0..31 to 0..32
        // (allows to replace division by a right-shift further down)
        // Right-shift each component by 4 and add the result back to that component,
        // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
        maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
        maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
 
        // Multiply each component of maskLo and maskHi by srcA
        maskLo = _mm_mullo_epi16(maskLo, srcA);
        maskHi = _mm_mullo_epi16(maskHi, srcA);
 
        // Right shift mask components by 8 (divide by 256)
        maskLo = _mm_srli_epi16(maskLo, 8);
        maskHi = _mm_srli_epi16(maskHi, 8);
 
        // Interleave R,G,B into the lower byte of the word
        // dstLo = (d0A, 0, d0R, 0, d0G, 0, d0B, 0, d1A, 0, d1R, 0, d1G, 0, d1B, 0)
        __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
        // dstHi = (d2A, 0, d2R, 0, d2G, 0, d2B, 0, d3A, 0, d3R, 0, d3G, 0, d3B, 0)
        __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());
 
        // mask = (src - dst) * mask
        maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
        maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));
 
        // mask = (src - dst) * mask >> 5
        // Arithmetic shift, because (src - dst) can be negative.
        maskLo = _mm_srai_epi16(maskLo, 5);
        maskHi = _mm_srai_epi16(maskHi, 5);
 
        // Add two pixels into result.
        // result = dst + ((src - dst) * mask >> 5)
        __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
        __m128i resultHi = _mm_add_epi16(dstHi, maskHi);
 
        // Pack into 4 32bit dst pixels.
        // resultLo and resultHi contain eight 16-bit components (two pixels) each.
        // Merge into one SSE register with sixteen 8-bit values (four pixels),
        // clamping to 255 if necessary.
        return _mm_packus_epi16(resultLo, resultHi);
    }

◆ blend_row_A8()

static void blend_row_A8	(	SkPMColor *	dst,
		const void *	mask,
		const SkPMColor *	src,
		int	n
	)

static

Definition at line 1862 of file SkBlitter_ARGB32.cpp.

                                                                                        {
    // Blends n source pixels into dst, with one 8-bit coverage value per pixel
    // read from mask.
    auto cov = (const uint8_t*)mask;
    drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
        // Scale the source by coverage, then splat byte 3 of each scaled pixel
        // across that pixel's four bytes (presumably the alpha byte - confirm
        // against the SkPMColor channel layout).
        U8x4 s_aa  = skvx::approx_scale(s, c),
             alpha = skvx::shuffle<3,3,3,3, 7,7,7,7, 11,11,11,11, 15,15,15,15>(s_aa);
        // result = scaled source + dst scaled by (255 - that alpha)
        return s_aa + skvx::approx_scale(d, 255 - alpha);
    });
}

◆ blend_row_A8_opaque()

static void blend_row_A8_opaque	(	SkPMColor *	dst,
		const void *	mask,
		const SkPMColor *	src,
		int	n
	)

static

Definition at line 1871 of file SkBlitter_ARGB32.cpp.

                                                                                               {
    // Blends n source pixels into dst, with one 8-bit coverage value per pixel
    // read from mask. No source alpha is consulted: each byte is a plain
    // interpolation between d and s weighted by coverage.
    auto cov = (const uint8_t*)mask;
    drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
        // (s*c + d*(255-c)) / 255, with the products widened to 16 bits.
        return skvx::div255( skvx::cast<uint16_t>(s) * skvx::cast<uint16_t>(  c  )
                           + skvx::cast<uint16_t>(d) * skvx::cast<uint16_t>(255-c));
    });
}

◆ blend_row_lcd16()

static void blend_row_lcd16	(	SkPMColor *	dst,
		const void *	vmask,
		const SkPMColor *	src,
		int	n
	)

static

Definition at line 1879 of file SkBlitter_ARGB32.cpp.

                                                                                            {
    // Blends n source pixels into dst using a packed 16-bit LCD mask per pixel
    // (vmask is read as uint16_t). Each color channel has its own coverage.

    // d + (s - sa*d) * m, where SkAlphaMul does the scaled multiplies.
    auto src_alpha_blend = [](int s, int d, int sa, int m) {
        return d + SkAlphaMul(s - SkAlphaMul(sa, d), m);
    };
 
    // Expands a 5-bit value to 8 bits by replicating its top bits into the
    // low bits, so 0 maps to 0 and 31 maps to 255.
    auto upscale_31_to_255 = [](int v) {
        return (v << 3) | (v >> 2);
    };
 
    auto mask = (const uint16_t*)vmask;
    for (int i = 0; i < n; ++i) {
        uint16_t m = mask[i];
        // No coverage on any channel: leave this dst pixel untouched.
        if (0 == m) {
            continue;
        }
 
        SkPMColor s = src[i];
        SkPMColor d = dst[i];
 
        int srcA = SkGetPackedA32(s);
        int srcR = SkGetPackedR32(s);
        int srcG = SkGetPackedG32(s);
        int srcB = SkGetPackedB32(s);
 
        // Bias alpha from 0..255 to 0..256 (adds 1 once alpha reaches 128).
        srcA += srcA >> 7;
 
        // We're ignoring the least significant bit of the green coverage channel here.
        int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5);
        int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5);
        int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
 
        // Scale up to 8-bit coverage to work with SkAlphaMul() in src_alpha_blend().
        maskR = upscale_31_to_255(maskR);
        maskG = upscale_31_to_255(maskG);
        maskB = upscale_31_to_255(maskB);
 
        // This LCD blit routine only works if the destination is opaque.
        // The written alpha is always 0xFF.
        dst[i] = SkPackARGB32(0xFF,
                              src_alpha_blend(srcR, SkGetPackedR32(d), srcA, maskR),
                              src_alpha_blend(srcG, SkGetPackedG32(d), srcA, maskG),
                              src_alpha_blend(srcB, SkGetPackedB32(d), srcA, maskB));
    }
}

◆ blend_row_LCD16_opaque()

static void blend_row_LCD16_opaque	(	SkPMColor *	dst,
		const void *	vmask,
		const SkPMColor *	src,
		int	n
	)

static

Definition at line 1923 of file SkBlitter_ARGB32.cpp.

                                                                                                   {
    // Blends n source pixels into dst using a packed 16-bit LCD mask per pixel
    // (vmask is read as uint16_t). The source alpha is never read; only the
    // per-channel coverage weights the blend.
    auto mask = (const uint16_t*)vmask;
 
    for (int i = 0; i < n; ++i) {
        uint16_t m = mask[i];
        // No coverage on any channel: leave this dst pixel untouched.
        if (0 == m) {
            continue;
        }
 
        SkPMColor s = src[i];
        SkPMColor d = dst[i];
 
        int srcR = SkGetPackedR32(s);
        int srcG = SkGetPackedG32(s);
        int srcB = SkGetPackedB32(s);
 
        // We're ignoring the least significant bit of the green coverage channel here.
        int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5);
        int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5);
        int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
 
        // Now upscale them to 0..32, so we can use blend_32.
        maskR = upscale_31_to_32(maskR);
        maskG = upscale_31_to_32(maskG);
        maskB = upscale_31_to_32(maskB);
 
        // This LCD blit routine only works if the destination is opaque.
        // The written alpha is always 0xFF.
        dst[i] = SkPackARGB32(0xFF,
                              blend_32(srcR, SkGetPackedR32(d), maskR),
                              blend_32(srcG, SkGetPackedG32(d), maskG),
                              blend_32(srcB, SkGetPackedB32(d), maskB));
    }
}

◆ blit_color()

static bool blit_color	(	const SkPixmap &	device,
		const SkMask &	mask,
		const SkIRect &	clip,
		SkColor	color
	)

static

Definition at line 1379 of file SkBlitter_ARGB32.cpp.

                                      {
    // Blits a single color through mask into device, restricted to clip.
    // Returns true if the device/mask combination was handled here (N32 device
    // with an A8 or LCD16 mask), false if the caller has to handle it itself.
    int x = clip.fLeft,
        y = clip.fTop;
 
    // A8 mask: hand the whole rectangle to the SkOpts routine.
    if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kA8_Format) {
        SkOpts::blit_mask_d32_a8(device.writable_addr32(x,y), device.rowBytes(),
                                 (const SkAlpha*)mask.getAddr(x,y), mask.fRowBytes,
                                 color, clip.width(), clip.height());
        return true;
    }
 
    // LCD16 mask: blit one row at a time with the row routine matching the
    // color's opacity.
    if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kLCD16_Format) {
        auto dstRow  = device.writable_addr32(x,y);
        auto maskRow = (const uint16_t*)mask.getAddr(x,y);
 
        auto blit_row = blit_row_lcd16;
        SkPMColor opaqueDst = 0;  // ignored unless opaque
 
        // Fully opaque color: use the opaque row routine and precompute the
        // premultiplied color it returns for fully covered pixels.
        if (0xff == SkColorGetA(color)) {
            blit_row  = blit_row_lcd16_opaque;
            opaqueDst = SkPreMultiplyColor(color);
        }
 
        // "height --> 0" is "height-- > 0": runs clip.height() times.
        for (int height = clip.height(); height --> 0; ) {
            blit_row(dstRow, maskRow, color, clip.width(), opaqueDst);
 
            // Row strides are in bytes, hence the char* arithmetic.
            dstRow  = (SkPMColor*)     ((      char*) dstRow + device.rowBytes());
            maskRow = (const uint16_t*)((const char*)maskRow +  mask.fRowBytes);
        }
        return true;
    }
 
    return false;
}

◆ blit_row_lcd16()

void blit_row_lcd16	(	SkPMColor	dst[],
		const uint16_t	mask[],
		SkColor	src,
		int	width,
		SkPMColor
	)

Definition at line 342 of file SkBlitter_ARGB32.cpp.

                                                                                                   {
        // Blits one row of `width` pixels: blends the color src (with its alpha)
        // into dst through the packed 16-bit LCD masks. The trailing unnamed
        // SkPMColor parameter is unused; it matches blit_row_lcd16_opaque's
        // parameter list so both can be called the same way.
        if (width <= 0) {
            return;
        }
 
        int srcA = SkColorGetA(src);
        int srcR = SkColorGetR(src);
        int srcG = SkColorGetG(src);
        int srcB = SkColorGetB(src);
 
        // Bias alpha from 0..255 to the 0..256 scale expected by blend_lcd16.
        srcA = SkAlpha255To256(srcA);
 
        if (width >= 4) {
            SkASSERT(((size_t)dst & 0x03) == 0);
            // Blend leading pixels one at a time until dst is 16-byte aligned,
            // as needed by the aligned loads/stores below.
            while (((size_t)dst & 0x0F) != 0) {
                *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
                mask++;
                dst++;
                width--;
            }
 
            __m128i *d = reinterpret_cast<__m128i*>(dst);
            // Set alpha to 0xFF and replicate source four times in SSE register.
            __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
            // Interleave with zeros to get two sets of four 16-bit values.
            // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
            src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
            // Set srcA_sse to contain eight 16-bit copies of srcA.
            __m128i srcA_sse = _mm_set1_epi16(srcA);
            while (width >= 4) {
                // Load four destination pixels into dst_sse.
                __m128i dst_sse = _mm_load_si128(d);
                // Load four 16-bit masks into lower half of mask_sse.
                __m128i mask_sse = _mm_loadu_si64(mask);
 
                // Check whether masks are equal to 0 and get the highest bit
                // of each byte of result, if masks are all zero, we will get
                // pack_cmp to 0xFFFF
                int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
                                                 _mm_setzero_si128()));
 
                // if mask pixels are not all zero, we will blend the dst pixels
                if (pack_cmp != 0xFFFF) {
                    // Unpack 4 16bit mask pixels to
                    // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
                    //             m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
                    mask_sse = _mm_unpacklo_epi16(mask_sse,
                                                  _mm_setzero_si128());
 
                    // Process 4 32bit dst pixels
                    __m128i result = blend_lcd16_sse2(src_sse, dst_sse, mask_sse, srcA_sse);
                    _mm_store_si128(d, result);
                }
 
                d++;
                mask += 4;
                width -= 4;
            }
 
            dst = reinterpret_cast<SkPMColor*>(d);
        }
 
        // Blend the remaining pixels (fewer than four, or the whole row when
        // the vector path was not taken) one at a time.
        while (width > 0) {
            *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
            mask++;
            dst++;
            width--;
        }
    }

◆ blit_row_lcd16_opaque()

void blit_row_lcd16_opaque	(	SkPMColor	dst[],
		const uint16_t	mask[],
		SkColor	src,
		int	width,
		SkPMColor	opaqueDst
	)

Definition at line 412 of file SkBlitter_ARGB32.cpp.

                                                                                {
        // Blits one row of `width` pixels: blends the opaque color src into dst
        // through the packed 16-bit LCD masks. src's alpha is not read here;
        // opaqueDst is the value the scalar path returns for fully covered
        // (0xFFFF) mask pixels.
        if (width <= 0) {
            return;
        }
 
        int srcR = SkColorGetR(src);
        int srcG = SkColorGetG(src);
        int srcB = SkColorGetB(src);
 
        if (width >= 4) {
            SkASSERT(((size_t)dst & 0x03) == 0);
            // Blend leading pixels one at a time until dst is 16-byte aligned,
            // as needed by the aligned loads/stores below.
            while (((size_t)dst & 0x0F) != 0) {
                *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
                mask++;
                dst++;
                width--;
            }
 
            __m128i *d = reinterpret_cast<__m128i*>(dst);
            // Set alpha to 0xFF and replicate source four times in SSE register.
            __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
            // Interleave with zeros to get two sets of four 16-bit values.
            // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
            src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
            while (width >= 4) {
                // Load four destination pixels into dst_sse.
                __m128i dst_sse = _mm_load_si128(d);
                // Load four 16-bit masks into lower half of mask_sse.
                __m128i mask_sse = _mm_loadu_si64(mask);
 
                // Check whether masks are equal to 0 and get the highest bit
                // of each byte of result, if masks are all zero, we will get
                // pack_cmp to 0xFFFF
                int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
                                                 _mm_setzero_si128()));
 
                // if mask pixels are not all zero, we will blend the dst pixels
                if (pack_cmp != 0xFFFF) {
                    // Unpack 4 16bit mask pixels to
                    // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
                    //             m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
                    mask_sse = _mm_unpacklo_epi16(mask_sse,
                                                  _mm_setzero_si128());
 
                    // Process 4 32bit dst pixels
                    __m128i result = blend_lcd16_opaque_sse2(src_sse, dst_sse, mask_sse);
                    _mm_store_si128(d, result);
                }
 
                d++;
                mask += 4;
                width -= 4;
            }
 
            dst = reinterpret_cast<SkPMColor*>(d);
        }
 
        // Blend the remaining pixels (fewer than four, or the whole row when
        // the vector path was not taken) one at a time.
        while (width > 0) {
            *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
            mask++;
            dst++;
            width--;
        }
    }

◆ drive()

static void drive(SkPMColor* dst,
                  const SkPMColor* src,
                  const uint8_t* cov,
                  int n,
                  U8x4 (*kernel)(U8x4, U8x4, U8x4))

static

Definition at line 1838 of file SkBlitter_ARGB32.cpp.

                                                  {
    // Runs `kernel` over n pixels of dst/src with one coverage byte per pixel,
    // writing the result back into dst. Pixels are processed four at a time,
    // with a scalar tail for the remaining 0-3 pixels.

    // Blend one vector of pixels. Each of the four coverage bytes is repeated
    // four times (one copy per byte of the corresponding 32-bit pixel) so the
    // kernel sees dst, src and coverage as equally sized byte vectors.
    auto blend4 = [kernel](U32 d, U32 s, U8 c) -> U32 {
        const U8x4 dstBytes = sk_bit_cast<U8x4>(d);
        const U8x4 srcBytes = sk_bit_cast<U8x4>(s);
        const U8x4 covBytes = skvx::shuffle<0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3>(c);
        return sk_bit_cast<U32>(kernel(dstBytes, srcBytes, covBytes));
    };

    // Main loop: four pixels per iteration.
    for (; n >= 4; n -= 4, dst += 4, src += 4, cov += 4) {
        blend4(U32::Load(dst), U32::Load(src), U8::Load(cov)).store(dst);
    }

    // Tail: one pixel per iteration; only lane 0 of the result is kept.
    for (; n > 0; --n, ++dst, ++src, ++cov) {
        *dst = blend4(U32{*dst}, U32{*src}, U8{*cov})[0];
    }
}

◆ SkARGB32_Blit32()

static void SkARGB32_Blit32(const SkPixmap& device,
                            const SkMask& mask,
                            const SkIRect& clip,
                            SkPMColor srcColor)

static

Definition at line 1419 of file SkBlitter_ARGB32.cpp.

                                                                     {
    // Blits the 32-bit pixels stored in `mask` onto `device`, restricted to
    // `clip`. Only the alpha of `srcColor` is used: it is passed to the row
    // proc as a global alpha.

    const int width  = clip.width();
    const int height = clip.height();

    // An empty (or inverted) clip has nothing to draw. Bail out before
    // computing any addresses: a loop that runs "at least once, until the
    // counter reaches exactly zero" would otherwise write out of bounds and
    // never terminate normally for height <= 0.
    if (width <= 0 || height <= 0) {
        return;
    }

    const U8CPU alpha = SkGetPackedA32(srcColor);

    // The source pixels always carry per-pixel alpha; the global-alpha path is
    // only needed when the paint alpha is not fully opaque.
    unsigned flags = SkBlitRow::kSrcPixelAlpha_Flag32;
    if (alpha != 255) {
        flags |= SkBlitRow::kGlobalAlpha_Flag32;
    }
    const SkBlitRow::Proc32 proc = SkBlitRow::Factory32(flags);

    const int x = clip.fLeft;
    const int y = clip.fTop;

    SkPMColor* dstRow = device.writable_addr32(x, y);
    const SkPMColor* srcRow = reinterpret_cast<const SkPMColor*>(mask.getAddr8(x, y));

    for (int row = 0; row < height; ++row) {
        proc(dstRow, srcRow, width, alpha);
        // Row strides are in bytes, so step through byte pointers.
        dstRow = reinterpret_cast<SkPMColor*>(
                reinterpret_cast<char*>(dstRow) + device.rowBytes());
        srcRow = reinterpret_cast<const SkPMColor*>(
                reinterpret_cast<const char*>(srcRow) + mask.fRowBytes);
    }
}

◆ upscale_31_to_32()

static int upscale_31_to_32 ( int value )

inline static

Definition at line 33 of file SkBlitter_ARGB32.cpp.

                                              {
    // Maps a value in [0, 31] onto [0, 32].
    SkASSERT((unsigned)value <= 31);
    // For inputs in range, bit 4 is the only bit left after the shift: it is 0
    // for 0..15 and 1 for 16..31, so the upper half is bumped by one and 31
    // becomes 32.
    const int topBit = value >> 4;
    return value + topBit;
}

Macros

Typedefs

Functions

Macro Definition Documentation

◆ blend_8_pixels

◆ SK_B16x5_B32x5_SHIFT

◆ SK_BLITBWMASK_ARGS [1/2]

◆ SK_BLITBWMASK_ARGS [2/2]

◆ SK_BLITBWMASK_BLIT8 [1/2]

◆ SK_BLITBWMASK_BLIT8 [2/2]

◆ SK_BLITBWMASK_DEVTYPE [1/2]

◆ SK_BLITBWMASK_DEVTYPE [2/2]

◆ SK_BLITBWMASK_GETADDR [1/2]

◆ SK_BLITBWMASK_GETADDR [2/2]

◆ SK_BLITBWMASK_NAME [1/2]

◆ SK_BLITBWMASK_NAME [2/2]

◆ SK_G16x5_G32x5_SHIFT

◆ SK_R16x5_R32x5_SHIFT

◆ SkPackedB16x5ToUnmaskedB32x5_SSE2

◆ SkPackedG16x5ToUnmaskedG32x5_SSE2

◆ SkPackedR16x5ToUnmaskedR32x5_SSE2

◆ solid_8_pixels

Typedef Documentation

◆ U32

◆ U8

◆ U8x4

Function Documentation

◆ blend_32()

◆ blend_lcd16()

◆ blend_lcd16_opaque()

◆ blend_lcd16_opaque_sse2()

◆ blend_lcd16_sse2()

◆ blend_row_A8()

◆ blend_row_A8_opaque()

◆ blend_row_lcd16()

◆ blend_row_LCD16_opaque()

◆ blit_color()

◆ blit_row_lcd16()

◆ blit_row_lcd16_opaque()

◆ drive()

◆ SkARGB32_Blit32()

◆ upscale_31_to_32()