Flutter Engine
The Flutter Engine
Macros | Typedefs | Functions
SkBlitter_ARGB32.cpp File Reference
#include "include/core/SkColor.h"
#include "include/core/SkColorPriv.h"
#include "include/core/SkColorType.h"
#include "include/core/SkPaint.h"
#include "include/core/SkPixmap.h"
#include "include/core/SkRect.h"
#include "include/core/SkTypes.h"
#include "include/private/SkColorData.h"
#include "include/private/base/SkCPUTypes.h"
#include "include/private/base/SkDebug.h"
#include "include/private/base/SkMalloc.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkUtils.h"
#include "src/base/SkVx.h"
#include "src/core/SkBlitMask.h"
#include "src/core/SkBlitRow.h"
#include "src/core/SkCoreBlitters.h"
#include "src/core/SkMask.h"
#include "src/core/SkMemset.h"
#include "src/shaders/SkShaderBase.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <emmintrin.h>
#include "src/core/SkBlitBWMaskTemplate.h"

Go to the source code of this file.

Macros

#define SK_R16x5_R32x5_SHIFT   (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5)
 
#define SK_G16x5_G32x5_SHIFT   (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5)
 
#define SK_B16x5_B32x5_SHIFT   (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5)
 
#define SkPackedR16x5ToUnmaskedR32x5_SSE2(x)   (_mm_slli_epi32(x, SK_R16x5_R32x5_SHIFT))
 
#define SkPackedG16x5ToUnmaskedG32x5_SSE2(x)   (_mm_slli_epi32(x, SK_G16x5_G32x5_SHIFT))
 
#define SkPackedB16x5ToUnmaskedB32x5_SSE2(x)   (_mm_slli_epi32(x, SK_B16x5_B32x5_SHIFT))
 
#define solid_8_pixels(mask, dst, color)
 
#define SK_BLITBWMASK_NAME   SkARGB32_BlitBW
 
#define SK_BLITBWMASK_ARGS   , SkPMColor color
 
#define SK_BLITBWMASK_BLIT8(mask, dst)   solid_8_pixels(mask, dst, color)
 
#define SK_BLITBWMASK_GETADDR   writable_addr32
 
#define SK_BLITBWMASK_DEVTYPE   uint32_t
 
#define blend_8_pixels(mask, dst, sc, dst_scale)
 
#define SK_BLITBWMASK_NAME   SkARGB32_BlendBW
 
#define SK_BLITBWMASK_ARGS   , uint32_t sc, unsigned dst_scale
 
#define SK_BLITBWMASK_BLIT8(mask, dst)   blend_8_pixels(mask, dst, sc, dst_scale)
 
#define SK_BLITBWMASK_GETADDR   writable_addr32
 
#define SK_BLITBWMASK_DEVTYPE   uint32_t
 

Typedefs

using U32 = skvx::Vec< 4, uint32_t >
 
using U8x4 = skvx::Vec< 16, uint8_t >
 
using U8 = skvx::Vec< 4, uint8_t >
 

Functions

static int upscale_31_to_32 (int value)
 
static int blend_32 (int src, int dst, int scale)
 
static SkPMColor blend_lcd16 (int srcA, int srcR, int srcG, int srcB, SkPMColor dst, uint16_t mask)
 
static SkPMColor blend_lcd16_opaque (int srcR, int srcG, int srcB, SkPMColor dst, uint16_t mask, SkPMColor opaqueDst)
 
static __m128i blend_lcd16_sse2 (__m128i &src, __m128i &dst, __m128i &mask, __m128i &srcA)
 
static __m128i blend_lcd16_opaque_sse2 (__m128i &src, __m128i &dst, __m128i &mask)
 
void blit_row_lcd16 (SkPMColor dst[], const uint16_t mask[], SkColor src, int width, SkPMColor)
 
void blit_row_lcd16_opaque (SkPMColor dst[], const uint16_t mask[], SkColor src, int width, SkPMColor opaqueDst)
 
static bool blit_color (const SkPixmap &device, const SkMask &mask, const SkIRect &clip, SkColor color)
 
static void SkARGB32_Blit32 (const SkPixmap &device, const SkMask &mask, const SkIRect &clip, SkPMColor srcColor)
 
static void drive (SkPMColor *dst, const SkPMColor *src, const uint8_t *cov, int n, U8x4(*kernel)(U8x4, U8x4, U8x4))
 
static void blend_row_A8 (SkPMColor *dst, const void *mask, const SkPMColor *src, int n)
 
static void blend_row_A8_opaque (SkPMColor *dst, const void *mask, const SkPMColor *src, int n)
 
static void blend_row_lcd16 (SkPMColor *dst, const void *vmask, const SkPMColor *src, int n)
 
static void blend_row_LCD16_opaque (SkPMColor *dst, const void *vmask, const SkPMColor *src, int n)
 

Macro Definition Documentation

◆ blend_8_pixels

#define blend_8_pixels (   mask,
  dst,
  sc,
  dst_scale 
)
Value:
do { \
if (mask & 0x80) { dst[0] = sc + SkAlphaMulQ(dst[0], dst_scale); } \
if (mask & 0x40) { dst[1] = sc + SkAlphaMulQ(dst[1], dst_scale); } \
if (mask & 0x20) { dst[2] = sc + SkAlphaMulQ(dst[2], dst_scale); } \
if (mask & 0x10) { dst[3] = sc + SkAlphaMulQ(dst[3], dst_scale); } \
if (mask & 0x08) { dst[4] = sc + SkAlphaMulQ(dst[4], dst_scale); } \
if (mask & 0x04) { dst[5] = sc + SkAlphaMulQ(dst[5], dst_scale); } \
if (mask & 0x02) { dst[6] = sc + SkAlphaMulQ(dst[6], dst_scale); } \
if (mask & 0x01) { dst[7] = sc + SkAlphaMulQ(dst[7], dst_scale); } \
} while (0)
static SK_ALWAYS_INLINE uint32_t SkAlphaMulQ(uint32_t c, unsigned scale)
Definition: SkColorPriv.h:142
dst
Definition: cp.py:12

Definition at line 1540 of file SkBlitter_ARGB32.cpp.

◆ SK_B16x5_B32x5_SHIFT

#define SK_B16x5_B32x5_SHIFT   (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5)

Definition at line 134 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_ARGS [1/2]

#define SK_BLITBWMASK_ARGS   , SkPMColor color

Definition at line 1553 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_ARGS [2/2]

#define SK_BLITBWMASK_ARGS   , uint32_t sc, unsigned dst_scale

Definition at line 1553 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_BLIT8 [1/2]

#define SK_BLITBWMASK_BLIT8 (   mask,
  dst 
)    solid_8_pixels(mask, dst, color)

Definition at line 1554 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_BLIT8 [2/2]

#define SK_BLITBWMASK_BLIT8 (   mask,
  dst 
)    blend_8_pixels(mask, dst, sc, dst_scale)

Definition at line 1554 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_DEVTYPE [1/2]

#define SK_BLITBWMASK_DEVTYPE   uint32_t

Definition at line 1556 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_DEVTYPE [2/2]

#define SK_BLITBWMASK_DEVTYPE   uint32_t

Definition at line 1556 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_GETADDR [1/2]

#define SK_BLITBWMASK_GETADDR   writable_addr32

Definition at line 1555 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_GETADDR [2/2]

#define SK_BLITBWMASK_GETADDR   writable_addr32

Definition at line 1555 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_NAME [1/2]

#define SK_BLITBWMASK_NAME   SkARGB32_BlitBW

Definition at line 1552 of file SkBlitter_ARGB32.cpp.

◆ SK_BLITBWMASK_NAME [2/2]

#define SK_BLITBWMASK_NAME   SkARGB32_BlendBW

Definition at line 1552 of file SkBlitter_ARGB32.cpp.

◆ SK_G16x5_G32x5_SHIFT

#define SK_G16x5_G32x5_SHIFT   (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5)

Definition at line 133 of file SkBlitter_ARGB32.cpp.

◆ SK_R16x5_R32x5_SHIFT

#define SK_R16x5_R32x5_SHIFT   (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5)

Definition at line 132 of file SkBlitter_ARGB32.cpp.

◆ SkPackedB16x5ToUnmaskedB32x5_SSE2

#define SkPackedB16x5ToUnmaskedB32x5_SSE2 (   x)    (_mm_slli_epi32(x, SK_B16x5_B32x5_SHIFT))

Definition at line 155 of file SkBlitter_ARGB32.cpp.

◆ SkPackedG16x5ToUnmaskedG32x5_SSE2

#define SkPackedG16x5ToUnmaskedG32x5_SSE2 (   x)    (_mm_slli_epi32(x, SK_G16x5_G32x5_SHIFT))

Definition at line 147 of file SkBlitter_ARGB32.cpp.

◆ SkPackedR16x5ToUnmaskedR32x5_SSE2

#define SkPackedR16x5ToUnmaskedR32x5_SSE2 (   x)    (_mm_slli_epi32(x, SK_R16x5_R32x5_SHIFT))

Definition at line 139 of file SkBlitter_ARGB32.cpp.

◆ solid_8_pixels

#define solid_8_pixels (   mask,
  dst,
  color 
)
Value:
do { \
if (mask & 0x80) dst[0] = color; \
if (mask & 0x40) dst[1] = color; \
if (mask & 0x20) dst[2] = color; \
if (mask & 0x10) dst[3] = color; \
if (mask & 0x08) dst[4] = color; \
if (mask & 0x04) dst[5] = color; \
if (mask & 0x02) dst[6] = color; \
if (mask & 0x01) dst[7] = color; \
} while (0)
DlColor color

Definition at line 1521 of file SkBlitter_ARGB32.cpp.

Typedef Documentation

◆ U32

using U32 = skvx::Vec< 4, uint32_t>

Definition at line 1834 of file SkBlitter_ARGB32.cpp.

◆ U8

using U8 = skvx::Vec< 4, uint8_t>

Definition at line 1836 of file SkBlitter_ARGB32.cpp.

◆ U8x4

using U8x4 = skvx::Vec<16, uint8_t>

Definition at line 1835 of file SkBlitter_ARGB32.cpp.

Function Documentation

◆ blend_32()

static int blend_32 ( int  src,
int  dst,
int  scale 
)
inline static

Definition at line 38 of file SkBlitter_ARGB32.cpp.

38 {
39 SkASSERT((unsigned)src <= 0xFF);
40 SkASSERT((unsigned)dst <= 0xFF);
41 SkASSERT((unsigned)scale <= 32);
42 return dst + ((src - dst) * scale >> 5);
43}
#define SkASSERT(cond)
Definition: SkAssert.h:116
const Scalar scale

◆ blend_lcd16()

static SkPMColor blend_lcd16 ( int  srcA,
int  srcR,
int  srcG,
int  srcB,
SkPMColor  dst,
uint16_t  mask 
)
inline static

Definition at line 45 of file SkBlitter_ARGB32.cpp.

46 {
47 if (mask == 0) {
48 return dst;
49 }
50
51 /* We want all of these in 5bits, hence the shifts in case one of them
52 * (green) is 6bits.
53 */
54 int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
55 int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
56 int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
57
58 // Now upscale them to 0..32, so we can use blend32
59 maskR = upscale_31_to_32(maskR);
60 maskG = upscale_31_to_32(maskG);
61 maskB = upscale_31_to_32(maskB);
62
63 // srcA has been upscaled to 256 before passed into this function
64 maskR = maskR * srcA >> 8;
65 maskG = maskG * srcA >> 8;
66 maskB = maskB * srcA >> 8;
67
68 int dstA = SkGetPackedA32(dst);
69 int dstR = SkGetPackedR32(dst);
70 int dstG = SkGetPackedG32(dst);
71 int dstB = SkGetPackedB32(dst);
72
73 // Subtract 1 from srcA to bring it back to [0-255] to compare against dstA, alpha needs to
74 // use either the min or the max of the LCD coverages. See https://skbug.com/40037823
75 int maskA = (srcA-1) < dstA ? std::min(maskR, std::min(maskG, maskB))
76 : std::max(maskR, std::max(maskG, maskB));
77
78 return SkPackARGB32(blend_32(0xFF, dstA, maskA),
79 blend_32(srcR, dstR, maskR),
80 blend_32(srcG, dstG, maskG),
81 blend_32(srcB, dstB, maskB));
82}
static int blend_32(int src, int dst, int scale)
static int upscale_31_to_32(int value)
#define SkGetPackedB16(color)
Definition: SkColorData.h:32
#define SkGetPackedG16(color)
Definition: SkColorData.h:31
#define SK_G16_BITS
Definition: SkColorData.h:19
#define SkGetPackedR16(color)
Definition: SkColorData.h:30
#define SK_R16_BITS
Definition: SkColorData.h:18
#define SK_B16_BITS
Definition: SkColorData.h:20
#define SkGetPackedB32(packed)
Definition: SkColorPriv.h:95
#define SkGetPackedR32(packed)
Definition: SkColorPriv.h:93
#define SkGetPackedA32(packed)
Definition: SkColorPriv.h:92
#define SkGetPackedG32(packed)
Definition: SkColorPriv.h:94
static SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b)
Definition: SkColorPriv.h:106
static float max(float r, float g, float b)
Definition: hsl.cpp:49
static float min(float r, float g, float b)
Definition: hsl.cpp:48

◆ blend_lcd16_opaque()

static SkPMColor blend_lcd16_opaque ( int  srcR,
int  srcG,
int  srcB,
SkPMColor  dst,
uint16_t  mask,
SkPMColor  opaqueDst 
)
inline static

Definition at line 84 of file SkBlitter_ARGB32.cpp.

86 {
87 if (mask == 0) {
88 return dst;
89 }
90
91 if (0xFFFF == mask) {
92 return opaqueDst;
93 }
94
95 /* We want all of these in 5bits, hence the shifts in case one of them
96 * (green) is 6bits.
97 */
98 int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
99 int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
100 int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
101
102 // Now upscale them to 0..32, so we can use blend32
103 maskR = upscale_31_to_32(maskR);
104 maskG = upscale_31_to_32(maskG);
105 maskB = upscale_31_to_32(maskB);
106
107 int dstA = SkGetPackedA32(dst);
108 int dstR = SkGetPackedR32(dst);
109 int dstG = SkGetPackedG32(dst);
110 int dstB = SkGetPackedB32(dst);
111
112 // Opaque src alpha always uses the max of the LCD coverages.
113 int maskA = std::max(maskR, std::max(maskG, maskB));
114
115 // LCD blitting is only supported if the dst is known/required
116 // to be opaque
117 return SkPackARGB32(blend_32(0xFF, dstA, maskA),
118 blend_32(srcR, dstR, maskR),
119 blend_32(srcG, dstG, maskG),
120 blend_32(srcB, dstB, maskB));
121}

◆ blend_lcd16_opaque_sse2()

static __m128i blend_lcd16_opaque_sse2 ( __m128i &  src,
__m128i &  dst,
__m128i &  mask 
)
static

Definition at line 262 of file SkBlitter_ARGB32.cpp.

262 {
263 // In the following comments, the components of src, dst and mask are
264 // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
265 // by an R, G, B, or A suffix. Components of one of the four pixels that
266 // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
267 // example is the blue channel of the second destination pixel. Memory
268 // layout is shown for an ARGB byte order in a color value.
269
270 // src stores 8-bit values interleaved with zeros.
271 // src = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
272 // mask stores 16-bit values (shown as high and low bytes) interleaved with
273 // zeros
274 // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
275 // m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
276
277 // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
278 // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
279 __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
280 _mm_set1_epi32(0x1F << SK_R32_SHIFT));
281
282 // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
283 __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
284 _mm_set1_epi32(0x1F << SK_G32_SHIFT));
285
286 // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
287 __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
288 _mm_set1_epi32(0x1F << SK_B32_SHIFT));
289
290 // a = max(r, g, b) since opaque src alpha uses max of LCD coverages
291 __m128i a = _mm_max_epu8(_mm_slli_epi32(r, SK_A32_SHIFT - SK_R32_SHIFT),
292 _mm_max_epu8(_mm_slli_epi32(g, SK_A32_SHIFT - SK_G32_SHIFT),
293 _mm_slli_epi32(b, SK_A32_SHIFT - SK_B32_SHIFT)));
294
295 // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
296 // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
297 // 8-bit position
298 // mask = (m0A, m0R, m0G, m0B, m1A, m1R, m1G, m1B,
299 // m2A, m2R, m2G, m2B, m3A, m3R, m3G, m3B)
300 mask = _mm_or_si128(_mm_or_si128(a, r), _mm_or_si128(g, b));
301
302 // Interleave R,G,B into the lower byte of word.
303 // i.e. split the sixteen 8-bit values from mask into two sets of eight
304 // 16-bit values, padded by zero.
305 __m128i maskLo, maskHi;
306 // maskLo = (m0A, 0, m0R, 0, m0G, 0, m0B, 0, m1A, 0, m1R, 0, m1G, 0, m1B, 0)
307 maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
308 // maskHi = (m2A, 0, m2R, 0, m2G, 0, m2B, 0, m3A, 0, m3R, 0, m3G, 0, m3B, 0)
309 maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());
310
311 // Upscale from 0..31 to 0..32
312 // (allows replacing the division with a right shift further down)
313 // Right-shift each component by 4 and add the result back to that component,
314 // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
315 maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
316 maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
317
318 // Interleave R,G,B into the lower byte of the word
319 // dstLo = (d0A, 0, d0R, 0, d0G, 0, d0B, 0, d1A, 0, d1R, 0, d1G, 0, d1B, 0)
320 __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
321 // dstHi = (d2A, 0, d2R, 0, d2G, 0, d2B, 0, d3A, 0, d3R, 0, d3G, 0, d3B, 0)
322 __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());
323
324 // mask = (src - dst) * mask
325 maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
326 maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));
327
328 // mask = (src - dst) * mask >> 5
329 maskLo = _mm_srai_epi16(maskLo, 5);
330 maskHi = _mm_srai_epi16(maskHi, 5);
331
332 // Add two pixels into result.
333 // result = dst + ((src - dst) * mask >> 5)
334 __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
335 __m128i resultHi = _mm_add_epi16(dstHi, maskHi);
336
337 // Merge into one SSE register with sixteen 8-bit values (four pixels),
338 // clamping to 255 if necessary.
339 return _mm_packus_epi16(resultLo, resultHi);
340 }
#define SkPackedR16x5ToUnmaskedR32x5_SSE2(x)
#define SkPackedB16x5ToUnmaskedB32x5_SSE2(x)
#define SkPackedG16x5ToUnmaskedG32x5_SSE2(x)
#define SK_R32_SHIFT
Definition: SkTypes.h:44
#define SK_A32_SHIFT
Definition: SkTypes.h:54
#define SK_B32_SHIFT
Definition: SkTypes.h:50
#define SK_G32_SHIFT
Definition: SkTypes.h:53
static bool b
struct MyStruct a[10]

◆ blend_lcd16_sse2()

static __m128i blend_lcd16_sse2 ( __m128i &  src,
__m128i &  dst,
__m128i &  mask,
__m128i &  srcA 
)
static

Definition at line 160 of file SkBlitter_ARGB32.cpp.

160 {
161 // In the following comments, the components of src, dst and mask are
162 // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
163 // by an R, G, B, or A suffix. Components of one of the four pixels that
164 // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
165 // example is the blue channel of the second destination pixel. Memory
166 // layout is shown for an ARGB byte order in a color value.
167
168 // src and srcA store 8-bit values interleaved with zeros.
169 // src = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
170 // srcA = (srcA, 0, srcA, 0, srcA, 0, srcA, 0,
171 // srcA, 0, srcA, 0, srcA, 0, srcA, 0)
172 // mask stores 16-bit values (compressed three channels) interleaved with zeros.
173 // Lo and Hi denote the low and high bytes of a 16-bit value, respectively.
174 // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
175 // m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
176
177 // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
178 // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
179 __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
180 _mm_set1_epi32(0x1F << SK_R32_SHIFT));
181
182 // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
183 __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
184 _mm_set1_epi32(0x1F << SK_G32_SHIFT));
185
186 // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
187 __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
188 _mm_set1_epi32(0x1F << SK_B32_SHIFT));
189
190 // a needs to be either the min or the max of the LCD coverages, depending on srcA < dstA
191 __m128i aMin = _mm_min_epu8(_mm_slli_epi32(r, SK_A32_SHIFT - SK_R32_SHIFT),
192 _mm_min_epu8(_mm_slli_epi32(g, SK_A32_SHIFT - SK_G32_SHIFT),
193 _mm_slli_epi32(b, SK_A32_SHIFT - SK_B32_SHIFT)));
194 __m128i aMax = _mm_max_epu8(_mm_slli_epi32(r, SK_A32_SHIFT - SK_R32_SHIFT),
195 _mm_max_epu8(_mm_slli_epi32(g, SK_A32_SHIFT - SK_G32_SHIFT),
196 _mm_slli_epi32(b, SK_A32_SHIFT - SK_B32_SHIFT)));
197 // srcA has been biased to [0-256], so compare srcA against (dstA+1)
198 __m128i a = _mm_cmplt_epi32(srcA,
199 _mm_and_si128(
200 _mm_add_epi32(dst, _mm_set1_epi32(1 << SK_A32_SHIFT)),
201 _mm_set1_epi32(SK_A32_MASK)));
202 // a = if_then_else(a, aMin, aMax) == (aMin & a) | (aMax & ~a)
203 a = _mm_or_si128(_mm_and_si128(a, aMin), _mm_andnot_si128(a, aMax));
204
205 // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
206 // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
207 // 8-bit position
208 // mask = (m0A, m0R, m0G, m0B, m1A, m1R, m1G, m1B,
209 // m2A, m2R, m2G, m2B, m3A, m3R, m3G, m3B)
210 mask = _mm_or_si128(_mm_or_si128(a, r), _mm_or_si128(g, b));
211
212 // Interleave R,G,B into the lower byte of word.
213 // i.e. split the sixteen 8-bit values from mask into two sets of eight
214 // 16-bit values, padded by zero.
215 __m128i maskLo, maskHi;
216 // maskLo = (m0A, 0, m0R, 0, m0G, 0, m0B, 0, m1A, 0, m1R, 0, m1G, 0, m1B, 0)
217 maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
218 // maskHi = (m2A, 0, m2R, 0, m2G, 0, m2B, 0, m3A, 0, m3R, 0, m3G, 0, m3B, 0)
219 maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());
220
221 // Upscale from 0..31 to 0..32
222 // (allows replacing the division with a right shift further down)
223 // Right-shift each component by 4 and add the result back to that component,
224 // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
225 maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
226 maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
227
228 // Multiply each component of maskLo and maskHi by srcA
229 maskLo = _mm_mullo_epi16(maskLo, srcA);
230 maskHi = _mm_mullo_epi16(maskHi, srcA);
231
232 // Right shift mask components by 8 (divide by 256)
233 maskLo = _mm_srli_epi16(maskLo, 8);
234 maskHi = _mm_srli_epi16(maskHi, 8);
235
236 // Interleave R,G,B into the lower byte of the word
237 // dstLo = (d0A, 0, d0R, 0, d0G, 0, d0B, 0, d1A, 0, d1R, 0, d1G, 0, d1B, 0)
238 __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
239 // dstHi = (d2A, 0, d2R, 0, d2G, 0, d2B, 0, d3A, 0, d3R, 0, d3G, 0, d3B, 0)
240 __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());
241
242 // mask = (src - dst) * mask
243 maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
244 maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));
245
246 // mask = (src - dst) * mask >> 5
247 maskLo = _mm_srai_epi16(maskLo, 5);
248 maskHi = _mm_srai_epi16(maskHi, 5);
249
250 // Add two pixels into result.
251 // result = dst + ((src - dst) * mask >> 5)
252 __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
253 __m128i resultHi = _mm_add_epi16(dstHi, maskHi);
254
255 // Pack into 4 32bit dst pixels.
256 // resultLo and resultHi contain eight 16-bit components (two pixels) each.
257 // Merge into one SSE register with sixteen 8-bit values (four pixels),
258 // clamping to 255 if necessary.
259 return _mm_packus_epi16(resultLo, resultHi);
260 }
#define SK_A32_MASK
Definition: SkColorPriv.h:45

◆ blend_row_A8()

static void blend_row_A8 ( SkPMColor *  dst,
const void *  mask,
const SkPMColor *  src,
int  n 
)
static

Definition at line 1862 of file SkBlitter_ARGB32.cpp.

1862 {
1863 auto cov = (const uint8_t*)mask;
1864 drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
1865 U8x4 s_aa = skvx::approx_scale(s, c),
1866 alpha = skvx::shuffle<3,3,3,3, 7,7,7,7, 11,11,11,11, 15,15,15,15>(s_aa);
1867 return s_aa + skvx::approx_scale(d, 255 - alpha);
1868 });
1869}
static void drive(SkPMColor *dst, const SkPMColor *src, const uint8_t *cov, int n, U8x4(*kernel)(U8x4, U8x4, U8x4))
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE auto & d
Definition: main.cc:19
struct MyStruct s
SIN Vec< N, uint8_t > approx_scale(const Vec< N, uint8_t > &x, const Vec< N, uint8_t > &y)
Definition: SkVx.h:824
Definition: SkVx.h:83

◆ blend_row_A8_opaque()

static void blend_row_A8_opaque ( SkPMColor *  dst,
const void *  mask,
const SkPMColor *  src,
int  n 
)
static

Definition at line 1871 of file SkBlitter_ARGB32.cpp.

1871 {
1872 auto cov = (const uint8_t*)mask;
1873 drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
1874 return skvx::div255( skvx::cast<uint16_t>(s) * skvx::cast<uint16_t>( c )
1875 + skvx::cast<uint16_t>(d) * skvx::cast<uint16_t>(255-c));
1876 });
1877}
SIN Vec< N, uint8_t > div255(const Vec< N, uint16_t > &x)
Definition: SkVx.h:818

◆ blend_row_lcd16()

static void blend_row_lcd16 ( SkPMColor *  dst,
const void *  vmask,
const SkPMColor *  src,
int  n 
)
static

Definition at line 1879 of file SkBlitter_ARGB32.cpp.

1879 {
1880 auto src_alpha_blend = [](int s, int d, int sa, int m) {
1881 return d + SkAlphaMul(s - SkAlphaMul(sa, d), m);
1882 };
1883
1884 auto upscale_31_to_255 = [](int v) {
1885 return (v << 3) | (v >> 2);
1886 };
1887
1888 auto mask = (const uint16_t*)vmask;
1889 for (int i = 0; i < n; ++i) {
1890 uint16_t m = mask[i];
1891 if (0 == m) {
1892 continue;
1893 }
1894
1895 SkPMColor s = src[i];
1896 SkPMColor d = dst[i];
1897
1898 int srcA = SkGetPackedA32(s);
1899 int srcR = SkGetPackedR32(s);
1900 int srcG = SkGetPackedG32(s);
1901 int srcB = SkGetPackedB32(s);
1902
1903 srcA += srcA >> 7;
1904
1905 // We're ignoring the least significant bit of the green coverage channel here.
1906 int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5);
1907 int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5);
1908 int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
1909
1910 // Scale up to 8-bit coverage to work with SkAlphaMul() in src_alpha_blend().
1911 maskR = upscale_31_to_255(maskR);
1912 maskG = upscale_31_to_255(maskG);
1913 maskB = upscale_31_to_255(maskB);
1914
1915 // This LCD blit routine only works if the destination is opaque.
1916 dst[i] = SkPackARGB32(0xFF,
1917 src_alpha_blend(srcR, SkGetPackedR32(d), srcA, maskR),
1918 src_alpha_blend(srcG, SkGetPackedG32(d), srcA, maskG),
1919 src_alpha_blend(srcB, SkGetPackedB32(d), srcA, maskB));
1920 }
1921}
#define SkAlphaMul(value, alpha256)
Definition: SkColorPriv.h:34
uint32_t SkPMColor
Definition: SkColor.h:205

◆ blend_row_LCD16_opaque()

static void blend_row_LCD16_opaque ( SkPMColor *  dst,
const void *  vmask,
const SkPMColor *  src,
int  n 
)
static

Definition at line 1923 of file SkBlitter_ARGB32.cpp.

1923 {
1924 auto mask = (const uint16_t*)vmask;
1925
1926 for (int i = 0; i < n; ++i) {
1927 uint16_t m = mask[i];
1928 if (0 == m) {
1929 continue;
1930 }
1931
1932 SkPMColor s = src[i];
1933 SkPMColor d = dst[i];
1934
1935 int srcR = SkGetPackedR32(s);
1936 int srcG = SkGetPackedG32(s);
1937 int srcB = SkGetPackedB32(s);
1938
1939 // We're ignoring the least significant bit of the green coverage channel here.
1940 int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5);
1941 int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5);
1942 int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
1943
1944 // Now upscale them to 0..32, so we can use blend_32.
1945 maskR = upscale_31_to_32(maskR);
1946 maskG = upscale_31_to_32(maskG);
1947 maskB = upscale_31_to_32(maskB);
1948
1949 // This LCD blit routine only works if the destination is opaque.
1950 dst[i] = SkPackARGB32(0xFF,
1951 blend_32(srcR, SkGetPackedR32(d), maskR),
1952 blend_32(srcG, SkGetPackedG32(d), maskG),
1953 blend_32(srcB, SkGetPackedB32(d), maskB));
1954 }
1955}

◆ blit_color()

static bool blit_color ( const SkPixmap &  device,
const SkMask &  mask,
const SkIRect &  clip,
SkColor  color 
)
static

Definition at line 1379 of file SkBlitter_ARGB32.cpp.

1382 {
1383 int x = clip.fLeft,
1384 y = clip.fTop;
1385
1386 if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kA8_Format) {
1387 SkOpts::blit_mask_d32_a8(device.writable_addr32(x,y), device.rowBytes(),
1388 (const SkAlpha*)mask.getAddr(x,y), mask.fRowBytes,
1389 color, clip.width(), clip.height());
1390 return true;
1391 }
1392
1393 if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kLCD16_Format) {
1394 auto dstRow = device.writable_addr32(x,y);
1395 auto maskRow = (const uint16_t*)mask.getAddr(x,y);
1396
1397 auto blit_row = blit_row_lcd16;
1398 SkPMColor opaqueDst = 0; // ignored unless opaque
1399
1400 if (0xff == SkColorGetA(color)) {
1401 blit_row = blit_row_lcd16_opaque;
1402 opaqueDst = SkPreMultiplyColor(color);
1403 }
1404
1405 for (int height = clip.height(); height --> 0; ) {
1406 blit_row(dstRow, maskRow, color, clip.width(), opaqueDst);
1407
1408 dstRow = (SkPMColor*) (( char*) dstRow + device.rowBytes());
1409 maskRow = (const uint16_t*)((const char*)maskRow + mask.fRowBytes);
1410 }
1411 return true;
1412 }
1413
1414 return false;
1415}
void blit_row_lcd16(SkPMColor dst[], const uint16_t mask[], SkColor src, int width, SkPMColor)
void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t mask[], SkColor src, int width, SkPMColor opaqueDst)
SK_API SkPMColor SkPreMultiplyColor(SkColor c)
Definition: SkColor.cpp:21
uint8_t SkAlpha
Definition: SkColor.h:26
#define SkColorGetA(color)
Definition: SkColor.h:61
static SkPath clip(const SkPath &path, const SkHalfPlane &plane)
Definition: SkPath.cpp:3892
VkDevice device
Definition: main.cc:53
double y
double x
void(* blit_mask_d32_a8)(SkPMColor *dst, size_t dstRB, const SkAlpha *mask, size_t maskRB, SkColor color, int w, int h)
int32_t height
const uint32_t fRowBytes
Definition: SkMask.h:43
@ kA8_Format
8bits per pixel mask (e.g. antialiasing)
Definition: SkMask.h:28
@ kLCD16_Format
565 alpha for r/g/b
Definition: SkMask.h:31
const void * getAddr(int x, int y) const
Definition: SkMask.cpp:112
const Format fFormat
Definition: SkMask.h:44

◆ blit_row_lcd16()

void blit_row_lcd16 ( SkPMColor  dst[],
const uint16_t  mask[],
SkColor  src,
int  width,
SkPMColor   
)

Definition at line 342 of file SkBlitter_ARGB32.cpp.

342 {
343 if (width <= 0) {
344 return;
345 }
346
347 int srcA = SkColorGetA(src);
348 int srcR = SkColorGetR(src);
349 int srcG = SkColorGetG(src);
350 int srcB = SkColorGetB(src);
351
352 srcA = SkAlpha255To256(srcA);
353
354 if (width >= 4) {
355 SkASSERT(((size_t)dst & 0x03) == 0);
356 while (((size_t)dst & 0x0F) != 0) {
357 *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
358 mask++;
359 dst++;
360 width--;
361 }
362
363 __m128i *d = reinterpret_cast<__m128i*>(dst);
364 // Set alpha to 0xFF and replicate source four times in SSE register.
365 __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
366 // Interleave with zeros to get two sets of four 16-bit values.
367 src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
368 // Set srcA_sse to contain eight copies of srcA, padded with zero.
369 // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
370 __m128i srcA_sse = _mm_set1_epi16(srcA);
371 while (width >= 4) {
372 // Load four destination pixels into dst_sse.
373 __m128i dst_sse = _mm_load_si128(d);
374 // Load four 16-bit masks into lower half of mask_sse.
375 __m128i mask_sse = _mm_loadu_si64(mask);
376
377 // Check whether masks are equal to 0 and get the highest bit
378 // of each byte of result, if masks are all zero, we will get
379 // pack_cmp to 0xFFFF
380 int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
381 _mm_setzero_si128()));
382
383 // if mask pixels are not all zero, we will blend the dst pixels
384 if (pack_cmp != 0xFFFF) {
385 // Unpack 4 16bit mask pixels to
386 // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
387 // m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
388 mask_sse = _mm_unpacklo_epi16(mask_sse,
389 _mm_setzero_si128());
390
391 // Process 4 32bit dst pixels
392 __m128i result = blend_lcd16_sse2(src_sse, dst_sse, mask_sse, srcA_sse);
393 _mm_store_si128(d, result);
394 }
395
396 d++;
397 mask += 4;
398 width -= 4;
399 }
400
401 dst = reinterpret_cast<SkPMColor*>(d);
402 }
403
404 while (width > 0) {
405 *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
406 mask++;
407 dst++;
408 width--;
409 }
410 }
static __m128i blend_lcd16_sse2(__m128i &src, __m128i &dst, __m128i &mask, __m128i &srcA)
static SkPMColor blend_lcd16(int srcA, int srcR, int srcG, int srcB, SkPMColor dst, uint16_t mask)
static unsigned SkAlpha255To256(U8CPU alpha)
Definition: SkColorPriv.h:24
#define SkColorGetR(color)
Definition: SkColor.h:65
#define SkColorGetG(color)
Definition: SkColor.h:69
#define SkColorGetB(color)
Definition: SkColor.h:73
GAsyncResult * result
int32_t width

◆ blit_row_lcd16_opaque()

void blit_row_lcd16_opaque ( SkPMColor  dst[],
const uint16_t  mask[],
SkColor  src,
int  width,
SkPMColor  opaqueDst 
)

Definition at line 412 of file SkBlitter_ARGB32.cpp.

413 {
414 if (width <= 0) {
415 return;
416 }
417
418 int srcR = SkColorGetR(src);
419 int srcG = SkColorGetG(src);
420 int srcB = SkColorGetB(src);
421
422 if (width >= 4) {
423 SkASSERT(((size_t)dst & 0x03) == 0);
424 while (((size_t)dst & 0x0F) != 0) {
425 *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
426 mask++;
427 dst++;
428 width--;
429 }
430
431 __m128i *d = reinterpret_cast<__m128i*>(dst);
432 // Set alpha to 0xFF and replicate source four times in SSE register.
433 __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
434 // Interleave with zeros to get two sets of four 16-bit values.
435 // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
436 src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
437 while (width >= 4) {
438 // Load four destination pixels into dst_sse.
439 __m128i dst_sse = _mm_load_si128(d);
440 // Load four 16-bit masks into lower half of mask_sse.
441 __m128i mask_sse = _mm_loadu_si64(mask);
442
443 // Check whether masks are equal to 0 and get the highest bit
444 // of each byte of result, if masks are all zero, we will get
445 // pack_cmp to 0xFFFF
446 int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
447 _mm_setzero_si128()));
448
449 // if mask pixels are not all zero, we will blend the dst pixels
450 if (pack_cmp != 0xFFFF) {
451 // Unpack 4 16bit mask pixels to
452 // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
453 // m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
454 mask_sse = _mm_unpacklo_epi16(mask_sse,
455 _mm_setzero_si128());
456
457 // Process 4 32bit dst pixels
458 __m128i result = blend_lcd16_opaque_sse2(src_sse, dst_sse, mask_sse);
459 _mm_store_si128(d, result);
460 }
461
462 d++;
463 mask += 4;
464 width -= 4;
465 }
466
467 dst = reinterpret_cast<SkPMColor*>(d);
468 }
469
470 while (width > 0) {
471 *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
472 mask++;
473 dst++;
474 width--;
475 }
476 }
static __m128i blend_lcd16_opaque_sse2(__m128i &src, __m128i &dst, __m128i &mask)
static SkPMColor blend_lcd16_opaque(int srcR, int srcG, int srcB, SkPMColor dst, uint16_t mask, SkPMColor opaqueDst)

◆ drive()

static void drive ( SkPMColor *  dst,
const SkPMColor *  src,
const uint8_t *  cov,
int  n,
U8x4(*)(U8x4, U8x4, U8x4)  kernel 
)
static

Definition at line 1838 of file SkBlitter_ARGB32.cpp.

1839 {
1840
1841 auto apply = [kernel](U32 dst, U32 src, U8 cov) -> U32 {
1842 U8x4 cov_splat = skvx::shuffle<0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3>(cov);
1843 return sk_bit_cast<U32>(kernel(sk_bit_cast<U8x4>(dst),
1844 sk_bit_cast<U8x4>(src),
1845 cov_splat));
1846 };
1847 while (n >= 4) {
1848 apply(U32::Load(dst), U32::Load(src), U8::Load(cov)).store(dst);
1849 dst += 4;
1850 src += 4;
1851 cov += 4;
1852 n -= 4;
1853 }
1854 while (n --> 0) {
1855 *dst = apply(U32{*dst}, U32{*src}, U8{*cov})[0];
1856 dst++;
1857 src++;
1858 cov++;
1859 }
1860}
static bool apply(Pass *pass, SkRecord *record)
V< uint8_t > U8
Definition: Transform_inl.h:19
V< uint32_t > U32
Definition: Transform_inl.h:17

◆ SkARGB32_Blit32()

static void SkARGB32_Blit32 ( const SkPixmap &  device,
const SkMask &  mask,
const SkIRect &  clip,
SkPMColor  srcColor 
)
static

Definition at line 1419 of file SkBlitter_ARGB32.cpp.

1420 {
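1421 U8CPU alpha = SkGetPackedA32(srcColor);
1422 unsigned flags = SkBlitRow::kSrcPixelAlpha_Flag32;
1423 if (alpha != 255) {
1424 flags |= SkBlitRow::kGlobalAlpha_Flag32;
1425 }
1426 SkBlitRow::Proc32 proc = SkBlitRow::Factory32(flags);
1427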
1428 int x = clip.fLeft;
1429 int y = clip.fTop;
1430 int width = clip.width();
1431 int height = clip.height();
1432
1433 SkPMColor* dstRow = device.writable_addr32(x, y);
1434 const SkPMColor* srcRow = reinterpret_cast<const SkPMColor*>(mask.getAddr8(x, y));
1435
1436 do {
1437 proc(dstRow, srcRow, width, alpha);
1438 dstRow = (SkPMColor*)((char*)dstRow + device.rowBytes());
1439 srcRow = (const SkPMColor*)((const char*)srcRow + mask.fRowBytes);
1440 } while (--height != 0);
1441}
unsigned U8CPU
Definition: SkCPUTypes.h:18
static Proc32 Factory32(unsigned flags32)
void(* Proc32)(uint32_t dst[], const SkPMColor src[], int count, U8CPU alpha)
Definition: SkBlitRow.h:27
@ kSrcPixelAlpha_Flag32
Definition: SkBlitRow.h:18
@ kGlobalAlpha_Flag32
Definition: SkBlitRow.h:17
FlutterSemanticsFlag flags
const uint8_t * getAddr8(int x, int y) const
Definition: SkMask.h:79

◆ upscale_31_to_32()

static int upscale_31_to_32 ( int  value)
inline static

Definition at line 33 of file SkBlitter_ARGB32.cpp.

33 {
34 SkASSERT((unsigned)value <= 31);
35 return value + (value >> 4);
36}
uint8_t value