Flutter Engine
The Flutter Engine
Sk4px.h
Go to the documentation of this file.
1/*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef Sk4px_DEFINED
9#define Sk4px_DEFINED
10
13#include "src/base/SkVx.h"
14
15// 1, 2 or 4 SkPMColors, generally vectorized.
16class Sk4px {
17public:
// Wraps a 16-byte vector: v is copied into fV. The constructor is not marked
// explicit, so a skvx::byte16 converts to Sk4px implicitly.
18 Sk4px(const skvx::byte16& v) : fV(v) {}
19
// Builds an Sk4px from a single SkPMColor: a skvx::uint4 is constructed from c
// and its 16 bytes are copied into the returned Sk4px.
// NOTE(review): presumably the scalar constructor of skvx::uint4 puts c in all
// four 32-bit lanes (hence "Dup"/"splat") — confirm in SkVx.h.
20 static Sk4px DupPMColor(SkPMColor c) {
21 skvx::uint4 splat(c);
22
23 Sk4px v;
// Copy the raw bytes across; v and splat are both 16 bytes wide.
24 memcpy((void*)&v, &splat, 16);
25 return v;
26 }
27
28 // RGBA rgba XYZW xyzw -> AAAA aaaa WWWW wwww
// Returns an Sk4px in which all four bytes of each 4-byte pixel are replaced by
// that pixel's byte 3 (bytes 3, 7, 11 and 15 of fV). fV itself is not modified.
29 Sk4px alphas() const {
// Compile-time guard: byte 3 is only the alpha byte when SK_A32_SHIFT is 24
// (presumably alpha in the top 8 bits of the pixel) on a little-endian layout.
30 static_assert(SK_A32_SHIFT == 24, "This method assumes little-endian.");
31 return Sk4px(skvx::shuffle<3,3,3,3, 7,7,7,7, 11,11,11,11, 15,15,15,15>(fV));
32 }
// Subtracts fV from skvx::byte16(255) and wraps the result.
// NOTE(review): presumably a per-byte 255 - x; this relies on the scalar
// constructor of skvx::byte16 filling every lane — confirm in SkVx.h.
33 Sk4px inv() const { return Sk4px(skvx::byte16(255) - fV); }
34
35 // When loading or storing fewer than 4 SkPMColors, we use the low lanes.
// Load4 copies 16 bytes (four 32-bit SkPMColors) from px into a new Sk4px.
36 static Sk4px Load4(const SkPMColor px[4]) {
37 Sk4px v;
// Byte-wise copy into the object; px is read as raw bytes only.
38 memcpy((void*)&v, px, 16);
39 return v;
40 }
// Load2 copies 8 bytes (two SkPMColors) from px into the first 8 bytes of a new
// Sk4px. The remaining 8 bytes are not written by this function.
41 static Sk4px Load2(const SkPMColor px[2]) {
42 Sk4px v;
43 memcpy((void*)&v, px, 8);
44 return v;
45 }
// Load1 copies 4 bytes (one SkPMColor) from px into the first 4 bytes of a new
// Sk4px. The remaining 12 bytes are not written by this function.
46 static Sk4px Load1(const SkPMColor px[1]) {
47 Sk4px v;
48 memcpy((void*)&v, px, 4);
49 return v;
50 }
51
52 // Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px.
53 // AaXx -> AAAA aaaa XXXX xxxx
// Reads exactly four alpha bytes and repeats each one four times, so every
// 4-byte pixel slot of the result holds a single alpha value.
54 static Sk4px Load4Alphas(const SkAlpha alphas[4]) {
// Four lanes only: loading more would read past alphas[3].
55 skvx::Vec<4, uint8_t> a = skvx::Vec<4, uint8_t>::Load(alphas);
56 return Sk4px(skvx::shuffle<0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3>(a));
57 }
58 // Aa -> AAAA aaaa ???? ????
// Reads exactly two alpha bytes, repeats each one four times to fill the first
// 8 bytes, and joins that with a skvx::byte8() for the other 8 bytes.
59 static Sk4px Load2Alphas(const SkAlpha alphas[2]) {
// Two lanes only: loading more would read past alphas[1].
60 skvx::Vec<2, uint8_t> a = skvx::Vec<2, uint8_t>::Load(alphas);
61 return Sk4px(join(skvx::shuffle<0,0,0,0, 1,1,1,1>(a), skvx::byte8()));
62 }
63
// Copies the first 16 bytes of this object (four SkPMColors) to px.
64 void store4(SkPMColor px[4]) const { memcpy(px, this, 16); }
// Copies the first 8 bytes of this object (two SkPMColors) to px.
65 void store2(SkPMColor px[2]) const { memcpy(px, this, 8); }
// Copies the first 4 bytes of this object (one SkPMColor) to px.
66 void store1(SkPMColor px[1]) const { memcpy(px, this, 4); }
67
68 // 1, 2, or 4 SkPMColors with 16-bit components.
69 // This is most useful as the result of a multiply, e.g. from mulWiden().
// Thin wrapper around sixteen uint16_t lanes: each operator forwards to the
// same operator on the underlying skvx::Vec and re-wraps the result.
70 class Wide {
71 public:
// Wraps an existing vector of sixteen 16-bit lanes.
72 Wide(const skvx::Vec<16, uint16_t>& v) : fV(v) {}
73
74 // Rounds, i.e. (x+127) / 255.
// Converts back to 8-bit lanes through skvx::div255 and returns an Sk4px.
75 Sk4px div255() const { return Sk4px(skvx::div255(fV)); }
76
// Arithmetic between two Wide values, and shifts by an integer bit count.
77 Wide operator * (const Wide& o) const { return Wide(fV * o.fV); }
78 Wide operator + (const Wide& o) const { return Wide(fV + o.fV); }
79 Wide operator - (const Wide& o) const { return Wide(fV - o.fV); }
80 Wide operator >> (int bits) const { return Wide(fV >> bits); }
81 Wide operator << (int bits) const { return Wide(fV << bits); }
82
83 private:
// Backing storage; same type as the constructor argument that initializes it.
84 skvx::Vec<16, uint16_t> fV;
85 };
86
87 // Widen 8-bit values to low 8-bits of 16-bit lanes.
// Implemented as skvx::cast<uint16_t> on fV, wrapped in a Wide.
88 Wide widen() const { return Wide(skvx::cast<uint16_t>(fV)); }
89 // 8-bit x 8-bit -> 16-bit components.
// Passes fV and o to mull, which takes two 8-bit vectors and returns a 16-bit
// vector, and wraps that result in a Wide.
90 Wide mulWiden(const skvx::byte16& o) const { return Wide(mull(fV, o)); }
91
92 // The only 8-bit multiply we use is 8-bit x 8-bit -> 16-bit. Might as well make it pithy.
// Shorthand for mulWiden(o.fV); note the result type is Wide, not Sk4px.
93 Wide operator * (const Sk4px& o) const { return this->mulWiden(o.fV); }
94
// Adds the two byte vectors with the vector's own operator+ and wraps the result.
// NOTE(review): presumably wraps on overflow, given that saturatedAdd exists
// separately — confirm in SkVx.h.
95 Sk4px operator + (const Sk4px& o) const { return Sk4px(fV + o.fV); }
// Subtracts o's byte vector from this one with the vector's own operator- and
// wraps the result.
96 Sk4px operator - (const Sk4px& o) const { return Sk4px(fV - o.fV); }
// Wraps the result of fV < o.fV in an Sk4px rather than returning a bool.
// NOTE(review): presumably a per-byte mask meant to be consumed by thenElse —
// confirm the comparison's result representation in SkVx.h.
97 Sk4px operator < (const Sk4px& o) const { return Sk4px(fV < o.fV); }
// Bitwise AND of the two byte vectors, wrapped in an Sk4px.
98 Sk4px operator & (const Sk4px& o) const { return Sk4px(fV & o.fV); }
// Treats this value as the condition: passes fV as the first argument of
// if_then_else, with t.fV as the "then" operand and e.fV as the "else" operand.
99 Sk4px thenElse(const Sk4px& t, const Sk4px& e) const {
100 return Sk4px(if_then_else(fV, t.fV, e.fV));
101 }
102
103 // Generally faster than (*this * o).div255().
104 // May be incorrect by +-1, but is always exactly correct when *this or o is 0 or 255.
// Delegates to approx_scale on the two byte vectors and wraps the 8-bit result.
105 Sk4px approxMulDiv255(const Sk4px& o) const {
106 return Sk4px(approx_scale(fV, o.fV));
107 }
108
// Delegates to saturated_add on the two byte vectors and wraps the result.
109 Sk4px saturatedAdd(const Sk4px& o) const {
110 return Sk4px(saturated_add(fV, o.fV));
111 }
112
113 // A generic driver that maps fn over a src array into a dst array.
114 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels).
// n is the pixel count; when n <= 0 nothing is loaded or stored. dst and src
// are checked with SkASSERT. Pixels are processed 8 at a time while at least 8
// remain; the remainder (at most 7) is handled as one group each of 4, 2 and 1.
115 template <typename Fn>
116 [[maybe_unused]] static void MapSrc(int n, SkPMColor* dst, const SkPMColor* src, const Fn& fn) {
117 SkASSERT(dst);
118 SkASSERT(src);
119 // This looks a bit odd, but it helps loop-invariant hoisting across different calls to fn.
120 // Basically, we need to make sure we keep things inside a single loop.
121 while (n > 0) {
122 if (n >= 8) {
// Both results are computed before either one is stored.
123 Sk4px dst0 = fn(Load4(src+0)),
124 dst4 = fn(Load4(src+4));
125 dst0.store4(dst+0);
126 dst4.store4(dst+4);
127 dst += 8; src += 8; n -= 8;
128 continue; // Keep our stride at 8 pixels as long as possible.
129 }
130 SkASSERT(n <= 7);
// Tail: each branch below runs at most once per call.
131 if (n >= 4) {
132 fn(Load4(src)).store4(dst);
133 dst += 4; src += 4; n -= 4;
134 }
// For the 2- and 1-pixel cases fn still receives a whole Sk4px, but only its
// first 8 or 4 bytes were loaded, and only those bytes are stored.
135 if (n >= 2) {
136 fn(Load2(src)).store2(dst);
137 dst += 2; src += 2; n -= 2;
138 }
139 if (n >= 1) {
140 fn(Load1(src)).store1(dst);
141 }
// n is not decremented by the last branch, so this break is what ends the loop.
142 break;
143 }
144 }
145
146 // As above, but with dst4' = fn(dst4, src4).
// fn is called with the current dst pixels first and the src pixels second; its
// result overwrites those dst pixels. n is the pixel count (n <= 0 does
// nothing); dst and src are checked with SkASSERT. Pixels are processed 8 at a
// time while at least 8 remain, then the remainder in groups of 4, 2 and 1.
147 template <typename Fn>
148 [[maybe_unused]] static void MapDstSrc(int n, SkPMColor* dst, const SkPMColor* src,
149 const Fn& fn) {
150 SkASSERT(dst);
151 SkASSERT(src);
152 while (n > 0) {
153 if (n >= 8) {
// Both results are computed before either one is stored.
154 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)),
155 dst4 = fn(Load4(dst+4), Load4(src+4));
156 dst0.store4(dst+0);
157 dst4.store4(dst+4);
158 dst += 8; src += 8; n -= 8;
159 continue; // Keep our stride at 8 pixels as long as possible.
160 }
161 SkASSERT(n <= 7);
// Tail: each branch below runs at most once per call.
162 if (n >= 4) {
163 fn(Load4(dst), Load4(src)).store4(dst);
164 dst += 4; src += 4; n -= 4;
165 }
166 if (n >= 2) {
167 fn(Load2(dst), Load2(src)).store2(dst);
168 dst += 2; src += 2; n -= 2;
169 }
170 if (n >= 1) {
171 fn(Load1(dst), Load1(src)).store1(dst);
172 }
// n is not decremented by the last branch, so this break is what ends the loop.
173 break;
174 }
175 }
176
177 // As above, but with dst4' = fn(dst4, alpha4).
// a holds one SkAlpha byte per pixel. fn is called with the current dst pixels
// first and the expanded alphas second; its result overwrites those dst pixels.
// n is the pixel count (n <= 0 does nothing); dst and a are checked with
// SkASSERT. Pixels are processed 8 at a time while at least 8 remain, then the
// remainder in groups of 4, 2 and 1.
178 template <typename Fn>
179 [[maybe_unused]] static void MapDstAlpha(int n, SkPMColor* dst, const SkAlpha* a,
180 const Fn& fn) {
181 SkASSERT(dst);
182 SkASSERT(a);
183 while (n > 0) {
184 if (n >= 8) {
// Both results are computed before either one is stored.
185 Sk4px dst0 = fn(Load4(dst+0), Load4Alphas(a+0)),
186 dst4 = fn(Load4(dst+4), Load4Alphas(a+4));
187 dst0.store4(dst+0);
188 dst4.store4(dst+4);
189 dst += 8; a += 8; n -= 8;
190 continue; // Keep our stride at 8 pixels as long as possible.
191 }
192 SkASSERT(n <= 7);
// Tail: each branch below runs at most once per call.
193 if (n >= 4) {
194 fn(Load4(dst), Load4Alphas(a)).store4(dst);
195 dst += 4; a += 4; n -= 4;
196 }
197 if (n >= 2) {
198 fn(Load2(dst), Load2Alphas(a)).store2(dst);
199 dst += 2; a += 2; n -= 2;
200 }
201 if (n >= 1) {
// The single alpha is passed as skvx::byte16(*a) rather than through a
// Load*Alphas helper.
202 fn(Load1(dst), skvx::byte16(*a)).store1(dst);
203 }
// n is not decremented by the last branch, so this break is what ends the loop.
204 break;
205 }
206 }
207
208 // As above, but with dst4' = fn(dst4, src4, alpha4).
// fn is called with the current dst pixels, the src pixels and the expanded
// alphas, in that order; its result overwrites those dst pixels. a holds one
// SkAlpha byte per pixel. n is the pixel count (n <= 0 does nothing); dst, src
// and a are checked with SkASSERT. Pixels are processed 8 at a time while at
// least 8 remain, then the remainder in groups of 4, 2 and 1.
209 template <typename Fn>
210 [[maybe_unused]] static void MapDstSrcAlpha(int n, SkPMColor* dst, const SkPMColor* src,
211 const SkAlpha* a, const Fn& fn) {
212 SkASSERT(dst);
213 SkASSERT(src);
214 SkASSERT(a);
215 while (n > 0) {
216 if (n >= 8) {
// Both results are computed before either one is stored.
217 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), Load4Alphas(a+0)),
218 dst4 = fn(Load4(dst+4), Load4(src+4), Load4Alphas(a+4));
219 dst0.store4(dst+0);
220 dst4.store4(dst+4);
221 dst += 8; src += 8; a += 8; n -= 8;
222 continue; // Keep our stride at 8 pixels as long as possible.
223 }
224 SkASSERT(n <= 7);
// Tail: each branch below runs at most once per call.
225 if (n >= 4) {
226 fn(Load4(dst), Load4(src), Load4Alphas(a)).store4(dst);
227 dst += 4; src += 4; a += 4; n -= 4;
228 }
229 if (n >= 2) {
230 fn(Load2(dst), Load2(src), Load2Alphas(a)).store2(dst);
231 dst += 2; src += 2; a += 2; n -= 2;
232 }
233 if (n >= 1) {
// The single alpha is passed as skvx::byte16(*a) rather than through a
// Load*Alphas helper.
234 fn(Load1(dst), Load1(src), skvx::byte16(*a)).store1(dst);
235 }
// n is not decremented by the last branch, so this break is what ends the loop.
236 break;
237 }
238 }
239
240private:
// Private default constructor. The static Load4/Load2/Load1 factories declare
// an Sk4px with it and then fill the object with memcpy.
241 Sk4px() = default;
242
// The pixel data: one 16-byte vector, i.e. room for four 32-bit SkPMColors.
243 skvx::byte16 fV;
244};
245
// Layout checks: Sk4px must have the same size and alignment as skvx::byte16.
// The Load*/store* members memcpy directly to and from the Sk4px object itself,
// so the object has to be exactly as large as the vector it holds.
246static_assert(sizeof(Sk4px) == sizeof(skvx::byte16));
247static_assert(alignof(Sk4px) == alignof(skvx::byte16));
248
249#endif // Sk4px_DEFINED
#define SkASSERT(cond)
Definition: SkAssert.h:116
uint8_t SkAlpha
Definition: SkColor.h:26
uint32_t SkPMColor
Definition: SkColor.h:205
#define SK_A32_SHIFT
Definition: SkTypes.h:54
SI T if_then_else(C cond, T t, T e)
Wide(const skvx::Vec< 16, uint16_t > &v)
Definition: Sk4px.h:72
Wide operator-(const Wide &o) const
Definition: Sk4px.h:79
Wide operator<<(int bits) const
Definition: Sk4px.h:81
Wide operator>>(int bits) const
Definition: Sk4px.h:80
Wide operator*(const Wide &o) const
Definition: Sk4px.h:77
Wide operator+(const Wide &o) const
Definition: Sk4px.h:78
Sk4px div255() const
Definition: Sk4px.h:75
Sk4px
Definition: Sk4px.h:16
Sk4px approxMulDiv255(const Sk4px &o) const
Definition: Sk4px.h:105
void store2(SkPMColor px[2]) const
Definition: Sk4px.h:65
static Sk4px Load2Alphas(const SkAlpha alphas[2])
Definition: Sk4px.h:59
void store1(SkPMColor px[1]) const
Definition: Sk4px.h:66
static void MapSrc(int n, SkPMColor *dst, const SkPMColor *src, const Fn &fn)
Definition: Sk4px.h:116
static void MapDstSrc(int n, SkPMColor *dst, const SkPMColor *src, const Fn &fn)
Definition: Sk4px.h:148
static Sk4px Load4Alphas(const SkAlpha alphas[4])
Definition: Sk4px.h:54
Sk4px thenElse(const Sk4px &t, const Sk4px &e) const
Definition: Sk4px.h:99
Sk4px saturatedAdd(const Sk4px &o) const
Definition: Sk4px.h:109
static Sk4px Load2(const SkPMColor px[2])
Definition: Sk4px.h:41
Sk4px operator-(const Sk4px &o) const
Definition: Sk4px.h:96
Wide operator*(const Sk4px &o) const
Definition: Sk4px.h:93
Wide widen() const
Definition: Sk4px.h:88
void store4(SkPMColor px[4]) const
Definition: Sk4px.h:64
static Sk4px DupPMColor(SkPMColor c)
Definition: Sk4px.h:20
Sk4px(const skvx::byte16 &v)
Definition: Sk4px.h:18
static void MapDstAlpha(int n, SkPMColor *dst, const SkAlpha *a, const Fn &fn)
Definition: Sk4px.h:179
Wide mulWiden(const skvx::byte16 &o) const
Definition: Sk4px.h:90
Sk4px operator&(const Sk4px &o) const
Definition: Sk4px.h:98
Sk4px alphas() const
Definition: Sk4px.h:29
Sk4px inv() const
Definition: Sk4px.h:33
static Sk4px Load1(const SkPMColor px[1])
Definition: Sk4px.h:46
static Sk4px Load4(const SkPMColor px[4])
Definition: Sk4px.h:36
static void MapDstSrcAlpha(int n, SkPMColor *dst, const SkPMColor *src, const SkAlpha *a, const Fn &fn)
Definition: Sk4px.h:210
Sk4px operator<(const Sk4px &o) const
Definition: Sk4px.h:97
Sk4px operator+(const Sk4px &o) const
Definition: Sk4px.h:95
struct MyStruct a[10]
dst
Definition: cp.py:12
SIN Vec< N, uint16_t > mull(const Vec< N, uint8_t > &x, const Vec< N, uint8_t > &y)
Definition: SkVx.h:906
SIN Vec< N, uint8_t > div255(const Vec< N, uint16_t > &x)
Definition: SkVx.h:818
SINT std::enable_if_t< std::is_unsigned_v< T >, Vec< N, T > > saturated_add(const Vec< N, T > &x, const Vec< N, T > &y)
Definition: SkVx.h:833
SIN Vec< N, uint8_t > approx_scale(const Vec< N, uint8_t > &x, const Vec< N, uint8_t > &y)
Definition: SkVx.h:824
static SkString join(const CommandLineFlags::StringArray &)
Definition: skpbench.cpp:741
Definition: SkVx.h:83
static SKVX_ALWAYS_INLINE Vec Load(const void *ptr)
Definition: SkVx.h:109