// Pi as a double. The literal carries more digits than a double can hold, so
// the stored value is the nearest representable double. It is read by the
// window-size computation `floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)` below.
23static const double kPi = 3.14159265358979323846264338327950288;
25class PlanGauss final {
27 explicit PlanGauss(
double sigma) {
28 auto possibleWindow =
static_cast<int>(
floor(sigma * 3 *
sqrt(2 *
kPi) / 4 + 0.5));
78 fSlidingWindow = 2 * fBorder + 1;
83 auto window3 = window2 *
window;
84 auto divisor = (
window & 1) == 1 ? window3 : window3 + window2;
86 fWeight =
static_cast<uint64_t
>(
round(1.0 / divisor * (1ull << 32)));
// Returns the combined size of the three pass buffers, i.e.
// fPass0Size + fPass1Size + fPass2Size.
// NOTE(review): the unit appears to be uint32_t elements rather than bytes,
// since makeBlurScan offsets uint32_t pointers by these sizes — confirm.
89 size_t bufferSize()
const {
return fPass0Size + fPass1Size + fPass2Size; }
// Returns fBorder. The sliding window is derived from it as
// fSlidingWindow = 2 * fBorder + 1, and callers report it as the blur margin.
91 int border()
const {
return fBorder; }
96 Scan(uint64_t weight,
int noChangeCount,
97 uint32_t* buffer0, uint32_t* buffer0End,
98 uint32_t* buffer1, uint32_t* buffer1End,
99 uint32_t* buffer2, uint32_t* buffer2End)
101 , fNoChangeCount{noChangeCount}
103 , fBuffer0End{buffer0End}
105 , fBuffer1End{buffer1End}
107 , fBuffer2End{buffer2End}
110 template <
typename AlphaIter>
void blur(
const AlphaIter srcBegin,
const AlphaIter srcEnd,
111 uint8_t*
dst,
int dstStride, uint8_t* dstEnd)
const {
112 auto buffer0Cursor = fBuffer0;
113 auto buffer1Cursor = fBuffer1;
114 auto buffer2Cursor = fBuffer2;
116 std::memset(fBuffer0, 0x00, (fBuffer2End - fBuffer0) *
sizeof(*fBuffer0));
123 for (AlphaIter
src = srcBegin;
src < srcEnd; ++
src,
dst += dstStride) {
124 uint32_t leadingEdge = *
src;
129 *
dst = this->finalScale(sum2);
131 sum2 -= *buffer2Cursor;
132 *buffer2Cursor = sum1;
133 buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2;
135 sum1 -= *buffer1Cursor;
136 *buffer1Cursor = sum0;
137 buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1;
139 sum0 -= *buffer0Cursor;
140 *buffer0Cursor = leadingEdge;
141 buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0;
145 for (
int i = 0;
i < fNoChangeCount;
i++) {
146 uint32_t leadingEdge = 0;
151 *
dst = this->finalScale(sum2);
153 sum2 -= *buffer2Cursor;
154 *buffer2Cursor = sum1;
155 buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2;
157 sum1 -= *buffer1Cursor;
158 *buffer1Cursor = sum0;
159 buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1;
161 sum0 -= *buffer0Cursor;
162 *buffer0Cursor = leadingEdge;
163 buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0;
169 std::memset(fBuffer0, 0, (fBuffer2End - fBuffer0) *
sizeof(*fBuffer0));
171 sum0 = sum1 = sum2 = 0;
173 uint8_t* dstCursor = dstEnd;
174 AlphaIter
src = srcEnd;
175 while (dstCursor >
dst) {
176 dstCursor -= dstStride;
177 uint32_t leadingEdge = *(--
src);
182 *dstCursor = this->finalScale(sum2);
184 sum2 -= *buffer2Cursor;
185 *buffer2Cursor = sum1;
186 buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2;
188 sum1 -= *buffer1Cursor;
189 *buffer1Cursor = sum0;
190 buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1;
192 sum0 -= *buffer0Cursor;
193 *buffer0Cursor = leadingEdge;
194 buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0;
// 2^31 as a 64-bit value, i.e. one half of 2^32. It is added to
// `fWeight * sum` before the `>> 32` in finalScale.
// NOTE(review): presumably this makes the shift round to nearest instead of
// truncating — confirm.
199 inline static constexpr uint64_t
kHalf =
static_cast<uint64_t
>(1) << 31;
201 uint8_t finalScale(uint32_t sum)
const {
202 return SkTo<uint8_t>((fWeight * sum +
kHalf) >> 32);
208 uint32_t* fBuffer0End;
210 uint32_t* fBuffer1End;
212 uint32_t* fBuffer2End;
215 Scan makeBlurScan(
int width, uint32_t*
buffer)
const {
216 uint32_t* buffer0, *buffer0End, *buffer1, *buffer1End, *buffer2, *buffer2End;
218 buffer0End = buffer1 = buffer0 + fPass0Size;
219 buffer1End = buffer2 = buffer1 + fPass1Size;
220 buffer2End = buffer2 + fPass2Size;
221 int noChangeCount = fSlidingWindow >
width ? fSlidingWindow -
width : 0;
224 fWeight, noChangeCount,
227 buffer2, buffer2End);
255 : fSigmaW{
SkTPin(sigmaW, 0.0, 135.0)}
256 , fSigmaH{
SkTPin(sigmaH, 0.0, 135.0)}
263 return (3 * fSigmaW <= 1) && (3 * fSigmaH <= 1);
271 uint8_t masks = *from;
273 a8[
i] = (masks >> (7 -
i)) & 1 ? 0xFF
281 unsigned rgb =
reinterpret_cast<const uint16_t*
>(from)[
i],
285 a8[
i] = (r + g +
b) / 3;
291 uint32_t
rgba =
reinterpret_cast<const uint32_t*
>(from)[
i];
304 uint8_t tmp[8] = {0,0,0,0, 0,0,0,0};
306 toA8(tmp, from,
width);
308 }
else if (
width < 8) {
// Zero constant with a deliberately blank-looking name. It fills the unused
// lanes in the fp88{...} initializer lists of the blur_x_radius_* helpers so
// the shifted lanes line up visually.
332static constexpr uint16_t
_____ = 0u;
// The 16-bit value 0x80 (128).
// NOTE(review): presumably 0.5 in the 8.8 fixed-point (fp88) representation,
// used as a rounding term — its uses are not visible here; confirm.
333static constexpr uint16_t
kHalf = 0x80u;
419 auto v1 =
mulhi(s0, g1);
420 auto v0 =
mulhi(s0, g0);
426 *d0 +=
fp88{
_____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5], v0[6]};
430 *d0 +=
fp88{
_____,
_____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]};
439 auto v0 =
mulhi(s0, g0);
440 auto v1 =
mulhi(s0, g1);
447 *d0 +=
fp88{
_____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5], v1[6]};
451 *d0 +=
fp88{
_____,
_____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5]};
467 auto v0 =
mulhi(s0, g0);
468 auto v1 =
mulhi(s0, g1);
470 auto v3 =
mulhi(s0, g3);
480 *d0 +=
fp88{
_____,
_____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]};
497 *d8 +=
fp88{v3[2], v3[3], v3[4], v3[5], v3[6], v3[7],
_____,
_____};
504 auto v0 =
mulhi(s0, g0);
505 auto v1 =
mulhi(s0, g1);
507 auto v3 =
mulhi(s0, g3);
514 *d0 +=
fp88{
_____, v3[0], v3[1], v3[2], v3[3], v3[4], v3[5], v3[6]};
539 *d8 +=
fp88{v3[1], v3[2], v3[3], v3[4], v3[5], v3[6], v3[7],
_____};
551 const uint8_t*
src,
int srcW,
552 uint8_t*
dst,
int dstW) {
558 for (;
x <= srcW - 8;
x += 8) {
559 blur(
load(
src, 8,
nullptr), g0, g1, g2, g3, g4, &d0, &d8);
571 int srcTail = srcW -
x;
574 blur(
load(
src, srcTail,
nullptr), g0, g1, g2, g3, g4, &d0, &d8);
585 int dstTail = dstW -
x;
594 const uint8_t*
src,
size_t srcStride,
int srcW,
595 uint8_t*
dst,
size_t dstStride,
int dstW,
int dstH) {
604 for (
int y = 0;
y < dstH;
y++) {
612 const uint8_t*
src,
size_t srcStride,
int srcW,
613 uint8_t*
dst,
size_t dstStride,
int dstW,
int dstH) {
633 SkASSERTF(
false,
"The radius %d is not handled\n", radius);
691 auto v0 =
mulhi(s0, g0);
692 auto v1 =
mulhi(s0, g1);
694 fp88 answer = *d01 + v1;
705 auto v0 =
mulhi(s0, g0);
706 auto v1 =
mulhi(s0, g1);
722 auto v0 =
mulhi(s0, g0);
723 auto v1 =
mulhi(s0, g1);
725 auto v3 =
mulhi(s0, g3);
727 fp88 answer = *d01 + v3;
742 auto v0 =
mulhi(s0, g0);
743 auto v1 =
mulhi(s0, g1);
745 auto v3 =
mulhi(s0, g3);
768 const uint8_t*
src,
size_t srcRB,
int srcH,
769 uint8_t*
dst,
size_t dstRB) {
773 auto flush = [&](uint8_t* to,
const fp88& v0,
const fp88& v1) {
780 for (
int y = 0;
y < srcH;
y += 1) {
784 &d01, &d12, &d23, &d34, &d45, &d56, &d67, &d78);
791 dst = flush(
dst, d01, d12);
794 dst = flush(
dst, d23, d34);
797 dst = flush(
dst, d45, d56);
800 flush(
dst, d67, d78);
806 BlurY blur,
int radius, uint16_t *gauss,
807 const uint8_t *
src,
size_t srcRB,
int srcW,
int srcH,
808 uint8_t *
dst,
size_t dstRB) {
817 for (;
x <= srcW - 8;
x += 8) {
826 int xTail = srcW -
x;
836 int radius, uint16_t* gauss,
837 const uint8_t*
src,
size_t srcRB,
int srcW,
int srcH,
838 uint8_t*
dst,
size_t dstRB) {
843 src, srcRB, srcW, srcH,
849 src, srcRB, srcW, srcH,
855 src, srcRB, srcW, srcH,
861 src, srcRB, srcW, srcH,
866 SkASSERTF(
false,
"The radius %d is not handled\n", radius);
872 SkASSERT(0.01 <= sigmaX && sigmaX < 2);
873 SkASSERT(0.01 <= sigmaY && sigmaY < 2);
878 int radiusX = filterX.
radius(),
879 radiusY = filterY.radius();
881 SkASSERT(radiusX <= 4 && radiusY <= 4);
883 auto prepareGauss = [](
const SkGaussFilter& filter, uint16_t* factors) {
885 for (
double d : filter) {
886 factors[
i++] =
static_cast<uint16_t
>(
round(
d * (1 << 16)));
893 prepareGauss(filterX, gaussFactorsX);
894 prepareGauss(filterY, gaussFactorsY);
897 if (
src.fImage ==
nullptr) {
898 return {SkTo<int32_t>(radiusX), SkTo<int32_t>(radiusY)};
900 if (
dst->fImage ==
nullptr) {
901 dst->bounds().setEmpty();
905 int srcW =
src.fBounds.width(),
906 srcH =
src.fBounds.height();
908 int dstW =
dst->fBounds.width(),
909 dstH =
dst->fBounds.height();
911 size_t srcRB =
src.fRowBytes,
912 dstRB =
dst->fRowBytes;
917 switch (
src.fFormat) {
920 radiusY, gaussFactorsY,
921 src.fImage, srcRB, srcW, srcH,
922 dst->image() + radiusX, dstRB);
926 radiusY, gaussFactorsY,
927 src.fImage, srcRB, srcW, srcH,
928 dst->image() + radiusX, dstRB);
932 radiusY, gaussFactorsY,
933 src.fImage, srcRB, srcW, srcH,
934 dst->image() + radiusX, dstRB);
938 src.fImage, srcRB, srcW, srcH,
939 dst->image() + radiusX, dstRB);
947 dst->fImage + radiusX, dstRB, srcW,
948 dst->image(), dstRB, dstW, dstH);
950 return {radiusX, radiusY};
957 if (fSigmaW < 2.0 && fSigmaH < 2.0) {
964 PlanGauss planW(fSigmaW);
965 PlanGauss planH(fSigmaH);
967 int borderW = planW.border(),
968 borderH = planH.border();
969 SkASSERT(borderH >= 0 && borderW >= 0);
972 if (
src.fImage ==
nullptr) {
973 return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)};
975 if (
dst->fImage ==
nullptr) {
976 dst->bounds().setEmpty();
980 int srcW =
src.fBounds.width(),
981 srcH =
src.fBounds.height(),
982 dstW =
dst->fBounds.width(),
983 dstH =
dst->fBounds.height();
984 SkASSERT(srcW >= 0 && srcH >= 0 && dstW >= 0 && dstH >= 0);
986 auto bufferSize =
std::max(planW.bufferSize(), planH.bufferSize());
1000 const PlanGauss::Scan& scanW = planW.makeBlurScan(srcW,
buffer);
1001 switch (
src.fFormat) {
1003 const uint8_t* bwStart =
src.fImage;
1006 for (
int y = 0; y < srcH; ++y, start >>=
src.fRowBytes,
end >>=
src.fRowBytes) {
1007 auto tmpStart = &tmp[
y];
1008 scanW.blur(
start,
end, tmpStart, tmpW, tmpStart + tmpW * tmpH);
1012 const uint8_t* a8Start =
src.fImage;
1015 for (
int y = 0; y < srcH; ++y, start >>=
src.fRowBytes,
end >>=
src.fRowBytes) {
1016 auto tmpStart = &tmp[
y];
1017 scanW.blur(
start,
end, tmpStart, tmpW, tmpStart + tmpW * tmpH);
1021 const uint32_t* argbStart =
reinterpret_cast<const uint32_t*
>(
src.fImage);
1024 for (
int y = 0; y < srcH; ++y, start >>=
src.fRowBytes,
end >>=
src.fRowBytes) {
1025 auto tmpStart = &tmp[
y];
1026 scanW.blur(
start,
end, tmpStart, tmpW, tmpStart + tmpW * tmpH);
1030 const uint16_t* lcdStart =
reinterpret_cast<const uint16_t*
>(
src.fImage);
1033 for (
int y = 0; y < srcH; ++y, start >>=
src.fRowBytes,
end >>=
src.fRowBytes) {
1034 auto tmpStart = &tmp[
y];
1035 scanW.blur(
start,
end, tmpStart, tmpW, tmpStart + tmpW * tmpH);
1044 const PlanGauss::Scan& scanH = planH.makeBlurScan(tmpW,
buffer);
1045 for (
int y = 0;
y < tmpH;
y++) {
1046 auto tmpStart = &tmp[
y * tmpW];
1047 auto dstStart = &
dst->image()[
y];
1049 scanH.blur(tmpStart, tmpStart + tmpW,
1050 dstStart,
dst->fRowBytes, dstStart +
dst->fRowBytes * dstH);
1053 return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)};
static void round(SkPoint *p)
static const uint32_t rgba[kNumPixels]
#define SK_ABORT(message,...)
#define SkASSERTF(cond, fmt,...)
#define SkPacked16ToG32(c)
#define SkPacked16ToR32(c)
#define SkPacked16ToB32(c)
#define SkGetPackedA32(packed)
static SkV4 v4(SkV3 v, SkScalar w)
static fp88 blur_y_radius_2(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &, const fp88 &, fp88 *d01, fp88 *d12, fp88 *d23, fp88 *d34, fp88 *, fp88 *, fp88 *, fp88 *)
static fp88 blur_y_radius_4(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &g4, fp88 *d01, fp88 *d12, fp88 *d23, fp88 *d34, fp88 *d45, fp88 *d56, fp88 *d67, fp88 *d78)
static constexpr uint16_t kHalf
decltype(blur_y_radius_1) BlurY
static void direct_blur_y(ToA8 toA8, const int strideOf8, int radius, uint16_t *gauss, const uint8_t *src, size_t srcRB, int srcW, int srcH, uint8_t *dst, size_t dstRB)
static void blur_column(ToA8 toA8, BlurY blur, int radius, int width, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &g4, const uint8_t *src, size_t srcRB, int srcH, uint8_t *dst, size_t dstRB)
static fp88 blur_y_radius_1(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &, const fp88 &, const fp88 &, fp88 *d01, fp88 *d12, fp88 *, fp88 *, fp88 *, fp88 *, fp88 *, fp88 *)
static fp88 blur_y_radius_3(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &, fp88 *d01, fp88 *d12, fp88 *d23, fp88 *d34, fp88 *d45, fp88 *d56, fp88 *, fp88 *)
static SkIPoint small_blur(double sigmaX, double sigmaY, const SkMask &src, SkMaskBuilder *dst)
static void bw_to_a8(uint8_t *a8, const uint8_t *from, int width)
static void direct_blur_x(int radius, uint16_t *gauss, const uint8_t *src, size_t srcStride, int srcW, uint8_t *dst, size_t dstStride, int dstW, int dstH)
static void blur_x_radius_3(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &, fp88 *d0, fp88 *d8)
static void blur_x_rect(BlurX blur, uint16_t *gauss, const uint8_t *src, size_t srcStride, int srcW, uint8_t *dst, size_t dstStride, int dstW, int dstH)
decltype(blur_x_radius_1) BlurX
static void blur_x_radius_4(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &g4, fp88 *d0, fp88 *d8)
static void blur_y_rect(ToA8 toA8, const int strideOf8, BlurY blur, int radius, uint16_t *gauss, const uint8_t *src, size_t srcRB, int srcW, int srcH, uint8_t *dst, size_t dstRB)
static void argb32_to_a8(uint8_t *a8, const uint8_t *from, int width)
skvx::Vec< 8, uint16_t > fp88
static void blur_x_radius_2(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &, const fp88 &, fp88 *d0, fp88 *d8)
static fp88 load(const uint8_t *from, int width, ToA8 *toA8)
static void lcd_to_a8(uint8_t *a8, const uint8_t *from, int width)
static void store(uint8_t *to, const fp88 &v, int width)
static void blur_row(BlurX blur, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &g4, const uint8_t *src, int srcW, uint8_t *dst, int dstW)
static constexpr uint16_t _____
static void blur_x_radius_1(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &, const fp88 &, const fp88 &, fp88 *d0, fp88 *d8)
static constexpr const T & SkTPin(const T &x, const T &lo, const T &hi)
T * makeArrayDefault(size_t count)
static constexpr int kGaussArrayMax
SkIPoint blur(const SkMask &src, SkMaskBuilder *dst) const
SkMaskBlurFilter(double sigmaW, double sigmaH)
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE auto & d
static float max(float r, float g, float b)
static float min(float r, float g, float b)
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified vm service A custom Dart VM Service port The default is to pick a randomly available open port disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode disable vm service Disable mDNS Dart VM Service publication Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set endless trace buffer
SIN Vec< N, uint16_t > mulhi(const Vec< N, uint16_t > &x, const Vec< N, uint16_t > &y)
SIN Vec< N, float > sqrt(const Vec< N, float > &x)
SIN Vec< N, float > floor(const Vec< N, float > &x)
static SkMaskBuilder PrepareDestination(int radiusX, int radiusY, const SkMask &src)
@ kA8_Format
8bits per pixel mask (e.g. antialiasing)
@ kLCD16_Format
565 alpha for r/g/b
@ kARGB32_Format
SkPMColor.
@ kBW_Format
1bit per pixel mask (e.g. monochrome)
static SKVX_ALWAYS_INLINE Vec Load(const void *ptr)