// Pi as a double. Used by PlanGauss when deriving the box-filter window from
// sigma: floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5).
23static const double kPi = 3.14159265358979323846264338327950288;
25class PlanGauss final {
27 explicit PlanGauss(
double sigma) {
28 auto possibleWindow =
static_cast<int>(
floor(sigma * 3 *
sqrt(2 * kPi) / 4 + 0.5));
29 auto window = std::max(1, possibleWindow);
78 fSlidingWindow = 2 * fBorder + 1;
83 auto window3 = window2 *
window;
84 auto divisor = (
window & 1) == 1 ? window3 : window3 + window2;
86 fWeight =
static_cast<uint64_t
>(
round(1.0 / divisor * (1ull << 32)));
// Combined length of the three per-pass scratch regions. makeBlurScan() lays
// these regions out back to back inside one uint32_t buffer (pass 0, then
// pass 1, then pass 2), so this is the number of uint32_t elements that the
// buffer handed to makeBlurScan() must hold.
89 size_t bufferSize()
const {
return fPass0Size + fPass1Size + fPass2Size; }
// Returns fBorder. The sliding window is 2 * fBorder + 1 wide, and
// SkMaskBlurFilter::blur() reports this value per axis in the point it returns.
91 int border()
const {
return fBorder; }
96 Scan(uint64_t weight,
int noChangeCount,
97 uint32_t* buffer0, uint32_t* buffer0End,
98 uint32_t* buffer1, uint32_t* buffer1End,
99 uint32_t* buffer2, uint32_t* buffer2End)
101 , fNoChangeCount{noChangeCount}
103 , fBuffer0End{buffer0End}
105 , fBuffer1End{buffer1End}
107 , fBuffer2End{buffer2End}
110 template <
typename AlphaIter>
void blur(
const AlphaIter srcBegin,
const AlphaIter srcEnd,
111 uint8_t* dst,
int dstStride, uint8_t* dstEnd)
const {
112 auto buffer0Cursor = fBuffer0;
113 auto buffer1Cursor = fBuffer1;
114 auto buffer2Cursor = fBuffer2;
116 std::memset(fBuffer0, 0x00, (fBuffer2End - fBuffer0) *
sizeof(*fBuffer0));
123 for (AlphaIter src = srcBegin;
src < srcEnd; ++
src,
dst += dstStride) {
124 uint32_t leadingEdge = *
src;
129 *
dst = this->finalScale(sum2);
131 sum2 -= *buffer2Cursor;
132 *buffer2Cursor = sum1;
133 buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2;
135 sum1 -= *buffer1Cursor;
136 *buffer1Cursor = sum0;
137 buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1;
139 sum0 -= *buffer0Cursor;
140 *buffer0Cursor = leadingEdge;
141 buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0;
145 for (
int i = 0; i < fNoChangeCount; i++) {
146 uint32_t leadingEdge = 0;
151 *
dst = this->finalScale(sum2);
153 sum2 -= *buffer2Cursor;
154 *buffer2Cursor = sum1;
155 buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2;
157 sum1 -= *buffer1Cursor;
158 *buffer1Cursor = sum0;
159 buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1;
161 sum0 -= *buffer0Cursor;
162 *buffer0Cursor = leadingEdge;
163 buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0;
169 std::memset(fBuffer0, 0, (fBuffer2End - fBuffer0) *
sizeof(*fBuffer0));
171 sum0 = sum1 = sum2 = 0;
173 uint8_t* dstCursor = dstEnd;
174 AlphaIter
src = srcEnd;
175 while (dstCursor > dst) {
176 dstCursor -= dstStride;
177 uint32_t leadingEdge = *(--
src);
182 *dstCursor = this->finalScale(sum2);
184 sum2 -= *buffer2Cursor;
185 *buffer2Cursor = sum1;
186 buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2;
188 sum1 -= *buffer1Cursor;
189 *buffer1Cursor = sum0;
190 buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1;
192 sum0 -= *buffer0Cursor;
193 *buffer0Cursor = leadingEdge;
194 buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0;
// 1 << 31, i.e. half of 1 << 32. finalScale() adds it to fWeight * sum before
// shifting right by 32, so the shift rounds to nearest rather than truncating.
199 inline static constexpr uint64_t
kHalf =
static_cast<uint64_t
>(1) << 31;
201 uint8_t finalScale(uint32_t sum)
const {
202 return SkTo<uint8_t>((fWeight * sum + kHalf) >> 32);
208 uint32_t* fBuffer0End;
210 uint32_t* fBuffer1End;
212 uint32_t* fBuffer2End;
215 Scan makeBlurScan(
int width, uint32_t*
buffer)
const {
216 uint32_t* buffer0, *buffer0End, *buffer1, *buffer1End, *buffer2, *buffer2End;
218 buffer0End = buffer1 = buffer0 + fPass0Size;
219 buffer1End = buffer2 = buffer1 + fPass1Size;
220 buffer2End = buffer2 + fPass2Size;
221 int noChangeCount = fSlidingWindow >
width ? fSlidingWindow -
width : 0;
224 fWeight, noChangeCount,
227 buffer2, buffer2End);
255 : fSigmaW{
SkTPin(sigmaW, 0.0, 135.0)}
256 , fSigmaH{
SkTPin(sigmaH, 0.0, 135.0)}
263 return (3 * fSigmaW <= 1) && (3 * fSigmaH <= 1);
271 uint8_t masks = *from;
272 for (
int i = 0; i <
width; ++i) {
273 a8[i] = (masks >> (7 - i)) & 1 ? 0xFF
280 for (
int i = 0; i <
width; ++i) {
281 unsigned rgb =
reinterpret_cast<const uint16_t*
>(from)[i],
285 a8[i] = (r + g +
b) / 3;
290 for (
int i = 0; i <
width; ++i) {
291 uint32_t
rgba =
reinterpret_cast<const uint32_t*
>(from)[i];
304 uint8_t tmp[8] = {0,0,0,0, 0,0,0,0};
306 toA8(tmp, from,
width);
308 }
else if (
width < 8) {
309 for (
int i = 0; i <
width; ++i) {
326 for (
int i = 0; i <
width; i++) {
// Zero constant with a deliberately "blank" name. The blur_x_radius_* helpers
// use it as a visual placeholder for the empty lanes when building shifted
// fp88 vectors, e.g. fp88{_____, v0[0], v0[1], ...}.
332static constexpr uint16_t
_____ = 0u;
// 0x80 (128) as a uint16_t.
// NOTE(review): presumably the rounding "one half" for the 8.8 fixed-point
// (fp88) values used by the small-blur code; no use is visible in this
// excerpt -- confirm against load()/store().
333static constexpr uint16_t
kHalf = 0x80u;
419 auto v1 = mulhi(s0, g1);
420 auto v0 = mulhi(s0, g0);
426 *d0 +=
fp88{
_____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5], v0[6]};
430 *d0 +=
fp88{
_____,
_____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]};
439 auto v0 = mulhi(s0, g0);
440 auto v1 = mulhi(s0, g1);
441 auto v2 = mulhi(s0, g2);
447 *d0 +=
fp88{
_____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5], v1[6]};
451 *d0 +=
fp88{
_____,
_____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5]};
467 auto v0 = mulhi(s0, g0);
468 auto v1 = mulhi(s0, g1);
469 auto v2 = mulhi(s0, g2);
470 auto v3 = mulhi(s0, g3);
480 *d0 +=
fp88{
_____,
_____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]};
497 *d8 +=
fp88{v3[2], v3[3], v3[4], v3[5], v3[6], v3[7],
_____,
_____};
504 auto v0 = mulhi(s0, g0);
505 auto v1 = mulhi(s0, g1);
506 auto v2 = mulhi(s0, g2);
507 auto v3 = mulhi(s0, g3);
508 auto v4 = mulhi(s0, g4);
514 *d0 +=
fp88{
_____, v3[0], v3[1], v3[2], v3[3], v3[4], v3[5], v3[6]};
539 *d8 +=
fp88{v3[1], v3[2], v3[3], v3[4], v3[5], v3[6], v3[7],
_____};
551 const uint8_t* src,
int srcW,
552 uint8_t* dst,
int dstW) {
558 for (;
x <= srcW - 8;
x += 8) {
559 blur(
load(src, 8,
nullptr), g0, g1, g2, g3, g4, &d0, &d8);
571 int srcTail = srcW -
x;
574 blur(
load(src, srcTail,
nullptr), g0, g1, g2, g3, g4, &d0, &d8);
576 int dstTail = std::min(8, dstW -
x);
577 store(dst, d0, dstTail);
585 int dstTail = dstW -
x;
587 store(dst, d0, dstTail);
594 const uint8_t* src,
size_t srcStride,
int srcW,
595 uint8_t* dst,
size_t dstStride,
int dstW,
int dstH) {
604 for (
int y = 0;
y < dstH;
y++) {
605 blur_row(blur, g0, g1, g2, g3, g4, src, srcW, dst, dstW);
612 const uint8_t* src,
size_t srcStride,
int srcW,
613 uint8_t* dst,
size_t dstStride,
int dstW,
int dstH) {
633 SkASSERTF(
false,
"The radius %d is not handled\n", radius);
691 auto v0 = mulhi(s0, g0);
692 auto v1 = mulhi(s0, g1);
694 fp88 answer = *d01 + v1;
705 auto v0 = mulhi(s0, g0);
706 auto v1 = mulhi(s0, g1);
707 auto v2 = mulhi(s0, g2);
722 auto v0 = mulhi(s0, g0);
723 auto v1 = mulhi(s0, g1);
724 auto v2 = mulhi(s0, g2);
725 auto v3 = mulhi(s0, g3);
727 fp88 answer = *d01 + v3;
742 auto v0 = mulhi(s0, g0);
743 auto v1 = mulhi(s0, g1);
744 auto v2 = mulhi(s0, g2);
745 auto v3 = mulhi(s0, g3);
746 auto v4 = mulhi(s0, g4);
768 const uint8_t* src,
size_t srcRB,
int srcH,
769 uint8_t* dst,
size_t dstRB) {
773 auto flush = [&](uint8_t* to,
const fp88& v0,
const fp88& v1) {
780 for (
int y = 0;
y < srcH;
y += 1) {
784 &d01, &d12, &d23, &d34, &d45, &d56, &d67, &d78);
791 dst = flush(dst, d01, d12);
794 dst = flush(dst, d23, d34);
797 dst = flush(dst, d45, d56);
800 flush(dst, d67, d78);
806 BlurY blur,
int radius, uint16_t *gauss,
807 const uint8_t *src,
size_t srcRB,
int srcW,
int srcH,
808 uint8_t *dst,
size_t dstRB) {
817 for (;
x <= srcW - 8;
x += 8) {
826 int xTail = srcW -
x;
836 int radius, uint16_t* gauss,
837 const uint8_t* src,
size_t srcRB,
int srcW,
int srcH,
838 uint8_t* dst,
size_t dstRB) {
843 src, srcRB, srcW, srcH,
849 src, srcRB, srcW, srcH,
855 src, srcRB, srcW, srcH,
861 src, srcRB, srcW, srcH,
866 SkASSERTF(
false,
"The radius %d is not handled\n", radius);
872 SkASSERT(0.01 <= sigmaX && sigmaX < 2);
873 SkASSERT(0.01 <= sigmaY && sigmaY < 2);
878 int radiusX = filterX.
radius(),
879 radiusY = filterY.radius();
881 SkASSERT(radiusX <= 4 && radiusY <= 4);
883 auto prepareGauss = [](
const SkGaussFilter& filter, uint16_t* factors) {
885 for (
double d : filter) {
886 factors[i++] =
static_cast<uint16_t
>(
round(
d * (1 << 16)));
893 prepareGauss(filterX, gaussFactorsX);
894 prepareGauss(filterY, gaussFactorsY);
897 if (src.fImage ==
nullptr) {
898 return {SkTo<int32_t>(radiusX), SkTo<int32_t>(radiusY)};
900 if (dst->fImage ==
nullptr) {
901 dst->bounds().setEmpty();
905 int srcW = src.fBounds.width(),
906 srcH = src.fBounds.height();
908 int dstW = dst->fBounds.width(),
909 dstH = dst->fBounds.height();
911 size_t srcRB = src.fRowBytes,
912 dstRB = dst->fRowBytes;
917 switch (src.fFormat) {
920 radiusY, gaussFactorsY,
921 src.fImage, srcRB, srcW, srcH,
922 dst->image() + radiusX, dstRB);
926 radiusY, gaussFactorsY,
927 src.fImage, srcRB, srcW, srcH,
928 dst->image() + radiusX, dstRB);
932 radiusY, gaussFactorsY,
933 src.fImage, srcRB, srcW, srcH,
934 dst->image() + radiusX, dstRB);
938 src.fImage, srcRB, srcW, srcH,
939 dst->image() + radiusX, dstRB);
947 dst->fImage + radiusX, dstRB, srcW,
948 dst->image(), dstRB, dstW, dstH);
950 return {radiusX, radiusY};
957 if (fSigmaW < 2.0 && fSigmaH < 2.0) {
958 return small_blur(fSigmaW, fSigmaH, src, dst);
964 PlanGauss planW(fSigmaW);
965 PlanGauss planH(fSigmaH);
967 int borderW = planW.border(),
968 borderH = planH.border();
969 SkASSERT(borderH >= 0 && borderW >= 0);
972 if (src.fImage ==
nullptr) {
973 return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)};
975 if (dst->fImage ==
nullptr) {
976 dst->bounds().setEmpty();
980 int srcW = src.fBounds.width(),
981 srcH = src.fBounds.height(),
982 dstW = dst->fBounds.width(),
983 dstH = dst->fBounds.height();
984 SkASSERT(srcW >= 0 && srcH >= 0 && dstW >= 0 && dstH >= 0);
986 auto bufferSize = std::max(planW.bufferSize(), planH.bufferSize());
994 if (tmpH > std::numeric_limits<int>::max() / tmpW) {
1000 const PlanGauss::Scan& scanW = planW.makeBlurScan(srcW,
buffer);
1001 switch (src.fFormat) {
1003 const uint8_t* bwStart = src.fImage;
1006 for (
int y = 0; y < srcH; ++y, start >>= src.fRowBytes,
end >>= src.fRowBytes) {
1007 auto tmpStart = &tmp[
y];
1008 scanW.blur(
start,
end, tmpStart, tmpW, tmpStart + tmpW * tmpH);
1012 const uint8_t* a8Start = src.fImage;
1015 for (
int y = 0; y < srcH; ++y, start >>= src.fRowBytes,
end >>= src.fRowBytes) {
1016 auto tmpStart = &tmp[
y];
1017 scanW.blur(
start,
end, tmpStart, tmpW, tmpStart + tmpW * tmpH);
1021 const uint32_t* argbStart =
reinterpret_cast<const uint32_t*
>(src.fImage);
1024 for (
int y = 0; y < srcH; ++y, start >>= src.fRowBytes,
end >>= src.fRowBytes) {
1025 auto tmpStart = &tmp[
y];
1026 scanW.blur(
start,
end, tmpStart, tmpW, tmpStart + tmpW * tmpH);
1030 const uint16_t* lcdStart =
reinterpret_cast<const uint16_t*
>(src.fImage);
1033 for (
int y = 0; y < srcH; ++y, start >>= src.fRowBytes,
end >>= src.fRowBytes) {
1034 auto tmpStart = &tmp[
y];
1035 scanW.blur(
start,
end, tmpStart, tmpW, tmpStart + tmpW * tmpH);
1044 const PlanGauss::Scan& scanH = planH.makeBlurScan(tmpW,
buffer);
1045 for (
int y = 0;
y < tmpH;
y++) {
1046 auto tmpStart = &tmp[
y * tmpW];
1047 auto dstStart = &dst->image()[
y];
1049 scanH.blur(tmpStart, tmpStart + tmpW,
1050 dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH);
1053 return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)};
static void round(SkPoint *p)
static const uint32_t rgba[kNumPixels]
#define SK_ABORT(message,...)
#define SkASSERTF(cond, fmt,...)
#define SkPacked16ToG32(c)
#define SkPacked16ToR32(c)
#define SkPacked16ToB32(c)
#define SkGetPackedA32(packed)
static SkV4 v4(SkV3 v, SkScalar w)
static fp88 blur_y_radius_2(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &, const fp88 &, fp88 *d01, fp88 *d12, fp88 *d23, fp88 *d34, fp88 *, fp88 *, fp88 *, fp88 *)
static fp88 blur_y_radius_4(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &g4, fp88 *d01, fp88 *d12, fp88 *d23, fp88 *d34, fp88 *d45, fp88 *d56, fp88 *d67, fp88 *d78)
static constexpr uint16_t kHalf
decltype(blur_y_radius_1) BlurY
static void direct_blur_y(ToA8 toA8, const int strideOf8, int radius, uint16_t *gauss, const uint8_t *src, size_t srcRB, int srcW, int srcH, uint8_t *dst, size_t dstRB)
static void blur_column(ToA8 toA8, BlurY blur, int radius, int width, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &g4, const uint8_t *src, size_t srcRB, int srcH, uint8_t *dst, size_t dstRB)
static fp88 blur_y_radius_1(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &, const fp88 &, const fp88 &, fp88 *d01, fp88 *d12, fp88 *, fp88 *, fp88 *, fp88 *, fp88 *, fp88 *)
static fp88 blur_y_radius_3(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &, fp88 *d01, fp88 *d12, fp88 *d23, fp88 *d34, fp88 *d45, fp88 *d56, fp88 *, fp88 *)
static SkIPoint small_blur(double sigmaX, double sigmaY, const SkMask &src, SkMaskBuilder *dst)
static void bw_to_a8(uint8_t *a8, const uint8_t *from, int width)
static void direct_blur_x(int radius, uint16_t *gauss, const uint8_t *src, size_t srcStride, int srcW, uint8_t *dst, size_t dstStride, int dstW, int dstH)
static void blur_x_radius_3(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &, fp88 *d0, fp88 *d8)
static void blur_x_rect(BlurX blur, uint16_t *gauss, const uint8_t *src, size_t srcStride, int srcW, uint8_t *dst, size_t dstStride, int dstW, int dstH)
decltype(blur_x_radius_1) BlurX
static void blur_x_radius_4(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &g4, fp88 *d0, fp88 *d8)
static void blur_y_rect(ToA8 toA8, const int strideOf8, BlurY blur, int radius, uint16_t *gauss, const uint8_t *src, size_t srcRB, int srcW, int srcH, uint8_t *dst, size_t dstRB)
static void argb32_to_a8(uint8_t *a8, const uint8_t *from, int width)
skvx::Vec< 8, uint16_t > fp88
static void blur_x_radius_2(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &, const fp88 &, fp88 *d0, fp88 *d8)
static fp88 load(const uint8_t *from, int width, ToA8 *toA8)
static void lcd_to_a8(uint8_t *a8, const uint8_t *from, int width)
static void store(uint8_t *to, const fp88 &v, int width)
static void blur_row(BlurX blur, const fp88 &g0, const fp88 &g1, const fp88 &g2, const fp88 &g3, const fp88 &g4, const uint8_t *src, int srcW, uint8_t *dst, int dstW)
static constexpr uint16_t _____
static void blur_x_radius_1(const fp88 &s0, const fp88 &g0, const fp88 &g1, const fp88 &, const fp88 &, const fp88 &, fp88 *d0, fp88 *d8)
static constexpr const T & SkTPin(const T &x, const T &lo, const T &hi)
T * makeArrayDefault(size_t count)
static constexpr int kGaussArrayMax
SkIPoint blur(const SkMask &src, SkMaskBuilder *dst) const
SkMaskBlurFilter(double sigmaW, double sigmaH)
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE auto & d
static const uint8_t buffer[]
SIN Vec< N, float > sqrt(const Vec< N, float > &x)
SIN Vec< N, float > floor(const Vec< N, float > &x)
static SkMaskBuilder PrepareDestination(int radiusX, int radiusY, const SkMask &src)
@ kA8_Format
8bits per pixel mask (e.g. antialiasing)
@ kLCD16_Format
565 alpha for r/g/b
@ kARGB32_Format
SkPMColor.
@ kBW_Format
1bit per pixel mask (e.g. monochrome)
static SKVX_ALWAYS_INLINE Vec Load(const void *ptr)