Flutter Engine
The Flutter Engine
SkBlurImageFilter.cpp
Go to the documentation of this file.
1/*
2 * Copyright 2011 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
9
16#include "include/core/SkRect.h"
19#include "include/core/SkSize.h"
26#include "src/base/SkVx.h"
32
33#include <algorithm>
34#include <cmath>
35#include <cstdint>
36#include <cstring>
37#include <optional>
38#include <utility>
39
40struct SkIPoint;
41
42#if defined(SK_GANESH) || defined(SK_GRAPHITE)
43#include "src/gpu/BlurUtils.h"
44#endif
45
// SK_PREFETCH(ptr): cache prefetch hint for the memory at 'ptr'. It expands to _mm_prefetch
// (with _MM_HINT_T0) when SK_CPU_SSE_LEVEL is at least SSE1, to __builtin_prefetch on other
// compilers that define __GNUC__, and to nothing everywhere else.
46#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
47 #include <xmmintrin.h>
48 #define SK_PREFETCH(ptr) _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0)
49#elif defined(__GNUC__)
50 #define SK_PREFETCH(ptr) __builtin_prefetch(ptr)
51#else
52 #define SK_PREFETCH(ptr)
53#endif
54
55namespace {
56
// Image filter with a single child input that blurs that input using a per-axis sigma.
// It is flattenable (see SK_FLATTENABLE_HOOKS below) and optionally carries a "legacy" tile
// mode for callers that requested tiling without a crop rect.
//
// NOTE(review): this listing appears to have dropped a few source lines inside the class
// (the embedded numbering skips 74, 91 and 98). In particular 'fSigma' and the 'bounds'
// parameter of kernelBounds() are used below but their declarations are not visible here.
57class SkBlurImageFilter final : public SkImageFilter_Base {
58public:
// Creates a blur with no legacy tiling; fLegacyTileMode keeps its kDecal default.
59 SkBlurImageFilter(SkSize sigma, sk_sp<SkImageFilter> input)
60 : SkImageFilter_Base(&input, 1)
61 , fSigma{sigma} {}
62
// Creates a blur that also records 'legacyTileMode' for the legacy tiling emulation.
63 SkBlurImageFilter(SkSize sigma, SkTileMode legacyTileMode, sk_sp<SkImageFilter> input)
64 : SkImageFilter_Base(&input, 1)
65 , fSigma(sigma)
66 , fLegacyTileMode(legacyTileMode) {}
67
68 SkRect computeFastBounds(const SkRect&) const override;
69
70protected:
// Serializes the base filter state followed by sigma width, sigma height and the legacy
// tile mode.
71 void flatten(SkWriteBuffer&) const override;
72
73private:
75 SK_FLATTENABLE_HOOKS(SkBlurImageFilter)
76
77 skif::FilterResult onFilterImage(const skif::Context& context) const override;
78
79 skif::LayerSpace<SkIRect> onGetInputLayerBounds(
80 const skif::Mapping& mapping,
81 const skif::LayerSpace<SkIRect>& desiredOutput,
82 std::optional<skif::LayerSpace<SkIRect>> contentBounds) const override;
83
84 std::optional<skif::LayerSpace<SkIRect>> onGetOutputLayerBounds(
85 const skif::Mapping& mapping,
86 std::optional<skif::LayerSpace<SkIRect>> contentBounds) const override;
87
// Returns the layer-space sigma for 'mapping'; defined outside this class body.
88 skif::LayerSpace<SkSize> mapSigma(const skif::Mapping& mapping, bool gpuBacked) const;
89
// Returns 'bounds' outset on every side by ceil(3 * sigma) in each axis, where sigma is the
// value produced by mapSigma() for 'mapping'.
// NOTE(review): the parameter line declaring 'bounds' is missing from this listing;
// presumably it is a by-value skif::LayerSpace<SkIRect> -- confirm against the real source.
90 skif::LayerSpace<SkIRect> kernelBounds(const skif::Mapping& mapping,
92 bool gpuBacked) const {
93 skif::LayerSpace<SkSize> sigma = this->mapSigma(mapping, gpuBacked);
94 bounds.outset(skif::LayerSpace<SkSize>({3 * sigma.width(), 3 * sigma.height()}).ceil());
95 return bounds;
96 }
97
// NOTE(review): the declaration of 'fSigma' (initialized from an SkSize by both constructors)
// appears to be missing from this listing just above the following comment.
99 // kDecal means no legacy tiling, it will be handled by SkCropImageFilter instead. Legacy
100 // tiling occurs when there's no provided crop rect, and should be deleted once clients create
101 // their filters with defined tiling geometry.
102 SkTileMode fLegacyTileMode = SkTileMode::kDecal;
103};
104
105} // end namespace
106
// Factory that builds the blur filter DAG for the given sigmas, tile mode, input and crop.
//
// NOTE(review): the line that opens this definition (return type and function name) is not
// present in this listing; from the body it returns an sk_sp<SkImageFilter> and is presumably
// the public SkImageFilters blur factory -- confirm against the real source.
//
// Behavior visible below:
//  * non-finite or negative sigmas return nullptr;
//  * a non-decal tile mode without a crop rect yields a bare SkBlurImageFilter that carries
//    the tile mode as its legacy tile mode;
//  * otherwise the result is [optional tiling Crop] -> blur -> [optional decal Crop], where
//    both crops are only added when 'cropRect' is provided.
108 SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp<SkImageFilter> input,
109 const CropRect& cropRect) {
110 if (!SkIsFinite(sigmaX, sigmaY) || sigmaX < 0.f || sigmaY < 0.f) {
111 // Non-finite or negative sigmas are error conditions. We allow 0 sigma for X and/or Y
112 // for 1D blurs; onFilterImage() will detect when no visible blurring would occur based on
113 // the Context mapping.
114 return nullptr;
115 }
116
117 // Temporarily allow tiling with no crop rect
118 if (tileMode != SkTileMode::kDecal && !cropRect) {
119 return sk_make_sp<SkBlurImageFilter>(SkSize{sigmaX, sigmaY}, tileMode, std::move(input));
120 }
121
122 // The 'tileMode' behavior is not well-defined if there is no crop. We only apply it if
123 // there is a provided 'cropRect'.
124 sk_sp<SkImageFilter> filter = std::move(input);
125 if (tileMode != SkTileMode::kDecal && cropRect) {
126 // Historically the input image was restricted to the cropRect when tiling was not
127 // kDecal, so that the kernel evaluated the tiled edge conditions, while a kDecal crop
128 // only affected the output.
129 filter = SkImageFilters::Crop(*cropRect, tileMode, std::move(filter));
130 }
131
// The blur itself is always constructed without a legacy tile mode on this path.
132 filter = sk_make_sp<SkBlurImageFilter>(SkSize{sigmaX, sigmaY}, std::move(filter));
133 if (cropRect) {
134 // But regardless of the tileMode, the output is always decal cropped
135 filter = SkImageFilters::Crop(*cropRect, SkTileMode::kDecal, std::move(filter));
136 }
137 return filter;
138}
139
// Registers SkBlurImageFilter for deserialization, and additionally registers its CreateProc
// under the name "SkBlurImageFilterImpl".
// NOTE(review): the function header line is missing from this listing, so the function's
// name and signature cannot be confirmed here. The second registration presumably keeps
// data serialized under the older type name readable -- verify.
141 SK_REGISTER_FLATTENABLE(SkBlurImageFilter);
142 SkFlattenable::Register("SkBlurImageFilterImpl", SkBlurImageFilter::CreateProc);
143}
144
// Deserializes a blur filter from 'buffer': reads sigma X, sigma Y and a tile mode (bounded
// by SkTileMode::kLastTileMode), in that order.
//
// NOTE(review): two lines appear to be missing from this listing: the statement that
// declares 'common' (used below for the input and crop rect) and the beginning of the final
// return expression, whose argument list is all that remains. Judging by the comment below
// the call target is the ::Blur() factory -- confirm against the real source.
145sk_sp<SkFlattenable> SkBlurImageFilter::CreateProc(SkReadBuffer& buffer) {
147 SkScalar sigmaX = buffer.readScalar();
148 SkScalar sigmaY = buffer.readScalar();
149 SkTileMode tileMode = buffer.read32LE(SkTileMode::kLastTileMode);
150
151 // NOTE: For new SKPs, 'tileMode' holds the "legacy" tile mode; any originally specified tile
152 // mode with valid tiling geometry is handled in the SkCropImageFilters that wrap the blur.
153 // In a new SKP, when 'tileMode' is not kDecal, common.cropRect() will be null and the blur
154 // will automatically emulate the legacy tiling.
155 //
156 // In old SKPs, the 'tileMode' and common.cropRect() may not be null. ::Blur() automatically
157 // detects when this is a legacy or valid tiling and constructs the DAG appropriately.
159 sigmaX, sigmaY, tileMode, common.getInput(0), common.cropRect());
160}
161
162void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
163 this->SkImageFilter_Base::flatten(buffer);
164
165 buffer.writeScalar(SkSize(fSigma).fWidth);
166 buffer.writeScalar(SkSize(fSigma).fHeight);
167 buffer.writeInt(static_cast<int>(fLegacyTileMode));
168}
169
170///////////////////////////////////////////////////////////////////////////////
171
172namespace {
173
174// TODO: Move these functions into a CPU, 8888-only blur engine implementation; ideally share logic
175// with the similar techniques in SkMaskBlurFilter on 4x A8 data.
176
177// TODO(b/294575803): Provide a more accurate CPU implementation at s<2, at which point the notion
178// of an identity sigma can be consolidated between the different functions.
179// This is defined by the SVG spec:
180// https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement
181int calculate_window(double sigma) {
182 auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5));
183 return std::max(1, possibleWindow);
184}
185
186// This rather arbitrary-looking value results in a maximum box blur kernel size
187// of 1000 pixels on the raster path, which matches the WebKit and Firefox
188// implementations. Since the GPU path does not compute a box blur, putting
189// the limit on sigma ensures consistent behaviour between the GPU and
190// raster paths.
// For reference, the SVG window formula floor(sigma * 3 * sqrt(2 * pi) / 4 + 0.5) evaluates to
// 1000 for sigma = 532.
191static constexpr SkScalar kMaxSigma = 532.f;
192
193class Pass {
194public:
195 explicit Pass(int border) : fBorder(border) {}
196 virtual ~Pass() = default;
197
198 void blur(int srcLeft, int srcRight, int dstRight,
199 const uint32_t* src, int srcStride,
200 uint32_t* dst, int dstStride) {
201 this->startBlur();
202
203 auto srcStart = srcLeft - fBorder,
204 srcEnd = srcRight - fBorder,
205 dstEnd = dstRight,
206 srcIdx = srcStart,
207 dstIdx = 0;
208
209 const uint32_t* srcCursor = src;
210 uint32_t* dstCursor = dst;
211
212 if (dstIdx < srcIdx) {
213 // The destination pixels are not effected by the src pixels,
214 // change to zero as per the spec.
215 // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
216 int commonEnd = std::min(srcIdx, dstEnd);
217 while (dstIdx < commonEnd) {
218 *dstCursor = 0;
219 dstCursor += dstStride;
220 SK_PREFETCH(dstCursor);
221 dstIdx++;
222 }
223 } else if (srcIdx < dstIdx) {
224 // The edge of the source is before the edge of the destination. Calculate the sums for
225 // the pixels before the start of the destination.
226 if (int commonEnd = std::min(dstIdx, srcEnd); srcIdx < commonEnd) {
227 // Preload the blur with values from src before dst is entered.
228 int n = commonEnd - srcIdx;
229 this->blurSegment(n, srcCursor, srcStride, nullptr, 0);
230 srcIdx += n;
231 srcCursor += n * srcStride;
232 }
233 if (srcIdx < dstIdx) {
234 // The weird case where src is out of pixels before dst is even started.
235 int n = dstIdx - srcIdx;
236 this->blurSegment(n, nullptr, 0, nullptr, 0);
237 srcIdx += n;
238 }
239 }
240
241 if (int commonEnd = std::min(dstEnd, srcEnd); dstIdx < commonEnd) {
242 // Both srcIdx and dstIdx are in sync now, and can run in a 1:1 fashion. This is the
243 // normal mode of operation.
244 SkASSERT(srcIdx == dstIdx);
245
246 int n = commonEnd - dstIdx;
247 this->blurSegment(n, srcCursor, srcStride, dstCursor, dstStride);
248 srcCursor += n * srcStride;
249 dstCursor += n * dstStride;
250 dstIdx += n;
251 srcIdx += n;
252 }
253
254 // Drain the remaining blur values into dst assuming 0's for the leading edge.
255 if (dstIdx < dstEnd) {
256 int n = dstEnd - dstIdx;
257 this->blurSegment(n, nullptr, 0, dstCursor, dstStride);
258 }
259 }
260
261protected:
262 virtual void startBlur() = 0;
263 virtual void blurSegment(
264 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) = 0;
265
266private:
267 const int fBorder;
268};
269
270class PassMaker {
271public:
272 explicit PassMaker(int window) : fWindow{window} {}
273 virtual ~PassMaker() = default;
274 virtual Pass* makePass(void* buffer, SkArenaAlloc* alloc) const = 0;
275 virtual size_t bufferSizeBytes() const = 0;
276 int window() const {return fWindow;}
277
278private:
279 const int fWindow;
280};
281
282// Implement a scanline processor that uses a three-box filter to approximate a Gaussian blur.
283// The GaussPass is limit to processing sigmas < 135.
284class GaussPass final : public Pass {
285public:
286 // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
287 // using the Gauss filter. It also limits the size of buffers used hold intermediate values.
288 // Explanation of maximums:
289 // sum0 = window * 255
290 // sum1 = window * sum0 -> window * window * 255
291 // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
292 //
293 // The value window^3 * 255 must fit in a uint32_t. So,
294 // window^3 < 2^32. window = 255.
295 //
296 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
297 // For window <= 255, the largest value for sigma is 136.
298 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
299 SkASSERT(0 <= sigma);
300 int window = calculate_window(sigma);
301 if (255 <= window) {
302 return nullptr;
303 }
304
305 class Maker : public PassMaker {
306 public:
307 explicit Maker(int window) : PassMaker{window} {}
308 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
309 return GaussPass::Make(this->window(), buffer, alloc);
310 }
311
312 size_t bufferSizeBytes() const override {
313 int window = this->window();
314 size_t onePassSize = window - 1;
315 // If the window is odd, then there is an obvious middle element. For even sizes
316 // 2 passes are shifted, and the last pass has an extra element. Like this:
317 // S
318 // aaaAaa
319 // bbBbbb
320 // cccCccc
321 // D
322 size_t bufferCount = (window & 1) == 1 ? 3 * onePassSize : 3 * onePassSize + 1;
323 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
324 }
325 };
326
327 return alloc->make<Maker>(window);
328 }
329
330 static GaussPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
331 // We don't need to store the trailing edge pixel in the buffer;
332 int passSize = window - 1;
333 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
334 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
335 skvx::Vec<4, uint32_t>* buffer2 = buffer1 + passSize;
336 // If the window is odd just one buffer is needed, but if it's even, then there is one
337 // more element on that pass.
338 skvx::Vec<4, uint32_t>* buffersEnd = buffer2 + ((window & 1) ? passSize : passSize + 1);
339
340 // Calculating the border is tricky. The border is the distance in pixels between the first
341 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
342 // I will go through the odd case which is simpler, and then through the even case. Given a
343 // stack of filters seven wide for the odd case of three passes.
344 //
345 // S
346 // aaaAaaa
347 // bbbBbbb
348 // cccCccc
349 // D
350 //
351 // The furthest changed pixel is when the filters are in the following configuration.
352 //
353 // S
354 // aaaAaaa
355 // bbbBbbb
356 // cccCccc
357 // D
358 //
359 // The A pixel is calculated using the value S, the B uses A, and the C uses B, and
360 // finally D is C. So, with a window size of seven the border is nine. In the odd case, the
361 // border is 3*((window - 1)/2).
362 //
363 // For even cases the filter stack is more complicated. The spec specifies two passes
364 // of even filters and a final pass of odd filters. A stack for a width of six looks like
365 // this.
366 //
367 // S
368 // aaaAaa
369 // bbBbbb
370 // cccCccc
371 // D
372 //
373 // The furthest pixel looks like this.
374 //
375 // S
376 // aaaAaa
377 // bbBbbb
378 // cccCccc
379 // D
380 //
381 // For a window of six, the border value is eight. In the even case the border is 3 *
382 // (window/2) - 1.
383 int border = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1;
384
385 // If the window is odd then the divisor is just window ^ 3 otherwise,
386 // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
387 int window2 = window * window;
388 int window3 = window2 * window;
389 int divisor = (window & 1) == 1 ? window3 : window3 + window2;
390 return alloc->make<GaussPass>(buffer0, buffer1, buffer2, buffersEnd, border, divisor);
391 }
392
393 GaussPass(skvx::Vec<4, uint32_t>* buffer0,
394 skvx::Vec<4, uint32_t>* buffer1,
395 skvx::Vec<4, uint32_t>* buffer2,
396 skvx::Vec<4, uint32_t>* buffersEnd,
397 int border,
398 int divisor)
399 : Pass{border}
400 , fBuffer0{buffer0}
401 , fBuffer1{buffer1}
402 , fBuffer2{buffer2}
403 , fBuffersEnd{buffersEnd}
404 , fDivider(divisor) {}
405
406private:
407 void startBlur() override {
408 skvx::Vec<4, uint32_t> zero = {0u, 0u, 0u, 0u};
409 zero.store(fSum0);
410 zero.store(fSum1);
411 auto half = fDivider.half();
412 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum2);
413 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
414
415 fBuffer0Cursor = fBuffer0;
416 fBuffer1Cursor = fBuffer1;
417 fBuffer2Cursor = fBuffer2;
418 }
419
420 // GaussPass implements the common three pass box filter approximation of Gaussian blur,
421 // but combines all three passes into a single pass. This approach is facilitated by three
422 // circular buffers the width of the window which track values for trailing edges of each of
423 // the three passes. This allows the algorithm to use more precision in the calculation
424 // because the values are not rounded each pass. And this implementation also avoids a trap
425 // that's easy to fall into resulting in blending in too many zeroes near the edge.
426 //
427 // In general, a window sum has the form:
428 // sum_n+1 = sum_n + leading_edge - trailing_edge.
429 // If instead we do the subtraction at the end of the previous iteration, we can just
430 // calculate the sums instead of having to do the subtractions too.
431 //
432 // In previous iteration:
433 // sum_n+1 = sum_n - trailing_edge.
434 //
435 // In this iteration:
436 // sum_n+1 = sum_n + leading_edge.
437 //
438 // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
439 // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
440 // three passes at the same time has the form:
441 //
442 // sum0_n+1 = sum0_n + leading edge
443 // sum1_n+1 = sum1_n + sum0_n+1
444 // sum2_n+1 = sum2_n + sum1_n+1
445 //
446 // sum2_n+1 / window^3 is the new value of the destination pixel.
447 //
448 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
449 // next go around. This is the case for odd sized windows, even windows the the third
450 // circular buffer is one larger then the first two circular buffers.
451 //
452 // sum2_n+2 = sum2_n+1 - buffer2[i];
453 // buffer2[i] = sum1;
454 // sum1_n+2 = sum1_n+1 - buffer1[i];
455 // buffer1[i] = sum0;
456 // sum0_n+2 = sum0_n+1 - buffer0[i];
457 // buffer0[i] = leading edge
458 void blurSegment(
459 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
460 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
461 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
462 skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
466
467 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
468 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
469 sum0 += leadingEdge;
470 sum1 += sum0;
471 sum2 += sum1;
472
473 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum2);
474
475 sum2 -= *buffer2Cursor;
476 *buffer2Cursor = sum1;
477 buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
478 sum1 -= *buffer1Cursor;
479 *buffer1Cursor = sum0;
480 buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
481 sum0 -= *buffer0Cursor;
482 *buffer0Cursor = leadingEdge;
483 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
484
485 return skvx::cast<uint8_t>(blurred);
486 };
487
488 auto loadEdge = [&](const uint32_t* srcCursor) {
489 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
490 };
491
492 if (!src && !dst) {
493 while (n --> 0) {
494 (void)processValue(0);
495 }
496 } else if (src && !dst) {
497 while (n --> 0) {
498 (void)processValue(loadEdge(src));
499 src += srcStride;
500 }
501 } else if (!src && dst) {
502 while (n --> 0) {
503 processValue(0u).store(dst);
504 dst += dstStride;
505 }
506 } else if (src && dst) {
507 while (n --> 0) {
508 processValue(loadEdge(src)).store(dst);
509 src += srcStride;
510 dst += dstStride;
511 }
512 }
513
514 // Store the state
515 fBuffer0Cursor = buffer0Cursor;
516 fBuffer1Cursor = buffer1Cursor;
517 fBuffer2Cursor = buffer2Cursor;
518
519 sum0.store(fSum0);
520 sum1.store(fSum1);
521 sum2.store(fSum2);
522 }
523
524 skvx::Vec<4, uint32_t>* const fBuffer0;
525 skvx::Vec<4, uint32_t>* const fBuffer1;
526 skvx::Vec<4, uint32_t>* const fBuffer2;
527 skvx::Vec<4, uint32_t>* const fBuffersEnd;
528 const skvx::ScaledDividerU32 fDivider;
529
530 // blur state
531 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
532 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
533 char fSum2[sizeof(skvx::Vec<4, uint32_t>)];
534 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
535 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
536 skvx::Vec<4, uint32_t>* fBuffer2Cursor;
537};
538
539// Implement a scanline processor that uses a two-box filter to approximate a Tent filter.
540// The TentPass is limit to processing sigmas < 2183.
541class TentPass final : public Pass {
542public:
543 // NB 2183 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
544 // using the Tent filter. It also limits the size of buffers used hold intermediate values.
545 // Explanation of maximums:
546 // sum0 = window * 255
547 // sum1 = window * sum0 -> window * window * 255
548 //
549 // The value window^2 * 255 must fit in a uint32_t. So,
550 // window^2 < 2^32. window = 4104.
551 //
552 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
553 // For window <= 4104, the largest value for sigma is 2183.
554 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
555 SkASSERT(0 <= sigma);
556 int gaussianWindow = calculate_window(sigma);
557 // This is a naive method of using the window size for the Gaussian blur to calculate the
558 // window size for the Tent blur. This seems to work well in practice.
559 //
560 // We can use a single pixel to generate the effective blur area given a window size. For
561 // the Gaussian blur this is 3 * window size. For the Tent filter this is 2 * window size.
562 int tentWindow = 3 * gaussianWindow / 2;
563 if (tentWindow >= 4104) {
564 return nullptr;
565 }
566
567 class Maker : public PassMaker {
568 public:
569 explicit Maker(int window) : PassMaker{window} {}
570 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
571 return TentPass::Make(this->window(), buffer, alloc);
572 }
573
574 size_t bufferSizeBytes() const override {
575 size_t onePassSize = this->window() - 1;
576 // If the window is odd, then there is an obvious middle element. For even sizes 2
577 // passes are shifted, and the last pass has an extra element. Like this:
578 // S
579 // aaaAaa
580 // bbBbbb
581 // D
582 size_t bufferCount = 2 * onePassSize;
583 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
584 }
585 };
586
587 return alloc->make<Maker>(tentWindow);
588 }
589
590 static TentPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
591 if (window > 4104) {
592 return nullptr;
593 }
594
595 // We don't need to store the trailing edge pixel in the buffer;
596 int passSize = window - 1;
597 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
598 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
599 skvx::Vec<4, uint32_t>* buffersEnd = buffer1 + passSize;
600
601 // Calculating the border is tricky. The border is the distance in pixels between the first
602 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
603 // I will go through the odd case which is simpler, and then through the even case. Given a
604 // stack of filters seven wide for the odd case of three passes.
605 //
606 // S
607 // aaaAaaa
608 // bbbBbbb
609 // D
610 //
611 // The furthest changed pixel is when the filters are in the following configuration.
612 //
613 // S
614 // aaaAaaa
615 // bbbBbbb
616 // D
617 //
618 // The A pixel is calculated using the value S, the B uses A, and the D uses B.
619 // So, with a window size of seven the border is nine. In the odd case, the border is
620 // window - 1.
621 //
622 // For even cases the filter stack is more complicated. It uses two passes
623 // of even filters offset from each other. A stack for a width of six looks like
624 // this.
625 //
626 // S
627 // aaaAaa
628 // bbBbbb
629 // D
630 //
631 // The furthest pixel looks like this.
632 //
633 // S
634 // aaaAaa
635 // bbBbbb
636 // D
637 //
638 // For a window of six, the border value is 5. In the even case the border is
639 // window - 1.
640 int border = window - 1;
641
642 int divisor = window * window;
643 return alloc->make<TentPass>(buffer0, buffer1, buffersEnd, border, divisor);
644 }
645
646 TentPass(skvx::Vec<4, uint32_t>* buffer0,
647 skvx::Vec<4, uint32_t>* buffer1,
648 skvx::Vec<4, uint32_t>* buffersEnd,
649 int border,
650 int divisor)
651 : Pass{border}
652 , fBuffer0{buffer0}
653 , fBuffer1{buffer1}
654 , fBuffersEnd{buffersEnd}
655 , fDivider(divisor) {}
656
657private:
658 void startBlur() override {
659 skvx::Vec<4, uint32_t>{0u, 0u, 0u, 0u}.store(fSum0);
660 auto half = fDivider.half();
661 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum1);
662 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
663
664 fBuffer0Cursor = fBuffer0;
665 fBuffer1Cursor = fBuffer1;
666 }
667
668 // TentPass implements the common two pass box filter approximation of Tent filter,
669 // but combines all both passes into a single pass. This approach is facilitated by two
670 // circular buffers the width of the window which track values for trailing edges of each of
671 // both passes. This allows the algorithm to use more precision in the calculation
672 // because the values are not rounded each pass. And this implementation also avoids a trap
673 // that's easy to fall into resulting in blending in too many zeroes near the edge.
674 //
675 // In general, a window sum has the form:
676 // sum_n+1 = sum_n + leading_edge - trailing_edge.
677 // If instead we do the subtraction at the end of the previous iteration, we can just
678 // calculate the sums instead of having to do the subtractions too.
679 //
680 // In previous iteration:
681 // sum_n+1 = sum_n - trailing_edge.
682 //
683 // In this iteration:
684 // sum_n+1 = sum_n + leading_edge.
685 //
686 // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
687 // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
688 // three passes at the same time has the form:
689 //
690 // sum0_n+1 = sum0_n + leading edge
691 // sum1_n+1 = sum1_n + sum0_n+1
692 //
693 // sum1_n+1 / window^2 is the new value of the destination pixel.
694 //
695 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
696 // next go around.
697 //
698 // sum1_n+2 = sum1_n+1 - buffer1[i];
699 // buffer1[i] = sum0;
700 // sum0_n+2 = sum0_n+1 - buffer0[i];
701 // buffer0[i] = leading edge
702 void blurSegment(
703 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
704 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
705 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
708
709 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
710 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
711 sum0 += leadingEdge;
712 sum1 += sum0;
713
714 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum1);
715
716 sum1 -= *buffer1Cursor;
717 *buffer1Cursor = sum0;
718 buffer1Cursor = (buffer1Cursor + 1) < fBuffersEnd ? buffer1Cursor + 1 : fBuffer1;
719 sum0 -= *buffer0Cursor;
720 *buffer0Cursor = leadingEdge;
721 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
722
723 return skvx::cast<uint8_t>(blurred);
724 };
725
726 auto loadEdge = [&](const uint32_t* srcCursor) {
727 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
728 };
729
730 if (!src && !dst) {
731 while (n --> 0) {
732 (void)processValue(0);
733 }
734 } else if (src && !dst) {
735 while (n --> 0) {
736 (void)processValue(loadEdge(src));
737 src += srcStride;
738 }
739 } else if (!src && dst) {
740 while (n --> 0) {
741 processValue(0u).store(dst);
742 dst += dstStride;
743 }
744 } else if (src && dst) {
745 while (n --> 0) {
746 processValue(loadEdge(src)).store(dst);
747 src += srcStride;
748 dst += dstStride;
749 }
750 }
751
752 // Store the state
753 fBuffer0Cursor = buffer0Cursor;
754 fBuffer1Cursor = buffer1Cursor;
755 sum0.store(fSum0);
756 sum1.store(fSum1);
757 }
758
759 skvx::Vec<4, uint32_t>* const fBuffer0;
760 skvx::Vec<4, uint32_t>* const fBuffer1;
761 skvx::Vec<4, uint32_t>* const fBuffersEnd;
762 const skvx::ScaledDividerU32 fDivider;
763
764 // blur state
765 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
766 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
767 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
768 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
769};
770
771// TODO: Implement CPU backend for different fTileMode. This is still worth doing inline with the
772// blur; at the moment the tiling is applied via the CropImageFilter and carried as metadata on
773// the FilterResult. This is forcefully applied in onFilterImage() to get a simple SkSpecialImage to
774// pass to cpu_blur or gpu_blur, which evaluates the tile mode into a kernel-outset buffer that is
775// then processed by these functions. If the tilemode is the only thing being applied, it would be
776// ideal to tile from the input image directly instead of inserting a new temporary image. For CPU
777// blurs this temporary image now creates the appearance of correctness; for GPU blurs that could
778// tile already it may create a regression.
//
// Blurs 'input' on the CPU and returns a new special image covering 'dstBounds', or nullptr
// when the input cannot be read as a bitmap, is not kN32_SkColorType, or the destination
// pixels cannot be allocated. An X pass (rows) runs first when its window is > 1, followed by
// a Y pass (columns) when its window is > 1; with both active the Y pass runs in place on the
// destination bitmap.
//
// NOTE(review): several declaration lines are missing from this listing. 'sigma' and
// 'srcBounds' are used as if they were parameters, and 'alloc', 'src' and 'dst' are used as
// locals, but none of them is declared in the visible text. From their use, 'sigma' has
// width()/height(), 'srcBounds' is assigned from 'dstBounds', 'alloc' is an arena passed by
// address, and 'src'/'dst' are bitmaps -- confirm the exact types against the real source.
779sk_sp<SkSpecialImage> cpu_blur(const skif::Context& ctx,
781 const sk_sp<SkSpecialImage>& input,
783 skif::LayerSpace<SkIRect> dstBounds) {
784 // map_sigma limits sigma to 532 to match 1000px box filter limit of WebKit and Firefox.
785 // Since this does not exceed the limits of the TentPass (2183), there won't be overflow when
786 // computing a kernel over a pixel window filled with 255.
787 static_assert(kMaxSigma <= 2183.0f);
788
789 // The input image should fill the srcBounds
790 SkASSERT(input->width() == srcBounds.width() && input->height() == srcBounds.height());
791
// Picks the pass implementation for one axis: GaussPass when it accepts the sigma, otherwise
// TentPass; aborts if neither accepts it.
793 auto makeMaker = [&](double sigma) -> PassMaker* {
794 SkASSERT(0 <= sigma && sigma <= 2183); // should be guaranteed after map_sigma
795 if (PassMaker* maker = GaussPass::MakeMaker(sigma, &alloc)) {
796 return maker;
797 }
798 if (PassMaker* maker = TentPass::MakeMaker(sigma, &alloc)) {
799 return maker;
800 }
801 SK_ABORT("Sigma is out of range.");
802 };
803
804 PassMaker* makerX = makeMaker(sigma.width());
805 PassMaker* makerY = makeMaker(sigma.height());
806 // A no-op blur should have been caught earlier in onFilterImage().
807 SkASSERT(makerX->window() > 1 || makerY->window() > 1);
808
810 if (!SkSpecialImages::AsBitmap(input.get(), &src)) {
811 return nullptr;
812 }
813 if (src.colorType() != kN32_SkColorType) {
814 return nullptr;
815 }
816
817 auto originalDstBounds = dstBounds;
818 if (makerX->window() > 1) {
819 // Inflate the dst by the window required for the Y pass so that the X pass can prepare it.
820 // The Y pass will be offset to only write to the original rows in dstBounds, but its window
821 // will access these extra rows calculated by the X pass. The SpecialImage factory will
822 // then subset the bitmap so it appears to match 'originalDstBounds' tightly. We make one
823 // slightly larger image to hold this extra data instead of two separate images sized
824 // exactly to each pass because the CPU blur can write in place.
825 const auto yPadding = skif::LayerSpace<SkSize>({0.f, 3 * sigma.height()}).ceil();
826 dstBounds.outset(yPadding);
827 }
828
830 const skif::LayerSpace<SkIPoint> dstOrigin = dstBounds.topLeft();
831 if (!dst.tryAllocPixels(src.info().makeWH(dstBounds.width(), dstBounds.height()))) {
832 return nullptr;
833 }
834 dst.eraseColor(SK_ColorTRANSPARENT);
835
// One scratch buffer, sized for the larger of the two passes, is shared by both passes.
836 auto buffer = alloc.makeBytesAlignedTo(std::max(makerX->bufferSizeBytes(),
837 makerY->bufferSizeBytes()),
838 alignof(skvx::Vec<4, uint32_t>));
839
840 // Basic Plan: The three cases to handle
841 // * Horizontal and Vertical - blur horizontally while copying values from the source to
842 // the destination. Then, do an in-place vertical blur.
843 // * Horizontal only - blur horizontally copying values from the source to the destination.
844 // * Vertical only - blur vertically copying values from the source to the destination.
845
846 // Initialize these assuming the Y-only case
847 int loopStart = std::max(srcBounds.left(), dstBounds.left());
848 int loopEnd = std::min(srcBounds.right(), dstBounds.right());
849 int dstYOffset = 0;
850
851 if (makerX->window() > 1) {
852 // First an X-only blur from src into dst, including the extra rows that will become input
853 // for the second Y pass, which will then be performed in place.
854 loopStart = std::max(srcBounds.top(), dstBounds.top());
855 loopEnd = std::min(srcBounds.bottom(), dstBounds.bottom());
856
857 auto srcAddr = src.getAddr32(0, loopStart - srcBounds.top());
858 auto dstAddr = dst.getAddr32(0, loopStart - dstBounds.top());
859
860 // Iterate over each row to calculate 1D blur along X.
861 Pass* pass = makerX->makePass(buffer, &alloc);
862 for (int y = loopStart; y < loopEnd; ++y) {
// Source extents are passed relative to the left edge of dst; pixels within a row are
// adjacent, hence the strides of 1.
863 pass->blur(srcBounds.left() - dstBounds.left(),
864 srcBounds.right() - dstBounds.left(),
865 dstBounds.width(),
866 srcAddr, 1,
867 dstAddr, 1);
868 srcAddr += src.rowBytesAsPixels();
869 dstAddr += dst.rowBytesAsPixels();
870 }
871
872 // Set up the Y pass to blur from the full dst into the non-outset portion of dst
873 src = dst;
874 loopStart = originalDstBounds.left();
875 loopEnd = originalDstBounds.right();
876 // The new 'dst' is equal to dst.extractSubset(originalDstBounds.offset(-dstOrigin)), but
877 // by construction only the Y offset has an interesting value so this is a little more
878 // efficient.
879 dstYOffset = originalDstBounds.top() - dstBounds.top();
880
881 srcBounds = dstBounds;
882 dstBounds = originalDstBounds;
883 }
884
885 // Iterate over each column to calculate 1D blur along Y. This is either blurring from src into
886 // dst for a 1D blur; or it's blurring from dst into dst for the second pass of a 2D blur.
887 if (makerY->window() > 1) {
888 auto srcAddr = src.getAddr32(loopStart - srcBounds.left(), 0);
889 auto dstAddr = dst.getAddr32(loopStart - dstBounds.left(), dstYOffset);
890
891 Pass* pass = makerY->makePass(buffer, &alloc);
892 for (int x = loopStart; x < loopEnd; ++x) {
// Walking down a column: consecutive pixels are one bitmap row apart.
893 pass->blur(srcBounds.top() - dstBounds.top(),
894 srcBounds.bottom() - dstBounds.top(),
895 dstBounds.height(),
896 srcAddr, src.rowBytesAsPixels(),
897 dstAddr, dst.rowBytesAsPixels());
898 srcAddr += 1;
899 dstAddr += 1;
900 }
901 }
902
903 originalDstBounds.offset(-dstOrigin); // Make relative to dst's pixels
904 return SkSpecialImages::MakeFromRaster(SkIRect(originalDstBounds),
905 dst,
906 ctx.backend()->surfaceProps());
907}
908
909} // namespace
910
911skif::FilterResult SkBlurImageFilter::onFilterImage(const skif::Context& ctx) const {
912 const bool gpuBacked = SkToBool(ctx.backend()->getBlurEngine());
913
914 skif::Context inputCtx = ctx.withNewDesiredOutput(
915 this->kernelBounds(ctx.mapping(), ctx.desiredOutput(), gpuBacked));
916
917 skif::FilterResult childOutput = this->getChildOutput(0, inputCtx);
918 skif::LayerSpace<SkSize> sigma = this->mapSigma(ctx.mapping(), gpuBacked);
919 if (sigma.width() == 0.f && sigma.height() == 0.f) {
920 // No actual blur, so just return the input unmodified
921 return childOutput;
922 }
923
924 SkASSERT(sigma.width() >= 0.f && sigma.width() <= kMaxSigma &&
925 sigma.height() >= 0.f && sigma.height() <= kMaxSigma);
926
927 // By default, FilterResult::blur() will calculate a more optimal output automatically, so
928 // convey the original output to it.
930 if (!gpuBacked || fLegacyTileMode != SkTileMode::kDecal) {
931 maxOutput = this->kernelBounds(ctx.mapping(), childOutput.layerBounds(), gpuBacked);
932 if (!maxOutput.intersect(ctx.desiredOutput())) {
933 return {};
934 }
935 }
936 if (fLegacyTileMode != SkTileMode::kDecal) {
937 // Legacy tiling applied to the input image when there was no explicit crop rect. Use the
938 // child's output image's layer bounds as the crop rectangle to adjust the edge tile mode
939 // without restricting the image.
940 childOutput = childOutput.applyCrop(inputCtx,
941 childOutput.layerBounds(),
942 fLegacyTileMode);
943 }
944
945 // TODO(b/40039877): Once the CPU blur functions can handle tile modes and color types beyond
946 // N32, there won't be any need to branch on how to apply the blur to the filter result.
947 if (gpuBacked) {
948 // For non-legacy tiling, 'maxOutput' is equal to the desired output. For decal's it matches
949 // what Builder::blur() calculates internally. For legacy tiling, however, it's dependent on
950 // the original child output's bounds ignoring the tile mode's effect.
951 skif::Context croppedOutput = ctx.withNewDesiredOutput(maxOutput);
953 builder.add(childOutput);
954 return builder.blur(sigma);
955 }
956
957 // The CPU blur does not yet support tile modes so explicitly resolve it to a special image that
958 // has the tiling rendered into the pixels.
959
960 auto [resolvedChildOutput, origin] = childOutput.imageAndOffset(inputCtx);
961 if (!resolvedChildOutput) {
962 return {};
963 }
964 skif::LayerSpace<SkIRect> srcBounds{SkIRect::MakeXYWH(origin.x(),
965 origin.y(),
966 resolvedChildOutput->width(),
967 resolvedChildOutput->height())};
968
969 return skif::FilterResult{cpu_blur(ctx, sigma, std::move(resolvedChildOutput),
970 srcBounds, maxOutput),
971 maxOutput.topLeft()};
972}
973
974skif::LayerSpace<SkSize> SkBlurImageFilter::mapSigma(const skif::Mapping& mapping,
975 bool gpuBacked) const {
976 skif::LayerSpace<SkSize> sigma = mapping.paramToLayer(fSigma);
977 // Clamp to the maximum sigma
978 sigma = skif::LayerSpace<SkSize>({std::min(sigma.width(), kMaxSigma),
979 std::min(sigma.height(), kMaxSigma)});
980
981 // TODO(b/294575803) - The CPU and GPU implementations have different requirements for
982 // "identity", with the GPU able to handle smaller sigmas. calculate_window() returns <= 1 once
983 // sigma is below ~0.8. Ideally we should work out the sigma threshold such that the max
984 // contribution from adjacent pixels is less than 0.5/255 and use that for both backends.
985 // NOTE: For convenience with builds, and the flux that is about to occur with the blur utils,
986 // this GPU logic is just copied from GrBlurUtils
987
988 // Disable bluring on axes that are not finite, or that are small enough that the blur is
989 // effectively an identity.
990 if (!SkIsFinite(sigma.width()) || (!gpuBacked && calculate_window(sigma.width()) <= 1)
991#if defined(SK_GANESH) || defined(SK_GRAPHITE)
992 || (gpuBacked && skgpu::BlurIsEffectivelyIdentity(sigma.width()))
993#endif
994 ) {
995 sigma = skif::LayerSpace<SkSize>({0.f, sigma.height()});
996 }
997
998 if (!SkIsFinite(sigma.height()) || (!gpuBacked && calculate_window(sigma.height()) <= 1)
999#if defined(SK_GANESH) || defined(SK_GRAPHITE)
1000 || (gpuBacked && skgpu::BlurIsEffectivelyIdentity(sigma.height()))
1001#endif
1002 ) {
1003 sigma = skif::LayerSpace<SkSize>({sigma.width(), 0.f});
1004 }
1005
1006 return sigma;
1007}
1008
1009skif::LayerSpace<SkIRect> SkBlurImageFilter::onGetInputLayerBounds(
1010 const skif::Mapping& mapping,
1011 const skif::LayerSpace<SkIRect>& desiredOutput,
1012 std::optional<skif::LayerSpace<SkIRect>> contentBounds) const {
1013 // Use gpuBacked=true since that has a more sensitive kernel, ensuring any layer input bounds
1014 // will be sufficient for both GPU and CPU evaluations.
1015 skif::LayerSpace<SkIRect> requiredInput =
1016 this->kernelBounds(mapping, desiredOutput, /*gpuBacked=*/true);
1017 return this->getChildInputLayerBounds(0, mapping, requiredInput, contentBounds);
1018}
1019
1020std::optional<skif::LayerSpace<SkIRect>> SkBlurImageFilter::onGetOutputLayerBounds(
1021 const skif::Mapping& mapping,
1022 std::optional<skif::LayerSpace<SkIRect>> contentBounds) const {
1023 auto childOutput = this->getChildOutputLayerBounds(0, mapping, contentBounds);
1024 if (childOutput) {
1025 // Use gpuBacked=true since it will ensure output bounds are conservative; CPU-based blurs
1026 // may produce 1px inset from this for very small sigmas.
1027 return this->kernelBounds(mapping, *childOutput, /*gpuBacked=*/true);
1028 } else {
1030 }
1031}
1032
1033SkRect SkBlurImageFilter::computeFastBounds(const SkRect& src) const {
1034 SkRect bounds = this->getInput(0) ? this->getInput(0)->computeFastBounds(src) : src;
1035 bounds.outset(SkSize(fSigma).width() * 3, SkSize(fSigma).height() * 3);
1036 return bounds;
1037}
sk_bzero(glyphs, sizeof(glyphs))
#define SK_ABORT(message,...)
Definition: SkAssert.h:70
#define SkASSERT(cond)
Definition: SkAssert.h:116
void SkRegisterBlurImageFilterFlattenable()
#define SK_PREFETCH(ptr)
constexpr SkColor SK_ColorTRANSPARENT
Definition: SkColor.h:99
#define SK_FLATTENABLE_HOOKS(type)
#define SK_REGISTER_FLATTENABLE(type)
constexpr double SK_DoublePI
static bool SkIsFinite(T x, Pack... values)
#define SK_IMAGEFILTER_UNFLATTEN_COMMON(localVar, expectedCount)
SkTileMode
Definition: SkTileMode.h:13
static constexpr bool SkToBool(const T &x)
Definition: SkTo.h:35
void * makeBytesAlignedTo(size_t size, size_t align)
Definition: SkArenaAlloc.h:200
auto make(Ctor &&ctor) -> decltype(ctor(nullptr))
Definition: SkArenaAlloc.h:120
static void Register(const char name[], Factory)
void flatten(SkWriteBuffer &) const override
static sk_sp< SkImageFilter > Blur(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp< SkImageFilter > input, const CropRect &cropRect={})
static sk_sp< SkImageFilter > Crop(const SkRect &rect, SkTileMode tileMode, sk_sp< SkImageFilter > input)
int width() const
int height() const
T * get() const
Definition: SkRefCnt.h:303
const SkSurfaceProps & surfaceProps() const
virtual const SkBlurEngine * getBlurEngine() const =0
const Backend * backend() const
const LayerSpace< SkIRect > & desiredOutput() const
Context withNewDesiredOutput(const LayerSpace< SkIRect > &desiredOutput) const
const Mapping & mapping() const
FilterResult applyCrop(const Context &ctx, const LayerSpace< SkIRect > &crop, SkTileMode tileMode=SkTileMode::kDecal) const
sk_sp< SkSpecialImage > imageAndOffset(const Context &ctx, SkIPoint *offset) const
LayerSpace< SkIRect > layerBounds() const
LayerSpace< SkIPoint > topLeft() const
void outset(const LayerSpace< SkISize > &delta)
bool intersect(const LayerSpace< SkIRect > &r)
LayerSpace< T > paramToLayer(const ParameterSpace< T > &paramGeometry) const
GLFWwindow * window
Definition: main.cc:45
float SkScalar
Definition: extension.cpp:12
static float max(float r, float g, float b)
Definition: hsl.cpp:49
static float min(float r, float g, float b)
Definition: hsl.cpp:48
double y
double x
SK_API sk_sp< SkDocument > Make(SkWStream *dst, const SkSerialProcs *=nullptr, std::function< void(const SkPicture *)> onEndPage=nullptr)
Optional< SkRect > bounds
Definition: SkRecords.h:189
bool AsBitmap(const SkSpecialImage *img, SkBitmap *result)
sk_sp< SkSpecialImage > MakeFromRaster(const SkIRect &subset, const SkBitmap &bm, const SkSurfaceProps &props)
Definition: common.py:1
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified vm service A custom Dart VM Service port The default is to pick a randomly available open port disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode disable vm service Disable mDNS Dart VM Service publication Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set endless trace buffer
Definition: switches.h:126
dst
Definition: cp.py:12
constexpr bool BlurIsEffectivelyIdentity(float sigma)
Definition: BlurUtils.h:30
SIN Vec< N, float > sqrt(const Vec< N, float > &x)
Definition: SkVx.h:706
SIN Vec< N, float > floor(const Vec< N, float > &x)
Definition: SkVx.h:703
SIN Vec< N, float > ceil(const Vec< N, float > &x)
Definition: SkVx.h:702
int32_t height
int32_t width
Definition: SkRect.h:32
static constexpr SkIRect MakeXYWH(int32_t x, int32_t y, int32_t w, int32_t h)
Definition: SkRect.h:104
Definition: SkSize.h:52
Definition: SkVx.h:83
static SKVX_ALWAYS_INLINE Vec Load(const void *ptr)
Definition: SkVx.h:109
SKVX_ALWAYS_INLINE void store(void *ptr) const
Definition: SkVx.h:112