Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
SkBlurImageFilter.cpp
Go to the documentation of this file.
1/*
2 * Copyright 2011 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
9
16#include "include/core/SkRect.h"
19#include "include/core/SkSize.h"
26#include "src/base/SkVx.h"
32
33#include <algorithm>
34#include <cmath>
35#include <cstdint>
36#include <cstring>
37#include <optional>
38#include <utility>
39
40struct SkIPoint;
41
42#if defined(SK_GANESH) || defined(SK_GRAPHITE)
43#include "src/gpu/BlurUtils.h"
44#endif
45
46#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
47 #include <xmmintrin.h>
48 #define SK_PREFETCH(ptr) _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0)
49#elif defined(__GNUC__)
50 #define SK_PREFETCH(ptr) __builtin_prefetch(ptr)
51#else
52 #define SK_PREFETCH(ptr)
53#endif
54
55namespace {
56
57class SkBlurImageFilter final : public SkImageFilter_Base {
58public:
59 SkBlurImageFilter(SkSize sigma, sk_sp<SkImageFilter> input)
60 : SkImageFilter_Base(&input, 1)
61 , fSigma{sigma} {}
62
63 SkBlurImageFilter(SkSize sigma, SkTileMode legacyTileMode, sk_sp<SkImageFilter> input)
64 : SkImageFilter_Base(&input, 1)
65 , fSigma(sigma)
66 , fLegacyTileMode(legacyTileMode) {}
67
68 SkRect computeFastBounds(const SkRect&) const override;
69
70protected:
71 void flatten(SkWriteBuffer&) const override;
72
73private:
74 friend void ::SkRegisterBlurImageFilterFlattenable();
75 SK_FLATTENABLE_HOOKS(SkBlurImageFilter)
76
77 skif::FilterResult onFilterImage(const skif::Context& context) const override;
78
79 skif::LayerSpace<SkIRect> onGetInputLayerBounds(
80 const skif::Mapping& mapping,
81 const skif::LayerSpace<SkIRect>& desiredOutput,
82 std::optional<skif::LayerSpace<SkIRect>> contentBounds) const override;
83
84 std::optional<skif::LayerSpace<SkIRect>> onGetOutputLayerBounds(
85 const skif::Mapping& mapping,
86 std::optional<skif::LayerSpace<SkIRect>> contentBounds) const override;
87
88 skif::LayerSpace<SkSize> mapSigma(const skif::Mapping& mapping, bool gpuBacked) const;
89
90 skif::LayerSpace<SkIRect> kernelBounds(const skif::Mapping& mapping,
92 bool gpuBacked) const {
93 skif::LayerSpace<SkSize> sigma = this->mapSigma(mapping, gpuBacked);
94 bounds.outset(skif::LayerSpace<SkSize>({3 * sigma.width(), 3 * sigma.height()}).ceil());
95 return bounds;
96 }
97
99 // kDecal means no legacy tiling, it will be handled by SkCropImageFilter instead. Legacy
100 // tiling occurs when there's no provided crop rect, and should be deleted once clients create
101 // their filters with defined tiling geometry.
102 SkTileMode fLegacyTileMode = SkTileMode::kDecal;
103};
104
105} // end namespace
106
108 SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp<SkImageFilter> input,
109 const CropRect& cropRect) {
110 if (!SkIsFinite(sigmaX, sigmaY) || sigmaX < 0.f || sigmaY < 0.f) {
111 // Non-finite or negative sigmas are error conditions. We allow 0 sigma for X and/or Y
112 // for 1D blurs; onFilterImage() will detect when no visible blurring would occur based on
113 // the Context mapping.
114 return nullptr;
115 }
116
117 // Temporarily allow tiling with no crop rect
118 if (tileMode != SkTileMode::kDecal && !cropRect) {
119 return sk_make_sp<SkBlurImageFilter>(SkSize{sigmaX, sigmaY}, tileMode, std::move(input));
120 }
121
122 // The 'tileMode' behavior is not well-defined if there is no crop. We only apply it if
123 // there is a provided 'cropRect'.
124 sk_sp<SkImageFilter> filter = std::move(input);
125 if (tileMode != SkTileMode::kDecal && cropRect) {
126 // Historically the input image was restricted to the cropRect when tiling was not
127 // kDecal, so that the kernel evaluated the tiled edge conditions, while a kDecal crop
128 // only affected the output.
129 filter = SkImageFilters::Crop(*cropRect, tileMode, std::move(filter));
130 }
131
132 filter = sk_make_sp<SkBlurImageFilter>(SkSize{sigmaX, sigmaY}, std::move(filter));
133 if (cropRect) {
134 // But regardless of the tileMode, the output is always decal cropped
135 filter = SkImageFilters::Crop(*cropRect, SkTileMode::kDecal, std::move(filter));
136 }
137 return filter;
138}
139
141 SK_REGISTER_FLATTENABLE(SkBlurImageFilter);
142 SkFlattenable::Register("SkBlurImageFilterImpl", SkBlurImageFilter::CreateProc);
143}
144
145sk_sp<SkFlattenable> SkBlurImageFilter::CreateProc(SkReadBuffer& buffer) {
147 SkScalar sigmaX = buffer.readScalar();
148 SkScalar sigmaY = buffer.readScalar();
149 SkTileMode tileMode = buffer.read32LE(SkTileMode::kLastTileMode);
150
151 // NOTE: For new SKPs, 'tileMode' holds the "legacy" tile mode; any originally specified tile
152 // mode with valid tiling geometry is handled in the SkCropImageFilters that wrap the blur.
153 // In a new SKP, when 'tileMode' is not kDecal, common.cropRect() will be null and the blur
154 // will automatically emulate the legacy tiling.
155 //
156 // In old SKPs, the 'tileMode' and common.cropRect() may not be null. ::Blur() automatically
157 // detects when this is a legacy or valid tiling and constructs the DAG appropriately.
159 sigmaX, sigmaY, tileMode, common.getInput(0), common.cropRect());
160}
161
162void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
163 this->SkImageFilter_Base::flatten(buffer);
164
165 buffer.writeScalar(SkSize(fSigma).fWidth);
166 buffer.writeScalar(SkSize(fSigma).fHeight);
167 buffer.writeInt(static_cast<int>(fLegacyTileMode));
168}
169
170///////////////////////////////////////////////////////////////////////////////
171
172namespace {
173
174// TODO: Move these functions into a CPU, 8888-only blur engine implementation; ideally share logic
175// with the similar techniques in SkMaskBlurFilter on 4x A8 data.
176
177// TODO(b/294575803): Provide a more accurate CPU implementation at s<2, at which point the notion
178// of an identity sigma can be consolidated between the different functions.
179// This is defined by the SVG spec:
180// https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement
181int calculate_window(double sigma) {
182 auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5));
183 return std::max(1, possibleWindow);
184}
185
186// This rather arbitrary-looking value results in a maximum box blur kernel size
187// of 1000 pixels on the raster path, which matches the WebKit and Firefox
188// implementations. Since the GPU path does not compute a box blur, putting
189// the limit on sigma ensures consistent behaviour between the GPU and
190// raster paths.
191static constexpr SkScalar kMaxSigma = 532.f;
192
193class Pass {
194public:
195 explicit Pass(int border) : fBorder(border) {}
196 virtual ~Pass() = default;
197
198 void blur(int srcLeft, int srcRight, int dstRight,
199 const uint32_t* src, int srcStride,
200 uint32_t* dst, int dstStride) {
201 this->startBlur();
202
203 auto srcStart = srcLeft - fBorder,
204 srcEnd = srcRight - fBorder,
205 dstEnd = dstRight,
206 srcIdx = srcStart,
207 dstIdx = 0;
208
209 const uint32_t* srcCursor = src;
210 uint32_t* dstCursor = dst;
211
212 if (dstIdx < srcIdx) {
213 // The destination pixels are not effected by the src pixels,
214 // change to zero as per the spec.
215 // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
216 int commonEnd = std::min(srcIdx, dstEnd);
217 while (dstIdx < commonEnd) {
218 *dstCursor = 0;
219 dstCursor += dstStride;
220 SK_PREFETCH(dstCursor);
221 dstIdx++;
222 }
223 } else if (srcIdx < dstIdx) {
224 // The edge of the source is before the edge of the destination. Calculate the sums for
225 // the pixels before the start of the destination.
226 if (int commonEnd = std::min(dstIdx, srcEnd); srcIdx < commonEnd) {
227 // Preload the blur with values from src before dst is entered.
228 int n = commonEnd - srcIdx;
229 this->blurSegment(n, srcCursor, srcStride, nullptr, 0);
230 srcIdx += n;
231 srcCursor += n * srcStride;
232 }
233 if (srcIdx < dstIdx) {
234 // The weird case where src is out of pixels before dst is even started.
235 int n = dstIdx - srcIdx;
236 this->blurSegment(n, nullptr, 0, nullptr, 0);
237 srcIdx += n;
238 }
239 }
240
241 if (int commonEnd = std::min(dstEnd, srcEnd); dstIdx < commonEnd) {
242 // Both srcIdx and dstIdx are in sync now, and can run in a 1:1 fashion. This is the
243 // normal mode of operation.
244 SkASSERT(srcIdx == dstIdx);
245
246 int n = commonEnd - dstIdx;
247 this->blurSegment(n, srcCursor, srcStride, dstCursor, dstStride);
248 srcCursor += n * srcStride;
249 dstCursor += n * dstStride;
250 dstIdx += n;
251 srcIdx += n;
252 }
253
254 // Drain the remaining blur values into dst assuming 0's for the leading edge.
255 if (dstIdx < dstEnd) {
256 int n = dstEnd - dstIdx;
257 this->blurSegment(n, nullptr, 0, dstCursor, dstStride);
258 }
259 }
260
261protected:
262 virtual void startBlur() = 0;
263 virtual void blurSegment(
264 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) = 0;
265
266private:
267 const int fBorder;
268};
269
270class PassMaker {
271public:
272 explicit PassMaker(int window) : fWindow{window} {}
273 virtual ~PassMaker() = default;
274 virtual Pass* makePass(void* buffer, SkArenaAlloc* alloc) const = 0;
275 virtual size_t bufferSizeBytes() const = 0;
276 int window() const {return fWindow;}
277
278private:
279 const int fWindow;
280};
281
282// Implement a scanline processor that uses a three-box filter to approximate a Gaussian blur.
283// The GaussPass is limited to processing sigmas < 135.
284class GaussPass final : public Pass {
285public:
286 // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
287 // using the Gauss filter. It also limits the size of buffers used to hold intermediate values.
288 // Explanation of maximums:
289 // sum0 = window * 255
290 // sum1 = window * sum0 -> window * window * 255
291 // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
292 //
293 // The value window^3 * 255 must fit in a uint32_t. So,
294 // window^3 < 2^32. window = 255.
295 //
296 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
297 // For window <= 255, the largest value for sigma is 136.
298 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
299 SkASSERT(0 <= sigma);
300 int window = calculate_window(sigma);
301 if (255 <= window) {
302 return nullptr;
303 }
304
305 class Maker : public PassMaker {
306 public:
307 explicit Maker(int window) : PassMaker{window} {}
308 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
309 return GaussPass::Make(this->window(), buffer, alloc);
310 }
311
312 size_t bufferSizeBytes() const override {
313 int window = this->window();
314 size_t onePassSize = window - 1;
315 // If the window is odd, then there is an obvious middle element. For even sizes
316 // 2 passes are shifted, and the last pass has an extra element. Like this:
317 // S
318 // aaaAaa
319 // bbBbbb
320 // cccCccc
321 // D
322 size_t bufferCount = (window & 1) == 1 ? 3 * onePassSize : 3 * onePassSize + 1;
323 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
324 }
325 };
326
327 return alloc->make<Maker>(window);
328 }
329
330 static GaussPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
331 // We don't need to store the trailing edge pixel in the buffer;
332 int passSize = window - 1;
333 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
334 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
335 skvx::Vec<4, uint32_t>* buffer2 = buffer1 + passSize;
336 // If the window is odd just one buffer is needed, but if it's even, then there is one
337 // more element on that pass.
338 skvx::Vec<4, uint32_t>* buffersEnd = buffer2 + ((window & 1) ? passSize : passSize + 1);
339
340 // Calculating the border is tricky. The border is the distance in pixels between the first
341 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
342 // I will go through the odd case which is simpler, and then through the even case. Given a
343 // stack of filters seven wide for the odd case of three passes.
344 //
345 // S
346 // aaaAaaa
347 // bbbBbbb
348 // cccCccc
349 // D
350 //
351 // The furthest changed pixel is when the filters are in the following configuration.
352 //
353 // S
354 // aaaAaaa
355 // bbbBbbb
356 // cccCccc
357 // D
358 //
359 // The A pixel is calculated using the value S, the B uses A, and the C uses B, and
360 // finally D is C. So, with a window size of seven the border is nine. In the odd case, the
361 // border is 3*((window - 1)/2).
362 //
363 // For even cases the filter stack is more complicated. The spec specifies two passes
364 // of even filters and a final pass of odd filters. A stack for a width of six looks like
365 // this.
366 //
367 // S
368 // aaaAaa
369 // bbBbbb
370 // cccCccc
371 // D
372 //
373 // The furthest pixel looks like this.
374 //
375 // S
376 // aaaAaa
377 // bbBbbb
378 // cccCccc
379 // D
380 //
381 // For a window of six, the border value is eight. In the even case the border is 3 *
382 // (window/2) - 1.
383 int border = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1;
384
385 // If the window is odd then the divisor is just window ^ 3 otherwise,
386 // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
387 int window2 = window * window;
388 int window3 = window2 * window;
389 int divisor = (window & 1) == 1 ? window3 : window3 + window2;
390 return alloc->make<GaussPass>(buffer0, buffer1, buffer2, buffersEnd, border, divisor);
391 }
392
393 GaussPass(skvx::Vec<4, uint32_t>* buffer0,
394 skvx::Vec<4, uint32_t>* buffer1,
395 skvx::Vec<4, uint32_t>* buffer2,
396 skvx::Vec<4, uint32_t>* buffersEnd,
397 int border,
398 int divisor)
399 : Pass{border}
400 , fBuffer0{buffer0}
401 , fBuffer1{buffer1}
402 , fBuffer2{buffer2}
403 , fBuffersEnd{buffersEnd}
404 , fDivider(divisor) {}
405
406private:
407 void startBlur() override {
408 skvx::Vec<4, uint32_t> zero = {0u, 0u, 0u, 0u};
409 zero.store(fSum0);
410 zero.store(fSum1);
411 auto half = fDivider.half();
412 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum2);
413 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
414
415 fBuffer0Cursor = fBuffer0;
416 fBuffer1Cursor = fBuffer1;
417 fBuffer2Cursor = fBuffer2;
418 }
419
420 // GaussPass implements the common three pass box filter approximation of Gaussian blur,
421 // but combines all three passes into a single pass. This approach is facilitated by three
422 // circular buffers the width of the window which track values for trailing edges of each of
423 // the three passes. This allows the algorithm to use more precision in the calculation
424 // because the values are not rounded each pass. And this implementation also avoids a trap
425 // that's easy to fall into resulting in blending in too many zeroes near the edge.
426 //
427 // In general, a window sum has the form:
428 // sum_n+1 = sum_n + leading_edge - trailing_edge.
429 // If instead we do the subtraction at the end of the previous iteration, we can just
430 // calculate the sums instead of having to do the subtractions too.
431 //
432 // In previous iteration:
433 // sum_n+1 = sum_n - trailing_edge.
434 //
435 // In this iteration:
436 // sum_n+1 = sum_n + leading_edge.
437 //
438 // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
439 // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
440 // three passes at the same time has the form:
441 //
442 // sum0_n+1 = sum0_n + leading edge
443 // sum1_n+1 = sum1_n + sum0_n+1
444 // sum2_n+1 = sum2_n + sum1_n+1
445 //
446 // sum2_n+1 / window^3 is the new value of the destination pixel.
447 //
448 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
449 // next go around. This is the case for odd sized windows; for even windows the third
450 // circular buffer is one larger than the first two circular buffers.
451 //
452 // sum2_n+2 = sum2_n+1 - buffer2[i];
453 // buffer2[i] = sum1;
454 // sum1_n+2 = sum1_n+1 - buffer1[i];
455 // buffer1[i] = sum0;
456 // sum0_n+2 = sum0_n+1 - buffer0[i];
457 // buffer0[i] = leading edge
458 void blurSegment(
459 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
460 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
461 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
462 skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
466
467 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
468 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
469 sum0 += leadingEdge;
470 sum1 += sum0;
471 sum2 += sum1;
472
473 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum2);
474
475 sum2 -= *buffer2Cursor;
476 *buffer2Cursor = sum1;
477 buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
478 sum1 -= *buffer1Cursor;
479 *buffer1Cursor = sum0;
480 buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
481 sum0 -= *buffer0Cursor;
482 *buffer0Cursor = leadingEdge;
483 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
484
485 return skvx::cast<uint8_t>(blurred);
486 };
487
488 auto loadEdge = [&](const uint32_t* srcCursor) {
489 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
490 };
491
492 if (!src && !dst) {
493 while (n --> 0) {
494 (void)processValue(0);
495 }
496 } else if (src && !dst) {
497 while (n --> 0) {
498 (void)processValue(loadEdge(src));
499 src += srcStride;
500 }
501 } else if (!src && dst) {
502 while (n --> 0) {
503 processValue(0u).store(dst);
504 dst += dstStride;
505 }
506 } else if (src && dst) {
507 while (n --> 0) {
508 processValue(loadEdge(src)).store(dst);
509 src += srcStride;
510 dst += dstStride;
511 }
512 }
513
514 // Store the state
515 fBuffer0Cursor = buffer0Cursor;
516 fBuffer1Cursor = buffer1Cursor;
517 fBuffer2Cursor = buffer2Cursor;
518
519 sum0.store(fSum0);
520 sum1.store(fSum1);
521 sum2.store(fSum2);
522 }
523
524 skvx::Vec<4, uint32_t>* const fBuffer0;
525 skvx::Vec<4, uint32_t>* const fBuffer1;
526 skvx::Vec<4, uint32_t>* const fBuffer2;
527 skvx::Vec<4, uint32_t>* const fBuffersEnd;
528 const skvx::ScaledDividerU32 fDivider;
529
530 // blur state
531 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
532 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
533 char fSum2[sizeof(skvx::Vec<4, uint32_t>)];
534 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
535 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
536 skvx::Vec<4, uint32_t>* fBuffer2Cursor;
537};
538
539// Implement a scanline processor that uses a two-box filter to approximate a Tent filter.
540// The TentPass is limited to processing sigmas < 2183.
541class TentPass final : public Pass {
542public:
543 // NB 2183 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
544 // using the Tent filter. It also limits the size of buffers used to hold intermediate values.
545 // Explanation of maximums:
546 // sum0 = window * 255
547 // sum1 = window * sum0 -> window * window * 255
548 //
549 // The value window^2 * 255 must fit in a uint32_t. So,
550 // window^2 < 2^32. window = 4104.
551 //
552 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
553 // For window <= 4104, the largest value for sigma is 2183.
554 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
555 SkASSERT(0 <= sigma);
556 int gaussianWindow = calculate_window(sigma);
557 // This is a naive method of using the window size for the Gaussian blur to calculate the
558 // window size for the Tent blur. This seems to work well in practice.
559 //
560 // We can use a single pixel to generate the effective blur area given a window size. For
561 // the Gaussian blur this is 3 * window size. For the Tent filter this is 2 * window size.
562 int tentWindow = 3 * gaussianWindow / 2;
563 if (tentWindow >= 4104) {
564 return nullptr;
565 }
566
567 class Maker : public PassMaker {
568 public:
569 explicit Maker(int window) : PassMaker{window} {}
570 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
571 return TentPass::Make(this->window(), buffer, alloc);
572 }
573
574 size_t bufferSizeBytes() const override {
575 size_t onePassSize = this->window() - 1;
576 // If the window is odd, then there is an obvious middle element. For even sizes 2
577 // passes are shifted, and the last pass has an extra element. Like this:
578 // S
579 // aaaAaa
580 // bbBbbb
581 // D
582 size_t bufferCount = 2 * onePassSize;
583 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
584 }
585 };
586
587 return alloc->make<Maker>(tentWindow);
588 }
589
590 static TentPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
591 if (window > 4104) {
592 return nullptr;
593 }
594
595 // We don't need to store the trailing edge pixel in the buffer;
596 int passSize = window - 1;
597 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
598 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
599 skvx::Vec<4, uint32_t>* buffersEnd = buffer1 + passSize;
600
601 // Calculating the border is tricky. The border is the distance in pixels between the first
602 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
603 // I will go through the odd case which is simpler, and then through the even case. Given a
604 // stack of filters seven wide for the odd case of three passes.
605 //
606 // S
607 // aaaAaaa
608 // bbbBbbb
609 // D
610 //
611 // The furthest changed pixel is when the filters are in the following configuration.
612 //
613 // S
614 // aaaAaaa
615 // bbbBbbb
616 // D
617 //
618 // The A pixel is calculated using the value S, the B uses A, and the D uses B.
619 // So, with a window size of seven the border is nine. In the odd case, the border is
620 // window - 1.
621 //
622 // For even cases the filter stack is more complicated. It uses two passes
623 // of even filters offset from each other. A stack for a width of six looks like
624 // this.
625 //
626 // S
627 // aaaAaa
628 // bbBbbb
629 // D
630 //
631 // The furthest pixel looks like this.
632 //
633 // S
634 // aaaAaa
635 // bbBbbb
636 // D
637 //
638 // For a window of six, the border value is 5. In the even case the border is
639 // window - 1.
640 int border = window - 1;
641
642 int divisor = window * window;
643 return alloc->make<TentPass>(buffer0, buffer1, buffersEnd, border, divisor);
644 }
645
646 TentPass(skvx::Vec<4, uint32_t>* buffer0,
647 skvx::Vec<4, uint32_t>* buffer1,
648 skvx::Vec<4, uint32_t>* buffersEnd,
649 int border,
650 int divisor)
651 : Pass{border}
652 , fBuffer0{buffer0}
653 , fBuffer1{buffer1}
654 , fBuffersEnd{buffersEnd}
655 , fDivider(divisor) {}
656
657private:
658 void startBlur() override {
659 skvx::Vec<4, uint32_t>{0u, 0u, 0u, 0u}.store(fSum0);
660 auto half = fDivider.half();
661 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum1);
662 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
663
664 fBuffer0Cursor = fBuffer0;
665 fBuffer1Cursor = fBuffer1;
666 }
667
668 // TentPass implements the common two pass box filter approximation of Tent filter,
669 // but combines both passes into a single pass. This approach is facilitated by two
670 // circular buffers the width of the window which track values for trailing edges of each of
671 // both passes. This allows the algorithm to use more precision in the calculation
672 // because the values are not rounded each pass. And this implementation also avoids a trap
673 // that's easy to fall into resulting in blending in too many zeroes near the edge.
674 //
675 // In general, a window sum has the form:
676 // sum_n+1 = sum_n + leading_edge - trailing_edge.
677 // If instead we do the subtraction at the end of the previous iteration, we can just
678 // calculate the sums instead of having to do the subtractions too.
679 //
680 // In previous iteration:
681 // sum_n+1 = sum_n - trailing_edge.
682 //
683 // In this iteration:
684 // sum_n+1 = sum_n + leading_edge.
685 //
686 // Now we can stack both sums and do them at once. Sum0 gets its leading edge from the
687 // actual data, and Sum1's leading edge is just Sum0. So, doing the
688 // two passes at the same time has the form:
689 //
690 // sum0_n+1 = sum0_n + leading edge
691 // sum1_n+1 = sum1_n + sum0_n+1
692 //
693 // sum1_n+1 / window^2 is the new value of the destination pixel.
694 //
695 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
696 // next go around.
697 //
698 // sum1_n+2 = sum1_n+1 - buffer1[i];
699 // buffer1[i] = sum0;
700 // sum0_n+2 = sum0_n+1 - buffer0[i];
701 // buffer0[i] = leading edge
702 void blurSegment(
703 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
704 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
705 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
708
709 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
710 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
711 sum0 += leadingEdge;
712 sum1 += sum0;
713
714 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum1);
715
716 sum1 -= *buffer1Cursor;
717 *buffer1Cursor = sum0;
718 buffer1Cursor = (buffer1Cursor + 1) < fBuffersEnd ? buffer1Cursor + 1 : fBuffer1;
719 sum0 -= *buffer0Cursor;
720 *buffer0Cursor = leadingEdge;
721 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
722
723 return skvx::cast<uint8_t>(blurred);
724 };
725
726 auto loadEdge = [&](const uint32_t* srcCursor) {
727 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
728 };
729
730 if (!src && !dst) {
731 while (n --> 0) {
732 (void)processValue(0);
733 }
734 } else if (src && !dst) {
735 while (n --> 0) {
736 (void)processValue(loadEdge(src));
737 src += srcStride;
738 }
739 } else if (!src && dst) {
740 while (n --> 0) {
741 processValue(0u).store(dst);
742 dst += dstStride;
743 }
744 } else if (src && dst) {
745 while (n --> 0) {
746 processValue(loadEdge(src)).store(dst);
747 src += srcStride;
748 dst += dstStride;
749 }
750 }
751
752 // Store the state
753 fBuffer0Cursor = buffer0Cursor;
754 fBuffer1Cursor = buffer1Cursor;
755 sum0.store(fSum0);
756 sum1.store(fSum1);
757 }
758
759 skvx::Vec<4, uint32_t>* const fBuffer0;
760 skvx::Vec<4, uint32_t>* const fBuffer1;
761 skvx::Vec<4, uint32_t>* const fBuffersEnd;
762 const skvx::ScaledDividerU32 fDivider;
763
764 // blur state
765 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
766 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
767 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
768 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
769};
770
771// TODO: Implement CPU backend for different fTileMode. This is still worth doing inline with the
772// blur; at the moment the tiling is applied via the CropImageFilter and carried as metadata on
773// the FilterResult. This is forcefully applied in onFilterImage() to get a simple SkSpecialImage to
774// pass to cpu_blur or gpu_blur, which evaluates the tile mode into a kernel-outset buffer that is
775// then processed by these functions. If the tilemode is the only thing being applied, it would be
776// ideal to tile from the input image directly instead of inserting a new temporary image. For CPU
777// blurs this temporary image now creates the appearance of correctness; for GPU blurs that could
778// tile already it may create a regression.
779sk_sp<SkSpecialImage> cpu_blur(const skif::Context& ctx,
781 const sk_sp<SkSpecialImage>& input,
783 skif::LayerSpace<SkIRect> dstBounds) {
784 // map_sigma limits sigma to 532 to match 1000px box filter limit of WebKit and Firefox.
785 // Since this does not exceed the limits of the TentPass (2183), there won't be overflow when
786 // computing a kernel over a pixel window filled with 255.
787 static_assert(kMaxSigma <= 2183.0f);
788
789 // The input image should fill the srcBounds
790 SkASSERT(input->width() == srcBounds.width() && input->height() == srcBounds.height());
791
793 auto makeMaker = [&](double sigma) -> PassMaker* {
794 SkASSERT(0 <= sigma && sigma <= 2183); // should be guaranteed after map_sigma
795 if (PassMaker* maker = GaussPass::MakeMaker(sigma, &alloc)) {
796 return maker;
797 }
798 if (PassMaker* maker = TentPass::MakeMaker(sigma, &alloc)) {
799 return maker;
800 }
801 SK_ABORT("Sigma is out of range.");
802 };
803
804 PassMaker* makerX = makeMaker(sigma.width());
805 PassMaker* makerY = makeMaker(sigma.height());
806 // A no-op blur should have been caught earlier in onFilterImage().
807 SkASSERT(makerX->window() > 1 || makerY->window() > 1);
808
810 if (!SkSpecialImages::AsBitmap(input.get(), &src)) {
811 return nullptr;
812 }
813 if (src.colorType() != kN32_SkColorType) {
814 return nullptr;
815 }
816
817 auto originalDstBounds = dstBounds;
818 if (makerX->window() > 1) {
819 // Inflate the dst by the window required for the Y pass so that the X pass can prepare it.
820 // The Y pass will be offset to only write to the original rows in dstBounds, but its window
821 // will access these extra rows calculated by the X pass. The SpecialImage factory will
822 // then subset the bitmap so it appears to match 'originalDstBounds' tightly. We make one
823 // slightly larger image to hold this extra data instead of two separate images sized
824 // exactly to each pass because the CPU blur can write in place.
825 const auto yPadding = skif::LayerSpace<SkSize>({0.f, 3 * sigma.height()}).ceil();
826 dstBounds.outset(yPadding);
827 }
828
830 const skif::LayerSpace<SkIPoint> dstOrigin = dstBounds.topLeft();
831 if (!dst.tryAllocPixels(src.info().makeWH(dstBounds.width(), dstBounds.height()))) {
832 return nullptr;
833 }
834 dst.eraseColor(SK_ColorTRANSPARENT);
835
836 auto buffer = alloc.makeBytesAlignedTo(std::max(makerX->bufferSizeBytes(),
837 makerY->bufferSizeBytes()),
838 alignof(skvx::Vec<4, uint32_t>));
839
840 // Basic Plan: The three cases to handle
841 // * Horizontal and Vertical - blur horizontally while copying values from the source to
842 // the destination. Then, do an in-place vertical blur.
843 // * Horizontal only - blur horizontally copying values from the source to the destination.
844 // * Vertical only - blur vertically copying values from the source to the destination.
845
846 // Initialize these assuming the Y-only case
847 int loopStart = std::max(srcBounds.left(), dstBounds.left());
848 int loopEnd = std::min(srcBounds.right(), dstBounds.right());
849 int dstYOffset = 0;
850
851 if (makerX->window() > 1) {
852 // First an X-only blur from src into dst, including the extra rows that will become input
853 // for the second Y pass, which will then be performed in place.
854 loopStart = std::max(srcBounds.top(), dstBounds.top());
855 loopEnd = std::min(srcBounds.bottom(), dstBounds.bottom());
856
857 auto srcAddr = src.getAddr32(0, loopStart - srcBounds.top());
858 auto dstAddr = dst.getAddr32(0, loopStart - dstBounds.top());
859
860 // Iterate over each row to calculate 1D blur along X.
861 Pass* pass = makerX->makePass(buffer, &alloc);
862 for (int y = loopStart; y < loopEnd; ++y) {
863 pass->blur(srcBounds.left() - dstBounds.left(),
864 srcBounds.right() - dstBounds.left(),
865 dstBounds.width(),
866 srcAddr, 1,
867 dstAddr, 1);
868 srcAddr += src.rowBytesAsPixels();
869 dstAddr += dst.rowBytesAsPixels();
870 }
871
872 // Set up the Y pass to blur from the full dst into the non-outset portion of dst
873 src = dst;
874 loopStart = originalDstBounds.left();
875 loopEnd = originalDstBounds.right();
876 // The new 'dst' is equal to dst.extractSubset(originalDstBounds.offset(-dstOrigin)), but
877 // by construction only the Y offset has an interesting value so this is a little more
878 // efficient.
879 dstYOffset = originalDstBounds.top() - dstBounds.top();
880
881 srcBounds = dstBounds;
882 dstBounds = originalDstBounds;
883 }
884
885 // Iterate over each column to calculate 1D blur along Y. This is either blurring from src into
886 // dst for a 1D blur; or it's blurring from dst into dst for the second pass of a 2D blur.
887 if (makerY->window() > 1) {
888 auto srcAddr = src.getAddr32(loopStart - srcBounds.left(), 0);
889 auto dstAddr = dst.getAddr32(loopStart - dstBounds.left(), dstYOffset);
890
891 Pass* pass = makerY->makePass(buffer, &alloc);
892 for (int x = loopStart; x < loopEnd; ++x) {
893 pass->blur(srcBounds.top() - dstBounds.top(),
894 srcBounds.bottom() - dstBounds.top(),
895 dstBounds.height(),
896 srcAddr, src.rowBytesAsPixels(),
897 dstAddr, dst.rowBytesAsPixels());
898 srcAddr += 1;
899 dstAddr += 1;
900 }
901 }
902
903 originalDstBounds.offset(-dstOrigin); // Make relative to dst's pixels
904 return SkSpecialImages::MakeFromRaster(SkIRect(originalDstBounds),
905 dst,
906 ctx.backend()->surfaceProps());
907}
908
909} // namespace
910
911skif::FilterResult SkBlurImageFilter::onFilterImage(const skif::Context& ctx) const {
912 const bool gpuBacked = SkToBool(ctx.backend()->getBlurEngine());
913
914 skif::Context inputCtx = ctx.withNewDesiredOutput(
915 this->kernelBounds(ctx.mapping(), ctx.desiredOutput(), gpuBacked));
916
917 skif::FilterResult childOutput = this->getChildOutput(0, inputCtx);
918 skif::LayerSpace<SkSize> sigma = this->mapSigma(ctx.mapping(), gpuBacked);
919 if (sigma.width() == 0.f && sigma.height() == 0.f) {
920 // No actual blur, so just return the input unmodified
921 return childOutput;
922 }
923
924 SkASSERT(sigma.width() >= 0.f && sigma.width() <= kMaxSigma &&
925 sigma.height() >= 0.f && sigma.height() <= kMaxSigma);
926
927 // TODO: This is equivalent to what Builder::blur() calculates under the hood, but is calculated
928 // *before* we apply any legacy tile mode since the legacy tiling did not actually cause the
929 // output to extend fully.
930 skif::LayerSpace<SkIRect> maxOutput =
931 this->kernelBounds(ctx.mapping(), childOutput.layerBounds(), gpuBacked);
932 if (!maxOutput.intersect(ctx.desiredOutput())) {
933 return {};
934 }
935
936 if (fLegacyTileMode != SkTileMode::kDecal) {
937 // Legacy tiling applied to the input image when there was no explicit crop rect. Use the
938 // child's output image's layer bounds as the crop rectangle to adjust the edge tile mode
939 // without restricting the image.
940 childOutput = childOutput.applyCrop(inputCtx,
941 childOutput.layerBounds(),
942 fLegacyTileMode);
943 }
944
945 // TODO(b/40039877): Once the CPU blur functions can handle tile modes and color types beyond
946 // N32, there won't be any need to branch on how to apply the blur to the filter result.
947 if (gpuBacked) {
948 // For non-legacy tiling, 'maxOutput' is equal to the desired output. For decal's it matches
949 // what Builder::blur() calculates internally. For legacy tiling, however, it's dependent on
950 // the original child output's bounds ignoring the tile mode's effect.
951 skif::Context croppedOutput = ctx.withNewDesiredOutput(maxOutput);
953 builder.add(childOutput);
954 return builder.blur(sigma);
955 }
956
957 // The CPU blur does not yet support tile modes so explicitly resolve it to a special image that
958 // has the tiling rendered into the pixels.
959
960 auto [resolvedChildOutput, origin] = childOutput.imageAndOffset(inputCtx);
961 if (!resolvedChildOutput) {
962 return {};
963 }
964 skif::LayerSpace<SkIRect> srcBounds{SkIRect::MakeXYWH(origin.x(),
965 origin.y(),
966 resolvedChildOutput->width(),
967 resolvedChildOutput->height())};
968
969 return skif::FilterResult{cpu_blur(ctx, sigma, std::move(resolvedChildOutput),
970 srcBounds, maxOutput),
971 maxOutput.topLeft()};
972}
973
974skif::LayerSpace<SkSize> SkBlurImageFilter::mapSigma(const skif::Mapping& mapping,
975 bool gpuBacked) const {
976 skif::LayerSpace<SkSize> sigma = mapping.paramToLayer(fSigma);
977 // Clamp to the maximum sigma
978 sigma = skif::LayerSpace<SkSize>({std::min(sigma.width(), kMaxSigma),
979 std::min(sigma.height(), kMaxSigma)});
980
981 // TODO(b/294575803) - The CPU and GPU implementations have different requirements for
982 // "identity", with the GPU able to handle smaller sigmas. calculate_window() returns <= 1 once
983 // sigma is below ~0.8. Ideally we should work out the sigma threshold such that the max
984 // contribution from adjacent pixels is less than 0.5/255 and use that for both backends.
985 // NOTE: For convenience with builds, and the flux that is about to occur with the blur utils,
986 // this GPU logic is just copied from GrBlurUtils
987
988 // Disable bluring on axes that are not finite, or that are small enough that the blur is
989 // effectively an identity.
990 if (!SkIsFinite(sigma.width()) || (!gpuBacked && calculate_window(sigma.width()) <= 1)
991#if defined(SK_GANESH) || defined(SK_GRAPHITE)
992 || (gpuBacked && skgpu::BlurIsEffectivelyIdentity(sigma.width()))
993#endif
994 ) {
995 sigma = skif::LayerSpace<SkSize>({0.f, sigma.height()});
996 }
997
998 if (!SkIsFinite(sigma.height()) || (!gpuBacked && calculate_window(sigma.height()) <= 1)
999#if defined(SK_GANESH) || defined(SK_GRAPHITE)
1000 || (gpuBacked && skgpu::BlurIsEffectivelyIdentity(sigma.height()))
1001#endif
1002 ) {
1003 sigma = skif::LayerSpace<SkSize>({sigma.width(), 0.f});
1004 }
1005
1006 return sigma;
1007}
1008
1009skif::LayerSpace<SkIRect> SkBlurImageFilter::onGetInputLayerBounds(
1010 const skif::Mapping& mapping,
1011 const skif::LayerSpace<SkIRect>& desiredOutput,
1012 std::optional<skif::LayerSpace<SkIRect>> contentBounds) const {
1013 // Use gpuBacked=true since that has a more sensitive kernel, ensuring any layer input bounds
1014 // will be sufficient for both GPU and CPU evaluations.
1015 skif::LayerSpace<SkIRect> requiredInput =
1016 this->kernelBounds(mapping, desiredOutput, /*gpuBacked=*/true);
1017 return this->getChildInputLayerBounds(0, mapping, requiredInput, contentBounds);
1018}
1019
1020std::optional<skif::LayerSpace<SkIRect>> SkBlurImageFilter::onGetOutputLayerBounds(
1021 const skif::Mapping& mapping,
1022 std::optional<skif::LayerSpace<SkIRect>> contentBounds) const {
1023 auto childOutput = this->getChildOutputLayerBounds(0, mapping, contentBounds);
1024 if (childOutput) {
1025 // Use gpuBacked=true since it will ensure output bounds are conservative; CPU-based blurs
1026 // may produce 1px inset from this for very small sigmas.
1027 return this->kernelBounds(mapping, *childOutput, /*gpuBacked=*/true);
1028 } else {
1030 }
1031}
1032
1033SkRect SkBlurImageFilter::computeFastBounds(const SkRect& src) const {
1034 SkRect bounds = this->getInput(0) ? this->getInput(0)->computeFastBounds(src) : src;
1035 bounds.outset(SkSize(fSigma).width() * 3, SkSize(fSigma).height() * 3);
1036 return bounds;
1037}
#define SK_ABORT(message,...)
Definition SkAssert.h:70
#define SkASSERT(cond)
Definition SkAssert.h:116
void SkRegisterBlurImageFilterFlattenable()
#define SK_PREFETCH(ptr)
constexpr SkColor SK_ColorTRANSPARENT
Definition SkColor.h:99
#define SK_FLATTENABLE_HOOKS(type)
#define SK_REGISTER_FLATTENABLE(type)
constexpr double SK_DoublePI
static bool SkIsFinite(T x, Pack... values)
#define SK_IMAGEFILTER_UNFLATTEN_COMMON(localVar, expectedCount)
static std::unique_ptr< SkEncoder > Make(SkWStream *dst, const SkPixmap *src, const SkYUVAPixmaps *srcYUVA, const SkColorSpace *srcYUVAColorSpace, const SkJpegEncoder::Options &options)
static void sk_bzero(void *buffer, size_t size)
Definition SkMalloc.h:105
SkTileMode
Definition SkTileMode.h:13
static constexpr bool SkToBool(const T &x)
Definition SkTo.h:35
void * makeBytesAlignedTo(size_t size, size_t align)
auto make(Ctor &&ctor) -> decltype(ctor(nullptr))
static void Register(const char name[], Factory)
void flatten(SkWriteBuffer &) const override
static sk_sp< SkImageFilter > Blur(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp< SkImageFilter > input, const CropRect &cropRect={})
static sk_sp< SkImageFilter > Crop(const SkRect &rect, SkTileMode tileMode, sk_sp< SkImageFilter > input)
T * get() const
Definition SkRefCnt.h:303
const SkSurfaceProps & surfaceProps() const
virtual const SkBlurEngine * getBlurEngine() const =0
const Backend * backend() const
const LayerSpace< SkIRect > & desiredOutput() const
Context withNewDesiredOutput(const LayerSpace< SkIRect > &desiredOutput) const
const Mapping & mapping() const
FilterResult applyCrop(const Context &ctx, const LayerSpace< SkIRect > &crop, SkTileMode tileMode=SkTileMode::kDecal) const
sk_sp< SkSpecialImage > imageAndOffset(const Context &ctx, SkIPoint *offset) const
LayerSpace< SkIRect > layerBounds() const
LayerSpace< T > paramToLayer(const ParameterSpace< T > &paramGeometry) const
GLFWwindow * window
Definition main.cc:45
float SkScalar
Definition extension.cpp:12
static const uint8_t buffer[]
double y
double x
Optional< SkRect > bounds
Definition SkRecords.h:189
dst
Definition cp.py:12
constexpr bool BlurIsEffectivelyIdentity(float sigma)
Definition BlurUtils.h:37
SIN Vec< N, float > sqrt(const Vec< N, float > &x)
Definition SkVx.h:706
SIN Vec< N, float > floor(const Vec< N, float > &x)
Definition SkVx.h:703
SIN Vec< N, float > ceil(const Vec< N, float > &x)
Definition SkVx.h:702
int32_t height
int32_t width
static constexpr SkIRect MakeXYWH(int32_t x, int32_t y, int32_t w, int32_t h)
Definition SkRect.h:104
static SKVX_ALWAYS_INLINE Vec Load(const void *ptr)
Definition SkVx.h:109
SKVX_ALWAYS_INLINE void store(void *ptr) const
Definition SkVx.h:112