Flutter Engine
The Flutter Engine
PerEdgeAAQuadRenderStep.cpp
Go to the documentation of this file.
1/*
2 * Copyright 2023 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
9
10#include "src/base/SkVx.h"
15
16// This RenderStep is specialized to draw filled rectangles with per-edge AA.
17//
18// Each of these "primitives" is represented by a single instance. The instance attributes are
19// flexible enough to describe per-edge AA quads without relying on uniforms to define its
20// operation. The attributes encode shape as follows:
21//
22// float4 edgeFlags - per-edge AA defined by each component: aa != 0.
23// float4 quadXs - these values provide the X coordinates of the quadrilateral in top-left CW order.
24// float4 quadYs - these values provide the Y coordinates of the quadrilateral.
25//
26// From the other direction, per-edge AA quads produce instance values like:
//  - [aa(l,t,r,b) ? 255 : 0]   [xs(tl,tr,br,bl)]   [ys(tl,tr,br,bl)]
28//
29// From this encoding, data can be unpacked for each corner, which are equivalent under
30// rotational symmetry. Per-edge quads are always mitered and fill the interior, but the
31// vertices are placed such that the edge coverage ramps can collapse to 0 area on non-AA edges.
32//
33// The vertices that describe each corner are placed so that edges and miters calculate
34// coverage by interpolating a varying and then clamping in the fragment shader. Triangles that
35// cover the inner and outer curves calculate distance to the curve within the fragment shader.
36//
37// See https://docs.google.com/presentation/d/1MCPstNsSlDBhR8CrsJo0r-cZNbu-sEJEvU9W94GOJoY/edit?usp=sharing
38// for diagrams and explanation of how the geometry is defined.
39//
40// PerEdgeAAQuadRenderStep uses the common technique of approximating distance to the level set by
41// one expansion of the Taylor's series for the level set's equation. Given a level set function
42// C(x,y), this amounts to calculating C(px,py)/|∇C(px,py)|. For the straight edges the level set
43// is linear and calculated in the vertex shader and then interpolated exactly over the rectangle.
44// This provides distances to all four exterior edges within the fragment shader and allows it to
45// reconstruct a relative position per elliptical corner. Unfortunately this requires the fragment
46// shader to calculate the length of the gradient for straight edges instead of interpolating
47// exact device-space distance.
48//
49// Unlike AnalyticRRectRenderStep, for per-edge AA quads it's valid to have each pixel calculate a
50// single corner's coverage that's controlled via the vertex shader. Any bias is a constant 1/2,
51// so this is also added in the vertex shader.
52//
53// Analytic derivatives are used so that a single pipeline can be used regardless of HW derivative
54// support or for geometry that would prove difficult for forward differencing. The device-space
55// gradient for ellipses is calculated per-pixel by transforming a per-pixel local gradient vector
56// with the Jacobian of the inverse local-to-device transform:
57//
// (px,py) is the projected point of (u,v) transformed by a 3x3 matrix, M:
//                [x(u,v) / w(u,v)]       [x]   [m00 m01 m02] [u]
//      (px,py) = [y(u,v) / w(u,v)] where [y] = [m10 m11 m12]X[v] = M*(u,v,1)
//                                        [w]   [m20 m21 m22] [1]
62//
// C(px,py) can be defined in terms of a local Cl(u,v) as C(px,py) = Cl(p^-1(px,py)), where p^-1 =
//
//                [x'(px,py) / w'(px,py)]       [x']   [m00' m01' * m02'] [px]
//      (u,v)   = [y'(px,py) / w'(px,py)] where [y'] = [m10' m11' * m12']X[py] = M^-1*(px,py,0,1)
//                                              [w']   [m20' m21' * m22'] [ 1]
68//
// Note that if the 3x3 M was arrived at by dropping the 3rd row and column from a 4x4 (since we
// assume a local 3rd coordinate of 0), M^-1 is not equal to the 4x4 inverse with dropped rows and
// columns.
71//
// Using the chain rule, then ∇C(px,py)
//   =  ∇Cl(u,v)X[1/w'(px,py)     0       -x'(px,py)/w'(px,py)^2] [m00' m01']
//               [    0       1/w'(px,py) -y'(px,py)/w'(px,py)^2]X[m10' m11']
//                                                                [m20' m21']
//
//   = 1/w'(px,py)*∇Cl(u,v)X[1 0 -x'(px,py)/w'(px,py)] [m00' m01']
//                          [0 1 -y'(px,py)/w'(px,py)]X[m10' m11']
//                                                     [m20' m21']
//
//   = w(u,v)*∇Cl(u,v)X[1 0 -u] [m00' m01']
//                     [0 1 -v]X[m10' m11']
//                              [m20' m21']
//
//   = w(u,v)*∇Cl(u,v)X[m00'-m20'u m01'-m21'u]
//                     [m10'-m20'v m11'-m21'v]
87//
// The vertex shader calculates the rightmost 2x2 matrix and interpolates it across the shape since
// each component is linear in (u,v). ∇Cl(u,v) is evaluated per pixel in the fragment shader and
// depends on which corner and edge is being evaluated. w(u,v) is the device-space W coordinate, so
// its reciprocal is provided in sk_FragCoord.w.
92namespace skgpu::graphite {
93
95
96static bool is_clockwise(const EdgeAAQuad& quad) {
97 if (quad.isRect()) {
98 return true; // by construction, these are always locally clockwise
99 }
100
101 // This assumes that each corner has a consistent winding, which is the case for convex inputs,
102 // which is an assumption of the per-edge AA API. Check the sign of cross product between the
103 // first two edges.
104 const skvx::float4& xs = quad.xs();
105 const skvx::float4& ys = quad.ys();
106
107 float winding = (xs[0] - xs[3])*(ys[1] - ys[0]) - (ys[0] - ys[3])*(xs[1] - xs[0]);
108 if (winding == 0.f) {
109 // The input possibly forms a triangle with duplicate vertices, so check the opposite corner
110 winding = (xs[2] - xs[1])*(ys[3] - ys[2]) - (ys[2] - ys[1])*(xs[3] - xs[2]);
111 }
112
113 // At this point if winding is < 0, the quad's vertices are CCW. If it's still 0, the vertices
114 // form a line, in which case the vertex shader constructs a correct CW winding. Otherwise,
115 // the quad or triangle vertices produce a positive winding and are CW.
116 return winding >= 0.f;
117}
118
119// Represents the per-vertex attributes used in each instance.
120struct Vertex {
122};
123
// Allowed values for the center weight instance value (selected at record time based on style
// and transform), and are defined such that when (instance-weight > vertex-weight) is true, the
// vertex should be snapped to the center instead of its regular calculation.
// Number of template vertices emitted per corner. sk_VertexID is divided by this in SkSL.
static constexpr int kCornerVertexCount = 4;
// Total template vertices: one group of kCornerVertexCount for each of the four corners.
static constexpr int kVertexCount = kCornerVertexCount * 4;
// Entries in the static index buffer: four runs of six AA-ramp indices, one repeated index, and
// four fill indices.
static constexpr int kIndexCount = 4 * 6 + 1 + 4;
130
131static void write_index_buffer(VertexWriter writer) {
132 static constexpr uint16_t kTL = 0 * kCornerVertexCount;
133 static constexpr uint16_t kTR = 1 * kCornerVertexCount;
134 static constexpr uint16_t kBR = 2 * kCornerVertexCount;
135 static constexpr uint16_t kBL = 3 * kCornerVertexCount;
136
137 static const uint16_t kIndices[kIndexCount] = {
138 // Exterior AA ramp outset
139 kTL+1,kTL+2,kTL+3,kTR+0,kTR+3,kTR+1,
140 kTR+1,kTR+2,kTR+3,kBR+0,kBR+3,kBR+1,
141 kBR+1,kBR+2,kBR+3,kBL+0,kBL+3,kBL+1,
142 kBL+1,kBL+2,kBL+3,kTL+0,kTL+3,kTL+1,
143 kTL+3,
144 // Fill triangles
145 kTL+3,kTR+3,kBL+3,kBR+3
146 };
147
148 if (writer) {
149 writer << kIndices;
150 } // otherwise static buffer creation failed, so do nothing; Context initialization will fail.
151}
152
154 static constexpr float kHR2 = 0.5f * SK_FloatSqrt2; // "half root 2"
155
156 // This template is repeated 4 times in the vertex buffer, for each of the four corners.
157 // The vertex ID is used to lookup per-corner instance properties such as positions,
158 // but otherwise this vertex data produces a consistent clockwise mesh from
159 // TL -> TR -> BR -> BL.
160 static constexpr Vertex kCornerTemplate[kCornerVertexCount] = {
161 // Normals for device-space AA outsets from outer curve
162 { {1.0f, 0.0f} },
163 { {kHR2, kHR2} },
164 { {0.0f, 1.0f} },
165
166 // Normal for outer anchor (zero length to signal no local or device-space normal outset)
167 { {0.0f, 0.0f} },
168 };
169
170 if (writer) {
171 writer << kCornerTemplate // TL
172 << kCornerTemplate // TR
173 << kCornerTemplate // BR
174 << kCornerTemplate; // BL
175 } // otherwise static buffer creation failed, so do nothing; Context initialization will fail.
176}
177
// NOTE(review): this constructor is incomplete in this listing. Its signature line and several
// base-class initializer arguments (listing lines 178, 183-184, 186, 189-191, 195-196 and
// 198-200) are absent, including the entries of the vertex and instance attribute lists. The
// cross-reference index at the end of this listing gives the signature as
// PerEdgeAAQuadRenderStep(StaticBufferManager *bufferManager). Restore the missing lines from
// the upstream file before building.
179 : RenderStep("PerEdgeAAQuadRenderStep",
180 "",
181 Flags::kPerformsShading | Flags::kEmitsCoverage | Flags::kOutsetBoundsForAA,
182 /*uniforms=*/{},
185 /*vertexAttrs=*/{
187 },
188 /*instanceAttrs=*/
192
193 // TODO: pack depth and ssbo index into one 32-bit attribute, if we can
194 // go without needing both render step and paint ssbo index attributes.
197
201 /*varyings=*/{
202 // Device-space distance to LTRB edges of quad.
203 {"edgeDistances", SkSLType::kFloat4}, // distance to LTRB edges
204 }) {
205 // Initialize the static buffers we'll use when recording draw calls.
206 // NOTE: Each instance of this RenderStep gets its own copy of the data. Since there should only
207 // ever be one PerEdgeAAQuadRenderStep at a time, this shouldn't be an issue.
// The vertex buffer is sized for kVertexCount Vertex entries and the index buffer for
// kIndexCount 16-bit indices; both writers are handed straight to the static fill helpers.
208 write_vertex_buffer(bufferManager->getVertexWriter(sizeof(Vertex) * kVertexCount,
209 &fVertexBuffer));
210 write_index_buffer(bufferManager->getIndexWriter(sizeof(uint16_t) * kIndexCount,
211 &fIndexBuffer));
212}
213
215
// NOTE(review): the signature line of this function (listing line 216) is absent from this
// listing, and its return type is not shown anywhere in it; restore it from the upstream file.
//
// The returned SkSL is a single call to per_edge_aa_quad_vertex_fn. Arguments are passed in the
// order annotated below: the per-vertex normal, the per-instance values, the edgeDistances
// varying, and finally stepLocalCoords. The ssboIndices value that writeVertices() writes per
// instance is not among the arguments forwarded here.
217 // Returns the body of a vertex function, which must define a float4 devPosition variable and
218 // must write to an already-defined float2 stepLocalCoords variable.
219 return "float4 devPosition = per_edge_aa_quad_vertex_fn("
220 // Vertex Attributes
221 "normal, "
222 // Instance Attributes
223 "edgeFlags, quadXs, quadYs, depth, "
224 "float3x3(mat0, mat1, mat2), "
225 // Varyings
226 "edgeDistances, "
227 // Render Step
228 "stepLocalCoords);\n";
229}
230
232 // The returned SkSL must write its coverage into a 'half4 outputCoverage' variable (defined in
233 // the calling code) with the actual coverage splatted out into all four channels.
234 return "outputCoverage = per_edge_aa_quad_coverage_fn(sk_FragCoord, edgeDistances);";
235}
236
238 const DrawParams& params,
239 skvx::ushort2 ssboIndices) const {
240 SkASSERT(params.geometry().isEdgeAAQuad());
241 const EdgeAAQuad& quad = params.geometry().edgeAAQuad();
242
243 DrawWriter::Instances instance{*writer, fVertexBuffer, fIndexBuffer, kIndexCount};
244 auto vw = instance.append(1);
245
246 // Empty fills should not have been recorded at all.
247 SkDEBUGCODE(Rect bounds = params.geometry().bounds());
248 SkASSERT(!bounds.isEmptyNegativeOrNaN());
249
250 constexpr uint8_t kAAOn = 255;
251 constexpr uint8_t kAAOff = 0;
252 auto edgeSigns = skvx::byte4{quad.edgeFlags() & AAFlags::kLeft ? kAAOn : kAAOff,
253 quad.edgeFlags() & AAFlags::kTop ? kAAOn : kAAOff,
254 quad.edgeFlags() & AAFlags::kRight ? kAAOn : kAAOff,
255 quad.edgeFlags() & AAFlags::kBottom ? kAAOn : kAAOff};
256
257 // The vertex shader expects points to be in clockwise order. EdgeAAQuad is the only
258 // shape that *might* have counter-clockwise input.
259 if (is_clockwise(quad)) {
260 vw << edgeSigns << quad.xs() << quad.ys();
261 } else {
262 vw << skvx::shuffle<2,1,0,3>(edgeSigns) // swap left and right AA bits
263 << skvx::shuffle<1,0,3,2>(quad.xs()) // swap TL with TR, and BL with BR
264 << skvx::shuffle<1,0,3,2>(quad.ys()); // ""
265 }
266
267 // All instance types share the remaining instance attribute definitions
268 const SkM44& m = params.transform().matrix();
269
270 vw << params.order().depthAsFloat()
271 << ssboIndices
272 << m.rc(0,0) << m.rc(1,0) << m.rc(3,0) // mat0
273 << m.rc(0,1) << m.rc(1,1) << m.rc(3,1) // mat1
274 << m.rc(0,3) << m.rc(1,3) << m.rc(3,3); // mat2
275}
276
278 PipelineDataGatherer*) const {
279 // All data is uploaded as instance attributes, so no uniforms are needed.
280}
281
282} // namespace skgpu::graphite
static const uint16_t kTL
static const uint16_t kBL
static const uint16_t kIndices[]
static const uint16_t kBR
static constexpr LocalCornerVert kCornerTemplate[19]
static const uint16_t kTR
static constexpr float kHR2
#define SkASSERT(cond)
Definition: SkAssert.h:116
constexpr float SK_FloatSqrt2
SkDEBUGCODE(SK_SPI) SkThreadID SkGetThreadID()
sk_sp< const GrBuffer > fIndexBuffer
sk_sp< const GrBuffer > fVertexBuffer
Definition: SkM44.h:150
const skvx::float4 & ys() const
Definition: EdgeAAQuad.h:73
const skvx::float4 & xs() const
Definition: EdgeAAQuad.h:72
SkEnumBitMask< Flags > edgeFlags() const
Definition: EdgeAAQuad.h:74
void writeUniformsAndTextures(const DrawParams &, PipelineDataGatherer *) const override
const char * fragmentCoverageSkSL() const override
void writeVertices(DrawWriter *, const DrawParams &, skvx::ushort2 ssboIndices) const override
PerEdgeAAQuadRenderStep(StaticBufferManager *bufferManager)
const EmbeddedViewParams * params
VkInstance instance
Definition: main.cc:48
Optional< SkRect > bounds
Definition: SkRecords.h:189
static void write_index_buffer(VertexWriter writer)
static bool is_clockwise(const EdgeAAQuad &quad)
static constexpr DepthStencilSettings kDirectDepthGreaterPass
static constexpr int kIndexCount
EdgeAAQuad::Flags AAFlags
static constexpr int kCornerVertexCount
static constexpr int kVertexCount
static void write_vertex_buffer(VertexWriter writer)
Definition: SkM44.h:19
Definition: SkVx.h:83