Flutter Engine
SkSLRasterPipelineBuilder.cpp
/*
 * Copyright 2022 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/sksl/codegen/SkSLRasterPipelineBuilder.h"

#include "include/core/SkSpan.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkTArray.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkArenaAlloc.h"
#include "src/core/SkOpts.h"
#include "src/core/SkRasterPipelineContextUtils.h"
#include "src/core/SkRasterPipelineOpContexts.h"
#include "src/core/SkRasterPipelineOpList.h"
#include "src/core/SkTHash.h"
#include "src/sksl/SkSLString.h"
#include "src/sksl/tracing/SkSLDebugTracePriv.h"
#include "src/sksl/tracing/SkSLTraceHook.h"
#include "src/utils/SkBitSet.h"

#if !defined(SKSL_STANDALONE)
#include "src/core/SkRasterPipeline.h"
#endif

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>

using namespace skia_private;

namespace SkSL::RP {

#define ALL_SINGLE_SLOT_UNARY_OP_CASES \
         BuilderOp::acos_float: \
    case BuilderOp::asin_float: \
    case BuilderOp::atan_float: \
    case BuilderOp::cos_float: \
    case BuilderOp::exp_float: \
    case BuilderOp::exp2_float: \
    case BuilderOp::log_float: \
    case BuilderOp::log2_float: \
    case BuilderOp::sin_float: \
    case BuilderOp::sqrt_float: \
    case BuilderOp::tan_float

#define ALL_MULTI_SLOT_UNARY_OP_CASES \
         BuilderOp::abs_int: \
    case BuilderOp::cast_to_float_from_int: \
    case BuilderOp::cast_to_float_from_uint: \
    case BuilderOp::cast_to_int_from_float: \
    case BuilderOp::cast_to_uint_from_float: \
    case BuilderOp::ceil_float: \
    case BuilderOp::floor_float: \
    case BuilderOp::invsqrt_float

#define ALL_N_WAY_BINARY_OP_CASES \
         BuilderOp::atan2_n_floats: \
    case BuilderOp::pow_n_floats

#define ALL_MULTI_SLOT_BINARY_OP_CASES \
         BuilderOp::add_n_floats: \
    case BuilderOp::add_n_ints: \
    case BuilderOp::sub_n_floats: \
    case BuilderOp::sub_n_ints: \
    case BuilderOp::mul_n_floats: \
    case BuilderOp::mul_n_ints: \
    case BuilderOp::div_n_floats: \
    case BuilderOp::div_n_ints: \
    case BuilderOp::div_n_uints: \
    case BuilderOp::bitwise_and_n_ints: \
    case BuilderOp::bitwise_or_n_ints: \
    case BuilderOp::bitwise_xor_n_ints: \
    case BuilderOp::mod_n_floats: \
    case BuilderOp::min_n_floats: \
    case BuilderOp::min_n_ints: \
    case BuilderOp::min_n_uints: \
    case BuilderOp::max_n_floats: \
    case BuilderOp::max_n_ints: \
    case BuilderOp::max_n_uints: \
    case BuilderOp::cmple_n_floats: \
    case BuilderOp::cmple_n_ints: \
    case BuilderOp::cmple_n_uints: \
    case BuilderOp::cmplt_n_floats: \
    case BuilderOp::cmplt_n_ints: \
    case BuilderOp::cmplt_n_uints: \
    case BuilderOp::cmpeq_n_floats: \
    case BuilderOp::cmpeq_n_ints: \
    case BuilderOp::cmpne_n_floats: \
    case BuilderOp::cmpne_n_ints

#define ALL_IMMEDIATE_BINARY_OP_CASES \
         BuilderOp::add_imm_float: \
    case BuilderOp::add_imm_int: \
    case BuilderOp::mul_imm_float: \
    case BuilderOp::mul_imm_int: \
    case BuilderOp::bitwise_and_imm_int: \
    case BuilderOp::bitwise_xor_imm_int: \
    case BuilderOp::min_imm_float: \
    case BuilderOp::max_imm_float: \
    case BuilderOp::cmple_imm_float: \
    case BuilderOp::cmple_imm_int: \
    case BuilderOp::cmple_imm_uint: \
    case BuilderOp::cmplt_imm_float: \
    case BuilderOp::cmplt_imm_int: \
    case BuilderOp::cmplt_imm_uint: \
    case BuilderOp::cmpeq_imm_float: \
    case BuilderOp::cmpeq_imm_int: \
    case BuilderOp::cmpne_imm_float: \
    case BuilderOp::cmpne_imm_int

#define ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES \
         BuilderOp::bitwise_and_imm_int

#define ALL_N_WAY_TERNARY_OP_CASES \
         BuilderOp::smoothstep_n_floats

#define ALL_MULTI_SLOT_TERNARY_OP_CASES \
         BuilderOp::mix_n_floats: \
    case BuilderOp::mix_n_ints

static bool is_immediate_op(BuilderOp op) {
    switch (op) {
        case ALL_IMMEDIATE_BINARY_OP_CASES: return true;
        default:                            return false;
    }
}

static bool is_multi_slot_immediate_op(BuilderOp op) {
    switch (op) {
        case ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES: return true;
        default:                                       return false;
    }
}

static BuilderOp convert_n_way_op_to_immediate(BuilderOp op, int slots, int32_t* constantValue) {
    // We rely on the exact ordering of SkRP ops here; the immediate-mode op must always come
    // directly before the n-way op. (If we have more than one, the increasing-slot variations
    // continue backwards from there.)
    BuilderOp immOp = (BuilderOp)((int)op - 1);

    // Some immediate ops support multiple slots.
    if (is_multi_slot_immediate_op(immOp)) {
        return immOp;
    }

    // Most immediate ops only directly support a single slot. However, it's still faster to
    // execute `add_imm_int, add_imm_int` instead of `splat_2_ints, add_2_ints`, so we allow those
    // conversions as well.
    if (slots <= 2) {
        if (is_immediate_op(immOp)) {
            return immOp;
        }

        // We also allow for immediate-mode subtraction, by adding a negative value.
        switch (op) {
            case BuilderOp::sub_n_ints:
                *constantValue *= -1;
                return BuilderOp::add_imm_int;

            case BuilderOp::sub_n_floats: {
                // This negates the floating-point value by inverting its sign bit.
                *constantValue ^= 0x80000000;
                return BuilderOp::add_imm_float;
            }
            default:
                break;
        }
    }

    // We don't have an immediate-mode version of this op.
    return op;
}
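
// An illustrative sketch (not part of the original source): subtracting a float constant can be
// rewritten as an immediate add, because flipping the sign bit of an IEEE float negates it.
// Assuming the ops behave as described above:
//
//     int32_t constant = 0x3F800000;  // bit pattern of 1.0f
//     BuilderOp op = convert_n_way_op_to_immediate(BuilderOp::sub_n_floats,
//                                                  /*slots=*/1, &constant);
//     // op is now BuilderOp::add_imm_float, and constant is 0xBF800000 (-1.0f),
//     // so `x - 1.0` executes as `x + (-1.0)` with no stack traffic for the constant.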

void Builder::appendInstruction(BuilderOp op, SlotList slots,
                                int immA, int immB, int immC, int immD) {
    fInstructions.push_back({op, slots.fSlotA, slots.fSlotB,
                             immA, immB, immC, immD, fCurrentStackID});
}

Instruction* Builder::lastInstruction(int fromBack) {
    if (fInstructions.size() <= fromBack) {
        return nullptr;
    }
    Instruction* inst = &fInstructions.fromBack(fromBack);
    if (inst->fStackID != fCurrentStackID) {
        return nullptr;
    }
    return inst;
}

Instruction* Builder::lastInstructionOnAnyStack(int fromBack) {
    if (fInstructions.size() <= fromBack) {
        return nullptr;
    }
    return &fInstructions.fromBack(fromBack);
}

void Builder::unary_op(BuilderOp op, int32_t slots) {
    switch (op) {
        case ALL_SINGLE_SLOT_UNARY_OP_CASES:
        case ALL_MULTI_SLOT_UNARY_OP_CASES:
            this->appendInstruction(op, {}, slots);
            break;

        default:
            SkDEBUGFAIL("not a unary op");
            break;
    }
}

void Builder::binary_op(BuilderOp op, int32_t slots) {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If we just pushed or splatted a constant onto the stack...
        if (lastInstruction->fOp == BuilderOp::push_constant &&
            lastInstruction->fImmA >= slots) {
            // ... and this op has an immediate-mode equivalent...
            int32_t constantValue = lastInstruction->fImmB;
            BuilderOp immOp = convert_n_way_op_to_immediate(op, slots, &constantValue);
            if (immOp != op) {
                // ... discard the constants from the stack, and use an immediate-mode op.
                this->discard_stack(slots);
                this->appendInstruction(immOp, {}, slots, constantValue);
                return;
            }
        }
    }

    switch (op) {
        case ALL_N_WAY_BINARY_OP_CASES:
        case ALL_MULTI_SLOT_BINARY_OP_CASES:
            this->appendInstruction(op, {}, slots);
            break;

        default:
            SkDEBUGFAIL("not a binary op");
            break;
    }
}

void Builder::ternary_op(BuilderOp op, int32_t slots) {
    switch (op) {
        case ALL_N_WAY_TERNARY_OP_CASES:
        case ALL_MULTI_SLOT_TERNARY_OP_CASES:
            this->appendInstruction(op, {}, slots);
            break;

        default:
            SkDEBUGFAIL("not a ternary op");
            break;
    }
}

void Builder::dot_floats(int32_t slots) {
    switch (slots) {
        case 1: this->appendInstruction(BuilderOp::mul_n_floats, {}, slots); break;
        case 2: this->appendInstruction(BuilderOp::dot_2_floats, {}, slots); break;
        case 3: this->appendInstruction(BuilderOp::dot_3_floats, {}, slots); break;
        case 4: this->appendInstruction(BuilderOp::dot_4_floats, {}, slots); break;

        default:
            SkDEBUGFAIL("invalid number of slots");
            break;
    }
}

void Builder::refract_floats() {
    this->appendInstruction(BuilderOp::refract_4_floats, {});
}

void Builder::inverse_matrix(int32_t n) {
    switch (n) {
        case 2: this->appendInstruction(BuilderOp::inverse_mat2, {}, 4);  break;
        case 3: this->appendInstruction(BuilderOp::inverse_mat3, {}, 9);  break;
        case 4: this->appendInstruction(BuilderOp::inverse_mat4, {}, 16); break;
        default: SkUNREACHABLE;
    }
}

void Builder::pad_stack(int32_t count) {
    if (count > 0) {
        this->appendInstruction(BuilderOp::pad_stack, {}, count);
    }
}

bool Builder::simplifyImmediateUnmaskedOp() {
    if (fInstructions.size() < 3) {
        return false;
    }

    // If we detect a pattern of 'push, immediate-op, unmasked pop', then we can
    // convert it into an immediate-op directly onto the value slots and take the
    // stack entirely out of the equation.
    Instruction* popInstruction  = this->lastInstruction(/*fromBack=*/0);
    Instruction* immInstruction  = this->lastInstruction(/*fromBack=*/1);
    Instruction* pushInstruction = this->lastInstruction(/*fromBack=*/2);

    // If the last instruction is an unmasked pop...
    if (popInstruction && immInstruction && pushInstruction &&
        popInstruction->fOp == BuilderOp::copy_stack_to_slots_unmasked) {
        // ... and the prior instruction was an immediate-mode op, with the same number of slots...
        if (is_immediate_op(immInstruction->fOp) &&
            immInstruction->fImmA == popInstruction->fImmA) {
            // ... and we support multiple-slot immediates (if this op calls for it)...
            if (immInstruction->fImmA == 1 || is_multi_slot_immediate_op(immInstruction->fOp)) {
                // ... and the prior instruction was `push_slots` or `push_immutable` of at least
                // that many slots...
                if ((pushInstruction->fOp == BuilderOp::push_slots ||
                     pushInstruction->fOp == BuilderOp::push_immutable) &&
                    pushInstruction->fImmA >= popInstruction->fImmA) {
                    // ... onto the same slot range...
                    Slot immSlot  = popInstruction->fSlotA + popInstruction->fImmA;
                    Slot pushSlot = pushInstruction->fSlotA + pushInstruction->fImmA;
                    if (immSlot == pushSlot) {
                        // ... we can shrink the push, eliminate the pop, and perform the immediate
                        // op in-place instead.
                        pushInstruction->fImmA -= immInstruction->fImmA;
                        immInstruction->fSlotA = immSlot - immInstruction->fImmA;
                        fInstructions.pop_back();
                        return true;
                    }
                }
            }
        }
    }

    return false;
}
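
// An illustrative trace (not in the original source) of the peephole above. Assuming `v0` is a
// value slot, the three-instruction sequence
//
//     push_slots                    v0 (1 slot)   // copy v0 onto the temp stack
//     add_imm_float                 +1.0          // add 1.0 to the stack top
//     copy_stack_to_slots_unmasked  v0 (1 slot)   // write the stack top back to v0
//
// collapses into a single `add_imm_float` applied directly to v0: the push shrinks to zero
// slots, the pop is removed, and no temp-stack traffic remains.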

void Builder::discard_stack(int32_t count, int stackID) {
    // If we pushed something onto the stack and then immediately discarded part of it, we can
    // shrink or eliminate the push.
    while (count > 0) {
        Instruction* lastInstruction = this->lastInstructionOnAnyStack();
        if (!lastInstruction || lastInstruction->fStackID != stackID) {
            break;
        }

        switch (lastInstruction->fOp) {
            case BuilderOp::discard_stack:
                // Our last op was actually a separate discard_stack; combine the discards.
                lastInstruction->fImmA += count;
                return;

            case BuilderOp::push_clone:
            case BuilderOp::push_clone_from_stack:
            case BuilderOp::push_clone_indirect_from_stack:
            case BuilderOp::push_constant:
            case BuilderOp::push_immutable:
            case BuilderOp::push_immutable_indirect:
            case BuilderOp::push_slots:
            case BuilderOp::push_slots_indirect:
            case BuilderOp::push_uniform:
            case BuilderOp::push_uniform_indirect:
            case BuilderOp::pad_stack: {
                // Our last op was a multi-slot push; these cancel out. Eliminate the op if its
                // count reached zero.
                int cancelOut = std::min(count, lastInstruction->fImmA);
                count -= cancelOut;
                lastInstruction->fImmA -= cancelOut;
                if (lastInstruction->fImmA == 0) {
                    fInstructions.pop_back();
                }
                continue;
            }
            case BuilderOp::push_condition_mask:
            case BuilderOp::push_loop_mask:
            case BuilderOp::push_return_mask:
                // Our last op was a single-slot push; cancel out one discard and eliminate the op.
                --count;
                fInstructions.pop_back();
                continue;

            case BuilderOp::copy_stack_to_slots_unmasked: {
                // Look for a pattern of `push, immediate-ops, pop` and simplify it down to an
                // immediate-op directly to the value slot.
                if (count == 1) {
                    if (this->simplifyImmediateUnmaskedOp()) {
                        return;
                    }
                }

                // A `copy_stack_to_slots_unmasked` op, followed immediately by a `discard_stack`
                // op with an equal number of slots, is interpreted as an unmasked stack pop.
                // We can simplify pops in a variety of ways. First, temporarily get rid of
                // `copy_stack_to_slots_unmasked`.
                if (count == lastInstruction->fImmA) {
                    SlotRange dst{lastInstruction->fSlotA, lastInstruction->fImmA};
                    fInstructions.pop_back();

                    // See if we can write this pop in a simpler way.
                    this->simplifyPopSlotsUnmasked(&dst);

                    // If simplification consumed the entire range, we're done!
                    if (dst.count == 0) {
                        return;
                    }

                    // Simplification did not consume the entire range. We are still responsible
                    // for copying-back and discarding any remaining slots.
                    this->copy_stack_to_slots_unmasked(dst);
                    count = dst.count;
                }
                break;
            }
            default:
                break;
        }

        // This instruction wasn't a push.
        break;
    }

    if (count > 0) {
        this->appendInstruction(BuilderOp::discard_stack, {}, count);
    }
}
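
// A small worked example (illustrative only): `push_constant 1.0 x3` followed by
// `discard_stack(2)` never reaches the instruction list as two separate ops; the discard
// cancels against the pending push, leaving a single `push_constant 1.0 x1` behind.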

void Builder::label(int labelID) {
    SkASSERT(labelID >= 0 && labelID < fNumLabels);

    // If the previous instruction was a branch to this label, it's a no-op; jumping to the very
    // next instruction is effectively meaningless.
    while (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        switch (lastInstruction->fOp) {
            case BuilderOp::jump:
            case BuilderOp::branch_if_all_lanes_active:
            case BuilderOp::branch_if_any_lanes_active:
            case BuilderOp::branch_if_no_lanes_active:
            case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal:
                if (lastInstruction->fImmA == labelID) {
                    fInstructions.pop_back();
                    continue;
                }
                break;

            default:
                break;
        }
        break;
    }
    this->appendInstruction(BuilderOp::label, {}, labelID);
}

void Builder::jump(int labelID) {
    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::jump) {
            // The previous instruction was also `jump`, so this branch could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::jump, {}, labelID);
}

void Builder::branch_if_any_lanes_active(int labelID) {
    if (!this->executionMaskWritesAreEnabled()) {
        this->jump(labelID);
        return;
    }

    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::branch_if_any_lanes_active ||
            lastInstruction->fOp == BuilderOp::jump) {
            // The previous instruction was `jump` or `branch_if_any_lanes_active`, so this branch
            // could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::branch_if_any_lanes_active, {}, labelID);
}

void Builder::branch_if_all_lanes_active(int labelID) {
    if (!this->executionMaskWritesAreEnabled()) {
        this->jump(labelID);
        return;
    }

    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::branch_if_all_lanes_active ||
            lastInstruction->fOp == BuilderOp::jump) {
            // The previous instruction was `jump` or `branch_if_all_lanes_active`, so this branch
            // could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::branch_if_all_lanes_active, {}, labelID);
}

void Builder::branch_if_no_lanes_active(int labelID) {
    if (!this->executionMaskWritesAreEnabled()) {
        return;
    }

    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::branch_if_no_lanes_active ||
            lastInstruction->fOp == BuilderOp::jump) {
            // The previous instruction was `jump` or `branch_if_no_lanes_active`, so this branch
            // could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::branch_if_no_lanes_active, {}, labelID);
}

void Builder::branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID) {
    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::jump ||
            (lastInstruction->fOp == BuilderOp::branch_if_no_active_lanes_on_stack_top_equal &&
             lastInstruction->fImmB == value)) {
            // The previous instruction was `jump` or `branch_if_no_active_lanes_on_stack_top_equal`
            // (checking against the same value), so this branch could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::branch_if_no_active_lanes_on_stack_top_equal,
                            {}, labelID, value);
}

void Builder::push_slots_or_immutable(SlotRange src, BuilderOp op) {
    SkASSERT(src.count >= 0);
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous instruction was pushing slots contiguous to this range, we can collapse
        // the two pushes into one larger push.
        if (lastInstruction->fOp == op &&
            lastInstruction->fSlotA + lastInstruction->fImmA == src.index) {
            lastInstruction->fImmA += src.count;
            src.count = 0;
        }
    }

    if (src.count > 0) {
        this->appendInstruction(op, {src.index}, src.count);
    }

    // Look for a sequence of "copy stack to X, discard stack, copy X to stack". This is a common
    // pattern when multiple operations in a row affect the same variable. When we see this, we can
    // eliminate both the discard and the push.
    if (fInstructions.size() >= 3) {
        const Instruction* pushInst        = this->lastInstruction(/*fromBack=*/0);
        const Instruction* discardInst     = this->lastInstruction(/*fromBack=*/1);
        const Instruction* copyToSlotsInst = this->lastInstruction(/*fromBack=*/2);

        if (pushInst && discardInst && copyToSlotsInst && pushInst->fOp == BuilderOp::push_slots) {
            int pushIndex = pushInst->fSlotA;
            int pushCount = pushInst->fImmA;

            // Look for a `discard_stack` matching our push count.
            if (discardInst->fOp == BuilderOp::discard_stack && discardInst->fImmA == pushCount) {
                // Look for a `copy_stack_to_slots` matching our push.
                if ((copyToSlotsInst->fOp == BuilderOp::copy_stack_to_slots ||
                     copyToSlotsInst->fOp == BuilderOp::copy_stack_to_slots_unmasked) &&
                    copyToSlotsInst->fSlotA == pushIndex && copyToSlotsInst->fImmA == pushCount) {
                    // We found a matching sequence. Remove the discard and push.
                    fInstructions.pop_back();
                    fInstructions.pop_back();
                    return;
                }
            }
        }
    }
}
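
// An illustrative trace (not in the original source): statements like `x += 1; x += 2;` tend to
// generate
//
//     copy_stack_to_slots  x        // write the result of `x += 1` back to x
//     discard_stack        1        // pop the stack
//     push_slots           x        // re-read x for `x += 2`
//
// Since the stack still holds exactly the value being re-pushed, the discard and the push are
// both dropped, and the stack value carries straight into the next operation.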

void Builder::push_slots_or_immutable_indirect(SlotRange fixedRange,
                                               int dynamicStackID,
                                               SlotRange limitRange,
                                               BuilderOp op) {
    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: number of slots
    // immB: dynamic stack ID
    this->appendInstruction(op,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            fixedRange.count,
                            dynamicStackID);
}

void Builder::push_uniform(SlotRange src) {
    SkASSERT(src.count >= 0);
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous instruction was pushing uniforms contiguous to this range, we can
        // collapse the two pushes into one larger push.
        if (lastInstruction->fOp == BuilderOp::push_uniform &&
            lastInstruction->fSlotA + lastInstruction->fImmA == src.index) {
            lastInstruction->fImmA += src.count;
            return;
        }
    }

    if (src.count > 0) {
        this->appendInstruction(BuilderOp::push_uniform, {src.index}, src.count);
    }
}

void Builder::push_uniform_indirect(SlotRange fixedRange,
                                    int dynamicStackID,
                                    SlotRange limitRange) {
    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: number of slots
    // immB: dynamic stack ID
    this->appendInstruction(BuilderOp::push_uniform_indirect,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            fixedRange.count,
                            dynamicStackID);
}

void Builder::trace_var_indirect(int traceMaskStackID,
                                 SlotRange fixedRange,
                                 int dynamicStackID,
                                 SlotRange limitRange) {
    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: trace-mask stack ID
    // immB: number of slots
    // immC: dynamic stack ID
    this->appendInstruction(BuilderOp::trace_var_indirect,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            traceMaskStackID,
                            fixedRange.count,
                            dynamicStackID);
}

void Builder::push_constant_i(int32_t val, int count) {
    SkASSERT(count >= 0);
    if (count > 0) {
        if (Instruction* lastInstruction = this->lastInstruction()) {
            // If the previous op is pushing the same value, we can just push more of them.
            if (lastInstruction->fOp == BuilderOp::push_constant && lastInstruction->fImmB == val) {
                lastInstruction->fImmA += count;
                return;
            }
        }
        this->appendInstruction(BuilderOp::push_constant, {}, count, val);
    }
}

void Builder::push_duplicates(int count) {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous op is pushing a constant, we can just push more of them.
        if (lastInstruction->fOp == BuilderOp::push_constant) {
            lastInstruction->fImmA += count;
            return;
        }
    }
    SkASSERT(count >= 0);
    if (count >= 3) {
        // Use a swizzle to splat the input into a 4-slot value.
        this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0});
        count -= 3;
    }
    for (; count >= 4; count -= 4) {
        // Clone the splatted value four slots at a time.
        this->push_clone(/*numSlots=*/4);
    }
    // Use a swizzle or clone to handle the trailing items.
    switch (count) {
        case 3:  this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0}); break;
        case 2:  this->swizzle(/*consumedSlots=*/1, {0, 0, 0});    break;
        case 1:  this->push_clone(/*numSlots=*/1);                 break;
        default: break;
    }
}
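
// A worked example (illustrative only): push_duplicates(7) with one non-constant scalar on the
// stack first swizzles it to {0,0,0,0}, turning 1 slot into 4 (3 duplicates made, 4 still
// needed). The loop then emits a single push_clone of 4 slots, yielding 8 identical slots in
// total -- the original value plus its 7 duplicates -- in just two instructions.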

void Builder::push_clone(int numSlots, int offsetFromStackTop) {
    // If we are cloning the stack top...
    if (numSlots == 1 && offsetFromStackTop == 0) {
        // ... and the previous op is pushing a constant...
        if (Instruction* lastInstruction = this->lastInstruction()) {
            if (lastInstruction->fOp == BuilderOp::push_constant) {
                // ... we can just push more of them.
                lastInstruction->fImmA += 1;
                return;
            }
        }
    }
    this->appendInstruction(BuilderOp::push_clone, {}, numSlots, numSlots + offsetFromStackTop);
}

void Builder::push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop) {
    // immA: number of slots
    // immB: other stack ID
    // immC: offset from stack top
    offsetFromStackTop -= range.index;

    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous op is also pushing a clone...
        if (lastInstruction->fOp == BuilderOp::push_clone_from_stack &&
            // ... from the same stack...
            lastInstruction->fImmB == otherStackID &&
            // ... and this clone starts at the same place that the last clone ends...
            lastInstruction->fImmC - lastInstruction->fImmA == offsetFromStackTop) {
            // ... just extend the existing clone-op.
            lastInstruction->fImmA += range.count;
            return;
        }
    }

    this->appendInstruction(BuilderOp::push_clone_from_stack, {},
                            range.count, otherStackID, offsetFromStackTop);
}

void Builder::push_clone_indirect_from_stack(SlotRange fixedOffset,
                                             int dynamicStackID,
                                             int otherStackID,
                                             int offsetFromStackTop) {
    // immA: number of slots
    // immB: other stack ID
    // immC: offset from stack top
    // immD: dynamic stack ID
    offsetFromStackTop -= fixedOffset.index;

    this->appendInstruction(BuilderOp::push_clone_indirect_from_stack, {},
                            fixedOffset.count, otherStackID, offsetFromStackTop, dynamicStackID);
}

void Builder::pop_slots(SlotRange dst) {
    if (!this->executionMaskWritesAreEnabled()) {
        this->pop_slots_unmasked(dst);
        return;
    }

    this->copy_stack_to_slots(dst);
    this->discard_stack(dst.count);
}

void Builder::simplifyPopSlotsUnmasked(SlotRange* dst) {
    if (!dst->count) {
        // There's nothing left to simplify.
        return;
    }
    Instruction* lastInstruction = this->lastInstruction();
    if (!lastInstruction) {
        // There's nothing left to simplify.
        return;
    }
    BuilderOp lastOp = lastInstruction->fOp;

    // If the last instruction is pushing a constant, we can simplify it by copying the constant
    // directly into the destination slot.
    if (lastOp == BuilderOp::push_constant) {
        // Get the last slot.
        int32_t value = lastInstruction->fImmB;
        lastInstruction->fImmA--;
        if (lastInstruction->fImmA == 0) {
            fInstructions.pop_back();
        }

        // Consume one destination slot.
        dst->count--;
        Slot destinationSlot = dst->index + dst->count;

        // Continue simplifying if possible.
        this->simplifyPopSlotsUnmasked(dst);

        // Write the constant directly to the destination slot.
        this->copy_constant(destinationSlot, value);
        return;
    }

    // If the last instruction is pushing a uniform, we can simplify it by copying the uniform
    // directly into the destination slot.
    if (lastOp == BuilderOp::push_uniform) {
        // Get the last slot.
        Slot sourceSlot = lastInstruction->fSlotA + lastInstruction->fImmA - 1;
        lastInstruction->fImmA--;
        if (lastInstruction->fImmA == 0) {
            fInstructions.pop_back();
        }

        // Consume one destination slot.
        dst->count--;
        Slot destinationSlot = dst->index + dst->count;

        // Continue simplifying if possible.
        this->simplifyPopSlotsUnmasked(dst);

        // Write the uniform directly to the destination slot.
        this->copy_uniform_to_slots_unmasked({destinationSlot, 1}, {sourceSlot, 1});
        return;
    }

    // If the last instruction is pushing a slot or immutable, we can just copy that slot.
    if (lastOp == BuilderOp::push_slots || lastOp == BuilderOp::push_immutable) {
        // Get the last slot.
        Slot sourceSlot = lastInstruction->fSlotA + lastInstruction->fImmA - 1;
        lastInstruction->fImmA--;
        if (lastInstruction->fImmA == 0) {
            fInstructions.pop_back();
        }

        // Consume one destination slot.
        dst->count--;
        Slot destinationSlot = dst->index + dst->count;

        // Try once more.
        this->simplifyPopSlotsUnmasked(dst);

        // Copy the slot directly.
        if (lastOp == BuilderOp::push_slots) {
            if (destinationSlot != sourceSlot) {
                this->copy_slots_unmasked({destinationSlot, 1}, {sourceSlot, 1});
            } else {
                // Copying from a value-slot into the same value-slot is a no-op.
            }
        } else {
            // Copy from immutable data directly to the destination slot.
            this->copy_immutable_unmasked({destinationSlot, 1}, {sourceSlot, 1});
        }
        return;
    }
}

void Builder::pop_slots_unmasked(SlotRange dst) {
    SkASSERT(dst.count >= 0);
    this->copy_stack_to_slots_unmasked(dst);
    this->discard_stack(dst.count);
}

void Builder::exchange_src() {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous op is also an exchange-src...
        if (lastInstruction->fOp == BuilderOp::exchange_src) {
            // ... both ops can be eliminated. A double-swap is a no-op.
            fInstructions.pop_back();
            return;
        }
    }

    this->appendInstruction(BuilderOp::exchange_src, {});
}

void Builder::pop_src_rgba() {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous op is exchanging src.rgba with the stack...
        if (lastInstruction->fOp == BuilderOp::exchange_src) {
            // ... both ops can be eliminated. It's just sliding the color back and forth.
            fInstructions.pop_back();
            this->discard_stack(4);
            return;
        }
    }

    this->appendInstruction(BuilderOp::pop_src_rgba, {});
}

void Builder::copy_stack_to_slots(SlotRange dst, int offsetFromStackTop) {
    // If the execution mask is known to be all-true, then we can ignore the write mask.
    if (!this->executionMaskWritesAreEnabled()) {
        this->copy_stack_to_slots_unmasked(dst, offsetFromStackTop);
        return;
    }

    // If the last instruction copied the previous stack slots, just extend it.
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the last op is copy-stack-to-slots...
        if (lastInstruction->fOp == BuilderOp::copy_stack_to_slots &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstruction->fSlotA + lastInstruction->fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstruction->fImmB - lastInstruction->fImmA == offsetFromStackTop) {
            // then we can just extend the copy!
            lastInstruction->fImmA += dst.count;
            return;
        }
    }

    this->appendInstruction(BuilderOp::copy_stack_to_slots, {dst.index},
                            dst.count, offsetFromStackTop);
}

void Builder::copy_stack_to_slots_indirect(SlotRange fixedRange,
                                           int dynamicStackID,
                                           SlotRange limitRange) {
    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: number of slots
    // immB: dynamic stack ID
    this->appendInstruction(BuilderOp::copy_stack_to_slots_indirect,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            fixedRange.count,
                            dynamicStackID);
}

static bool slot_ranges_overlap(SlotRange x, SlotRange y) {
    return x.index < y.index + y.count &&
           y.index < x.index + x.count;
}
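
// Quick illustration (not in the original source): {index=0, count=2} and {index=1, count=3}
// overlap (slot 1 is in both), while {index=0, count=2} and {index=2, count=2} do not; the
// ranges are half-open, so a range that begins exactly where another ends is safe.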

void Builder::copy_constant(Slot slot, int constantValue) {
    // If the last instruction copied the same constant, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is copy-constant...
        if (lastInstr->fOp == BuilderOp::copy_constant &&
            // ... and has the same value...
            lastInstr->fImmB == constantValue &&
            // ... and the slot is immediately after the last copy-constant's destination...
            lastInstr->fSlotA + lastInstr->fImmA == slot) {
            // ... then we can extend the copy!
            lastInstr->fImmA += 1;
            return;
        }
    }

    this->appendInstruction(BuilderOp::copy_constant, {slot}, 1, constantValue);
}

void Builder::copy_slots_unmasked(SlotRange dst, SlotRange src) {
    // If the last instruction copied adjacent slots, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is a match...
        if (lastInstr->fOp == BuilderOp::copy_slot_unmasked &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstr->fSlotA + lastInstr->fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstr->fSlotB + lastInstr->fImmA == src.index &&
            // and the source/dest ranges will not overlap
            !slot_ranges_overlap({lastInstr->fSlotB, lastInstr->fImmA + dst.count},
                                 {lastInstr->fSlotA, lastInstr->fImmA + dst.count})) {
            // then we can just extend the copy!
            lastInstr->fImmA += dst.count;
            return;
        }
    }

    SkASSERT(dst.count == src.count);
    this->appendInstruction(BuilderOp::copy_slot_unmasked, {dst.index, src.index}, dst.count);
}

void Builder::copy_immutable_unmasked(SlotRange dst, SlotRange src) {
    // If the last instruction copied adjacent immutable data, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is a match...
        if (lastInstr->fOp == BuilderOp::copy_immutable_unmasked &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstr->fSlotA + lastInstr->fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstr->fSlotB + lastInstr->fImmA == src.index) {
            // then we can just extend the copy!
            lastInstr->fImmA += dst.count;
            return;
        }
    }

    SkASSERT(dst.count == src.count);
    this->appendInstruction(BuilderOp::copy_immutable_unmasked, {dst.index, src.index}, dst.count);
}

void Builder::copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src) {
    // If the last instruction copied adjacent uniforms, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is copy-uniform-to-slots-unmasked...
        if (lastInstr->fOp == BuilderOp::copy_uniform_to_slots_unmasked &&
            // and this op's destination is immediately after the last copy-op's destination
            lastInstr->fSlotB + lastInstr->fImmA == dst.index &&
            // and this op's source is immediately after the last copy-op's source
            lastInstr->fSlotA + lastInstr->fImmA == src.index) {
            // then we can just extend the copy!
            lastInstr->fImmA += dst.count;
            return;
        }
    }

    SkASSERT(dst.count == src.count);
    this->appendInstruction(BuilderOp::copy_uniform_to_slots_unmasked, {src.index, dst.index},
                            dst.count);
}

void Builder::copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop) {
    // If the last instruction copied the previous stack slots, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is copy-stack-to-slots-unmasked...
        if (lastInstr->fOp == BuilderOp::copy_stack_to_slots_unmasked &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstr->fSlotA + lastInstr->fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstr->fImmB - lastInstr->fImmA == offsetFromStackTop) {
            // then we can just extend the copy!
            lastInstr->fImmA += dst.count;
            return;
        }
    }

    this->appendInstruction(BuilderOp::copy_stack_to_slots_unmasked, {dst.index},
                            dst.count, offsetFromStackTop);
}

void Builder::pop_return_mask() {
    SkASSERT(this->executionMaskWritesAreEnabled());

    // This instruction is going to overwrite the return mask. If the previous instruction was
    // masking off the return mask, that's wasted work and it can be eliminated.
    if (Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::mask_off_return_mask) {
            fInstructions.pop_back();
        }
    }

    this->appendInstruction(BuilderOp::pop_return_mask, {});
}

void Builder::push_condition_mask() {
    SkASSERT(this->executionMaskWritesAreEnabled());

    // If the previous instruction is popping the condition mask, we can restore it onto the stack
    // "for free" instead of copying it.
    if (Instruction* lastInstruction = this->lastInstruction()) {
        if (lastInstruction->fOp == BuilderOp::pop_condition_mask) {
            this->pad_stack(1);
            return;
        }
    }
    this->appendInstruction(BuilderOp::push_condition_mask, {});
}

void Builder::merge_condition_mask() {
    SkASSERT(this->executionMaskWritesAreEnabled());

    // This instruction is going to overwrite the condition mask. If the previous instruction was
    // loading the condition mask, that's wasted work and it can be eliminated.
    if (Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::pop_condition_mask) {
            int stackID = lastInstruction->fStackID;
            fInstructions.pop_back();
            this->discard_stack(/*count=*/1, stackID);
        }
    }

    this->appendInstruction(BuilderOp::merge_condition_mask, {});
}

void Builder::zero_slots_unmasked(SlotRange dst) {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        if (lastInstruction->fOp == BuilderOp::copy_constant && lastInstruction->fImmB == 0) {
            if (lastInstruction->fSlotA + lastInstruction->fImmA == dst.index) {
                // The previous instruction was zeroing the range immediately before this range.
                // Combine the ranges.
                lastInstruction->fImmA += dst.count;
                return;
            }

            if (lastInstruction->fSlotA == dst.index + dst.count) {
                // The previous instruction was zeroing the range immediately after this range.
                // Combine the ranges.
                lastInstruction->fSlotA = dst.index;
                lastInstruction->fImmA += dst.count;
                return;
            }
        }
    }

    this->appendInstruction(BuilderOp::copy_constant, {dst.index}, dst.count, 0);
}

static int pack_nybbles(SkSpan<const int8_t> components) {
    // Pack up to 8 elements into nybbles, in reverse order.
    int packed = 0;
    for (auto iter = components.rbegin(); iter != components.rend(); ++iter) {
        SkASSERT(*iter >= 0 && *iter <= 0xF);
        packed <<= 4;
        packed |= *iter;
    }
    return packed;
}
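
// A worked example (illustrative only): packing {1, 2, 3} visits the components in reverse
// (3, then 2, then 1), so `packed` evolves 0x0 -> 0x3 -> 0x32 -> 0x321. Component i therefore
// lands in nybble i, counting from the least-significant end.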

template <typename T>
static void unpack_nybbles_to_offsets(uint32_t components, SkSpan<T> offsets) {
    // Unpack component nybbles into byte-offsets pointing at stack slots.
    for (size_t index = 0; index < offsets.size(); ++index) {
        offsets[index] = (components & 0xF) * SkOpts::raster_pipeline_highp_stride * sizeof(float);
        components >>= 4;
    }
}
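
// Continuing the example above (illustrative only): unpacking 0x321 into three offsets, with a
// highp stride of N lanes, yields {1, 2, 3} * N * sizeof(float) -- the byte distance from the
// base of a slot group to slots 1, 2, and 3.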

static int max_packed_nybble(uint32_t components, size_t numComponents) {
    int largest = 0;
    for (size_t index = 0; index < numComponents; ++index) {
        largest = std::max<int>(largest, components & 0xF);
        components >>= 4;
    }
    return largest;
}

void Builder::swizzle_copy_stack_to_slots(SlotRange dst,
                                          SkSpan<const int8_t> components,
                                          int offsetFromStackTop) {
    // When the execution-mask writes-enabled flag is off, we could squeeze out a little bit of
    // extra speed here by implementing and using an unmasked version of this op.

    // SlotA: fixed-range start
    // immA: number of swizzle components
    // immB: swizzle components
    // immC: offset from stack top
    this->appendInstruction(BuilderOp::swizzle_copy_stack_to_slots, {dst.index},
                            (int)components.size(),
                            pack_nybbles(components),
                            offsetFromStackTop);
}

void Builder::swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange,
                                                   int dynamicStackID,
                                                   SlotRange limitRange,
                                                   SkSpan<const int8_t> components,
                                                   int offsetFromStackTop) {
    // When the execution-mask writes-enabled flag is off, we could squeeze out a little bit of
    // extra speed here by implementing and using an unmasked version of this op.

    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: number of swizzle components
    // immB: swizzle components
    // immC: offset from stack top
    // immD: dynamic stack ID
    this->appendInstruction(BuilderOp::swizzle_copy_stack_to_slots_indirect,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            (int)components.size(),
                            pack_nybbles(components),
                            offsetFromStackTop,
                            dynamicStackID);
}

void Builder::swizzle(int consumedSlots, SkSpan<const int8_t> components) {
    // Consumes `consumedSlots` elements on the stack, then generates `components.size()` elements.
    SkASSERT(consumedSlots >= 0);

    // We only allow up to 16 elements, and they can only reach 0-15 slots, due to nybble packing.
    int numElements = components.size();
    SkASSERT(numElements <= 16);
    SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e >= 0; }));
    SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e <= 0xF; }));

    // Make a local copy of the element array.
    int8_t elements[16] = {};
    std::copy(components.begin(), components.end(), std::begin(elements));

    while (numElements > 0) {
        // If the first element of the swizzle is zero...
        if (elements[0] != 0) {
            break;
        }
        // ...and zero isn't used elsewhere in the swizzle...
        if (std::any_of(&elements[1], &elements[numElements], [](int8_t e) { return e == 0; })) {
            break;
        }
        // We can omit the first slot from the swizzle entirely.
        // Slide everything forward by one slot, and reduce the element index by one.
        for (int index = 1; index < numElements; ++index) {
            elements[index - 1] = elements[index] - 1;
        }
        elements[numElements - 1] = 0;
        --consumedSlots;
        --numElements;
    }

    // A completely empty swizzle is a discard.
    if (numElements == 0) {
        this->discard_stack(consumedSlots);
        return;
    }

    if (consumedSlots <= 4 && numElements <= 4) {
        // We can fit everything into a little swizzle.
        int op = (int)BuilderOp::swizzle_1 + numElements - 1;
        this->appendInstruction((BuilderOp)op, {}, consumedSlots,
                                pack_nybbles(SkSpan(elements, numElements)));
        return;
    }

    // This is a big swizzle. We use the `shuffle` op to handle these. immA counts the consumed
    // slots. immB counts the generated slots. immC and immD hold packed-nybble shuffle values.
    this->appendInstruction(BuilderOp::shuffle, {},
                            consumedSlots, numElements,
                            pack_nybbles(SkSpan(&elements[0], 8)),
                            pack_nybbles(SkSpan(&elements[8], 8)));
}
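
// A worked example of the leading-zero elision (illustrative only): swizzling 4 consumed slots
// with elements {0, 2, 3} keeps slot 0 in place, so the loop rewrites it as 3 consumed slots
// with elements {1, 2}. The emitted op is then a swizzle_2 over a smaller range, and the
// untouched bottom slot never moves.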

void Builder::transpose(int columns, int rows) {
    // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).
    int8_t elements[16] = {};
    size_t index = 0;
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < columns; ++c) {
            elements[index++] = (c * rows) + r;
        }
    }
    this->swizzle(/*consumedSlots=*/columns * rows, SkSpan(elements, index));
}
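
// A worked example (illustrative only): transposing a 2-column x 3-row matrix stored
// column-major in slots 0..5 emits the swizzle {0, 3, 1, 4, 2, 5} -- each output row gathers
// one element from each original column.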

void Builder::diagonal_matrix(int columns, int rows) {
    // Generates a CxR diagonal matrix from the top two scalars on the stack.
    int8_t elements[16] = {};
    size_t index = 0;
    for (int c = 0; c < columns; ++c) {
        for (int r = 0; r < rows; ++r) {
            elements[index++] = (c == r) ? 1 : 0;
        }
    }
    this->swizzle(/*consumedSlots=*/2, SkSpan(elements, index));
}
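
// For instance (illustrative only, assuming the caller pushed the literal 0 and then 1, so that
// swizzle offset 0 holds zero and offset 1 holds one): diagonal_matrix(3, 3) swizzles those two
// scalars with elements {1,0,0, 0,1,0, 0,0,1}, expanding them into a 9-slot identity matrix.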

void Builder::matrix_resize(int origColumns, int origRows, int newColumns, int newRows) {
    // Resizes a CxR matrix at the top of the stack to C'xR'.
    int8_t elements[16] = {};
    size_t index = 0;

    size_t consumedSlots = origColumns * origRows;
    size_t zeroOffset = 0, oneOffset = 0;

    for (int c = 0; c < newColumns; ++c) {
        for (int r = 0; r < newRows; ++r) {
            if (c < origColumns && r < origRows) {
                // Push an element from the original matrix.
                elements[index++] = (c * origRows) + r;
            } else {
                // This element is outside the original matrix; push 1 or 0.
                if (c == r) {
                    // We need to synthesize a literal 1.
                    if (oneOffset == 0) {
                        this->push_constant_f(1.0f);
                        oneOffset = consumedSlots++;
                    }
                    elements[index++] = oneOffset;
                } else {
                    // We need to synthesize a literal 0.
                    if (zeroOffset == 0) {
                        this->push_constant_f(0.0f);
                        zeroOffset = consumedSlots++;
                    }
                    elements[index++] = zeroOffset;
                }
            }
        }
    }
    this->swizzle(consumedSlots, SkSpan(elements, index));
}
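
// A worked example (illustrative only): resizing a 2x2 matrix (slots 0..3) to 3x3 pushes a
// literal 0 (offset 4) and a literal 1 (offset 5), then swizzles 6 consumed slots with elements
// {0,1,4, 2,3,4, 4,4,5} -- the original columns padded with zeros, plus a final {0,0,1} column.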

void Builder::matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows) {
    BuilderOp op;
    switch (leftColumns) {
        case 2:  op = BuilderOp::matrix_multiply_2; break;
        case 3:  op = BuilderOp::matrix_multiply_3; break;
        case 4:  op = BuilderOp::matrix_multiply_4; break;
        default: SkDEBUGFAIL("unsupported matrix dimensions"); return;
    }

    this->appendInstruction(op, {}, leftColumns, leftRows, rightColumns, rightRows);
}

std::unique_ptr<Program> Builder::finish(int numValueSlots,
                                         int numUniformSlots,
                                         int numImmutableSlots,
                                         DebugTracePriv* debugTrace) {
    // Verify that calls to enableExecutionMaskWrites and disableExecutionMaskWrites are balanced.
    SkASSERT(fExecutionMaskWritesEnabled == 0);

    return std::make_unique<Program>(std::move(fInstructions), numValueSlots, numUniformSlots,
                                     numImmutableSlots, fNumLabels, debugTrace);
}

void Program::optimize() {
    // TODO(johnstiles): perform any last-minute cleanup of the instruction stream here
}

static int stack_usage(const Instruction& inst) {
    switch (inst.fOp) {
        case BuilderOp::push_condition_mask:
        case BuilderOp::push_loop_mask:
        case BuilderOp::push_return_mask:
            return 1;

        case BuilderOp::push_src_rgba:
        case BuilderOp::push_dst_rgba:
        case BuilderOp::push_device_xy01:
            return 4;

        case BuilderOp::push_immutable:
        case BuilderOp::push_immutable_indirect:
        case BuilderOp::push_constant:
        case BuilderOp::push_slots:
        case BuilderOp::push_slots_indirect:
        case BuilderOp::push_uniform:
        case BuilderOp::push_uniform_indirect:
        case BuilderOp::push_clone:
        case BuilderOp::push_clone_from_stack:
        case BuilderOp::push_clone_indirect_from_stack:
        case BuilderOp::pad_stack:
            return inst.fImmA;

        case BuilderOp::pop_condition_mask:
        case BuilderOp::pop_loop_mask:
        case BuilderOp::pop_and_reenable_loop_mask:
        case BuilderOp::pop_return_mask:
            return -1;

        case BuilderOp::pop_src_rgba:
        case BuilderOp::pop_dst_rgba:
            return -4;

        case ALL_N_WAY_BINARY_OP_CASES:
        case ALL_MULTI_SLOT_BINARY_OP_CASES:
        case BuilderOp::discard_stack:
        case BuilderOp::select:
            return -inst.fImmA;

        case ALL_N_WAY_TERNARY_OP_CASES:
        case ALL_MULTI_SLOT_TERNARY_OP_CASES:
            return 2 * -inst.fImmA;

        case BuilderOp::swizzle_1:
            return 1 - inst.fImmA;  // consumes immA slots and emits a scalar
        case BuilderOp::swizzle_2:
            return 2 - inst.fImmA;  // consumes immA slots and emits a 2-slot vector
        case BuilderOp::swizzle_3:
            return 3 - inst.fImmA;  // consumes immA slots and emits a 3-slot vector
        case BuilderOp::swizzle_4:
            return 4 - inst.fImmA;  // consumes immA slots and emits a 4-slot vector

        case BuilderOp::dot_2_floats:
            return -3;  // consumes two 2-slot vectors and emits one scalar
        case BuilderOp::dot_3_floats:
            return -5;  // consumes two 3-slot vectors and emits one scalar
        case BuilderOp::dot_4_floats:
            return -7;  // consumes two 4-slot vectors and emits one scalar

        case BuilderOp::refract_4_floats:
            return -5;  // consumes nine slots (N + I + eta) and emits a 4-slot vector (R)

        case BuilderOp::matrix_multiply_2:
        case BuilderOp::matrix_multiply_3:
        case BuilderOp::matrix_multiply_4:
            // consumes the left- and right-matrices; emits result over existing padding slots
            return -(inst.fImmA * inst.fImmB + inst.fImmC * inst.fImmD);

        case BuilderOp::shuffle: {
            int consumed = inst.fImmA;
            int generated = inst.fImmB;
            return generated - consumed;
        }
        case ALL_SINGLE_SLOT_UNARY_OP_CASES:
        case ALL_MULTI_SLOT_UNARY_OP_CASES:
        case ALL_IMMEDIATE_BINARY_OP_CASES:
        default:
            return 0;
    }
}

Program::StackDepths Program::tempStackMaxDepths() const {
    // Count the number of separate temp stacks that the program uses.
    int numStacks = 1;
    for (const Instruction& inst : fInstructions) {
        numStacks = std::max(numStacks, inst.fStackID + 1);
    }

    // Walk the program and calculate how deep each stack can potentially get.
    StackDepths largest, current;
    largest.push_back_n(numStacks, 0);
    current.push_back_n(numStacks, 0);

    for (const Instruction& inst : fInstructions) {
        int stackID = inst.fStackID;
        current[stackID] += stack_usage(inst);
        largest[stackID] = std::max(current[stackID], largest[stackID]);
        // If we assert here, the generated program has popped off the top of the stack.
        SkASSERTF(current[stackID] >= 0, "unbalanced temp stack push/pop on stack %d", stackID);
    }

    // Ensure that when the program is complete, our stacks are fully balanced.
    for (int stackID = 0; stackID < numStacks; ++stackID) {
        // If we assert here, the generated program has pushed more data than it has popped.
        SkASSERTF(current[stackID] == 0, "unbalanced temp stack push/pop on stack %d", stackID);
    }

    return largest;
}

Program::Program(TArray<Instruction> instrs,
                 int numValueSlots,
                 int numUniformSlots,
                 int numImmutableSlots,
                 int numLabels,
                 DebugTracePriv* debugTrace)
        : fInstructions(std::move(instrs))
        , fNumValueSlots(numValueSlots)
        , fNumUniformSlots(numUniformSlots)
        , fNumImmutableSlots(numImmutableSlots)
        , fNumLabels(numLabels)
        , fDebugTrace(debugTrace) {
    this->optimize();

    fTempStackMaxDepths = this->tempStackMaxDepths();

    fNumTempStackSlots = 0;
    for (const int depth : fTempStackMaxDepths) {
        fNumTempStackSlots += depth;
    }

    if (fDebugTrace) {
        fTraceHook = SkSL::Tracer::Make(&fDebugTrace->fTraceInfo);
    }
}

Program::~Program() = default;

static bool immutable_data_is_splattable(int32_t* immutablePtr, int numSlots) {
    // If every value between `immutablePtr[0]` and `immutablePtr[numSlots]` is bit-identical, we
    // can use a splat.
    for (int index = 1; index < numSlots; ++index) {
        if (immutablePtr[0] != immutablePtr[index]) {
            return false;
        }
    }
    return true;
}
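
// For example (illustrative only): four immutable slots all holding 0x3F800000 (1.0f) are
// splattable, so the copy below is emitted as a single copy_constant splat rather than a
// four-slot copy.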

void Program::appendCopy(TArray<Stage>* pipeline,
                         SkArenaAlloc* alloc,
                         std::byte* basePtr,  // only used for immutable-value copies
                         ProgramOp baseStage,
                         SkRPOffset dst, int dstStride,
                         SkRPOffset src, int srcStride,
                         int numSlots) const {
    SkASSERT(numSlots >= 0);
    while (numSlots > 4) {
        // If we are appending a large copy, split it up into groups of four at a time.
        this->appendCopy(pipeline, alloc, basePtr,
                         baseStage,
                         dst, dstStride,
                         src, srcStride,
                         /*numSlots=*/4);
        dst += 4 * dstStride * sizeof(float);
        src += 4 * srcStride * sizeof(float);
        numSlots -= 4;
    }

    SkASSERT(numSlots <= 4);

    if (numSlots > 0) {
        // If we are copying immutable data, it might be representable by a splat; this is
        // preferable, since splats are a tiny bit faster than regular copies.
        if (basePtr) {
            SkASSERT(srcStride == 1);
            int32_t* immutablePtr = reinterpret_cast<int32_t*>(basePtr + src);
            if (immutable_data_is_splattable(immutablePtr, numSlots)) {
                auto stage = (ProgramOp)((int)ProgramOp::copy_constant + numSlots - 1);
                SkRasterPipeline_ConstantCtx ctx;
                ctx.dst = dst;
                ctx.value = *immutablePtr;
                pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});
                return;
            }
        }

        // We can't use a splat, so emit the requested copy op.
        auto stage = (ProgramOp)((int)baseStage + numSlots - 1);
        SkRasterPipeline_BinaryOpCtx ctx;
        ctx.dst = dst;
        ctx.src = src;
        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});
    }
}

void Program::appendCopySlotsUnmasked(TArray<Stage>* pipeline,
                                      SkArenaAlloc* alloc,
                                      SkRPOffset dst,
                                      SkRPOffset src,
                                      int numSlots) const {
    this->appendCopy(pipeline, alloc, /*basePtr=*/nullptr,
                     ProgramOp::copy_slot_unmasked,
                     dst, SkOpts::raster_pipeline_highp_stride,
                     src, SkOpts::raster_pipeline_highp_stride,
                     numSlots);
}

void Program::appendCopyImmutableUnmasked(TArray<Stage>* pipeline,
                                          SkArenaAlloc* alloc,
                                          std::byte* basePtr,
                                          SkRPOffset dst,
                                          SkRPOffset src,
                                          int numSlots) const {
    this->appendCopy(pipeline, alloc, basePtr,
                     ProgramOp::copy_immutable_unmasked,
                     dst, SkOpts::raster_pipeline_highp_stride,
                     src, 1,
                     numSlots);
}

void Program::appendCopySlotsMasked(TArray<Stage>* pipeline,
                                    SkArenaAlloc* alloc,
                                    SkRPOffset dst,
                                    SkRPOffset src,
                                    int numSlots) const {
    this->appendCopy(pipeline, alloc, /*basePtr=*/nullptr,
                     ProgramOp::copy_slot_masked,
                     dst, SkOpts::raster_pipeline_highp_stride,
                     src, SkOpts::raster_pipeline_highp_stride,
                     numSlots);
}

void Program::appendSingleSlotUnaryOp(TArray<Stage>* pipeline, ProgramOp stage,
                                      float* dst, int numSlots) const {
    SkASSERT(numSlots >= 0);
    while (numSlots--) {
        pipeline->push_back({stage, dst});
        dst += SkOpts::raster_pipeline_highp_stride;
    }
}

void Program::appendMultiSlotUnaryOp(TArray<Stage>* pipeline, ProgramOp baseStage,
                                     float* dst, int numSlots) const {
    SkASSERT(numSlots >= 0);
    while (numSlots > 0) {
        int currentSlots = std::min(numSlots, 4);
        auto stage = (ProgramOp)((int)baseStage + currentSlots - 1);
        pipeline->push_back({stage, dst});

        dst += 4 * SkOpts::raster_pipeline_highp_stride;
        numSlots -= 4;
    }
}

void Program::appendImmediateBinaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                      ProgramOp baseStage,
                                      SkRPOffset dst, int32_t value, int numSlots) const {
    SkASSERT(is_immediate_op((BuilderOp)baseStage));
    int slotsPerStage = is_multi_slot_immediate_op((BuilderOp)baseStage) ? 4 : 1;

    SkRasterPipeline_ConstantCtx ctx;
    ctx.dst = dst;
    ctx.value = value;

    SkASSERT(numSlots >= 0);
    while (numSlots > 0) {
        int currentSlots = std::min(numSlots, slotsPerStage);
        auto stage = (ProgramOp)((int)baseStage - (currentSlots - 1));
        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});

        ctx.dst += slotsPerStage * SkOpts::raster_pipeline_highp_stride * sizeof(float);
        numSlots -= slotsPerStage;
    }
}

void Program::appendAdjacentNWayBinaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                         ProgramOp stage,
                                         SkRPOffset dst, SkRPOffset src, int numSlots) const {
    // The source and destination must be directly next to one another.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src);

    if (numSlots > 0) {
        SkRasterPipeline_BinaryOpCtx ctx;
        ctx.dst = dst;
        ctx.src = src;
        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});
    }
}

void Program::appendAdjacentMultiSlotBinaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                              ProgramOp baseStage, std::byte* basePtr,
                                              SkRPOffset dst, SkRPOffset src, int numSlots) const {
    // The source and destination must be directly next to one another.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src);

    if (numSlots > 4) {
        this->appendAdjacentNWayBinaryOp(pipeline, alloc, baseStage, dst, src, numSlots);
        return;
    }
    if (numSlots > 0) {
        auto specializedStage = (ProgramOp)((int)baseStage + numSlots);
        pipeline->push_back({specializedStage, basePtr + dst});
    }
}

void Program::appendAdjacentNWayTernaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                          ProgramOp stage, std::byte* basePtr, SkRPOffset dst,
                                          SkRPOffset src0, SkRPOffset src1, int numSlots) const {
    // The float pointers must all be immediately adjacent to each other.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src0);
    SkASSERT((src0 + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src1);

    if (numSlots > 0) {
        SkRasterPipeline_TernaryOpCtx ctx;
        ctx.dst = dst;
        ctx.delta = src0 - dst;
        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});
    }
}

void Program::appendAdjacentMultiSlotTernaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                               ProgramOp baseStage, std::byte* basePtr,
                                               SkRPOffset dst, SkRPOffset src0, SkRPOffset src1,
                                               int numSlots) const {
    // The float pointers must all be immediately adjacent to each other.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src0);
    SkASSERT((src0 + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src1);

    if (numSlots > 4) {
        this->appendAdjacentNWayTernaryOp(pipeline, alloc, baseStage, basePtr,
                                          dst, src0, src1, numSlots);
        return;
    }
    if (numSlots > 0) {
        auto specializedStage = (ProgramOp)((int)baseStage + numSlots);
        pipeline->push_back({specializedStage, basePtr + dst});
    }
}

void Program::appendStackRewind(TArray<Stage>* pipeline) const {
#if defined(SKSL_STANDALONE) || !SK_HAS_MUSTTAIL
    pipeline->push_back({ProgramOp::stack_rewind, nullptr});
#endif
}

static void* context_bit_pun(intptr_t val) {
    return sk_bit_cast<void*>(val);
}

Program::SlotData Program::allocateSlotData(SkArenaAlloc* alloc) const {
    // Allocate a contiguous slab of slot data for immutables, values, and stack entries.
    const int N = SkOpts::raster_pipeline_highp_stride;
    const int scalarWidth = 1 * sizeof(float);
    const int vectorWidth = N * sizeof(float);
    const int allocSize = vectorWidth * (fNumValueSlots + fNumTempStackSlots) +
                          scalarWidth * fNumImmutableSlots;
    float* slotPtr = static_cast<float*>(alloc->makeBytesAlignedTo(allocSize, vectorWidth));
    sk_bzero(slotPtr, allocSize);

    // Store the temp stack immediately after the values, and immutable data after the stack.
    SlotData s;
    s.values    = SkSpan{slotPtr, N * fNumValueSlots};
    s.stack     = SkSpan{s.values.end(), N * fNumTempStackSlots};
    s.immutable = SkSpan{s.stack.end(), 1 * fNumImmutableSlots};
    return s;
}
1651
1653 SkArenaAlloc* alloc,
1654 RP::Callbacks* callbacks,
1655 SkSpan<const float> uniforms) const {
1656#if defined(SKSL_STANDALONE)
1657 return false;
1658#else
1659 // Convert our Instruction list to an array of ProgramOps.
1660 TArray<Stage> stages;
1661 SlotData slotData = this->allocateSlotData(alloc);
1662 this->makeStages(&stages, alloc, uniforms, slotData);
1663
1664 // Allocate buffers for branch targets and labels; these are needed to convert labels into
1665 // actual offsets into the pipeline and fix up branches.
1667 branchContexts.reserve_exact(fNumLabels);
1668 TArray<int> labelOffsets;
1669 labelOffsets.push_back_n(fNumLabels, -1);
1670 TArray<int> branchGoesToLabel;
1671 branchGoesToLabel.reserve_exact(fNumLabels);
1672
1673 auto resetBasePointer = [&]() {
1674 // Whenever we hand off control to another shader, we have to assume that it might overwrite
1675 // the base pointer (if it uses SkSL, it will!), so we reset it on return.
1676 pipeline->append(SkRasterPipelineOp::set_base_pointer, slotData.values.data());
1677 };
1678
1679 resetBasePointer();
1680
1681 for (const Stage& stage : stages) {
1682 switch (stage.op) {
1683 case ProgramOp::stack_rewind:
1684 pipeline->appendStackRewind();
1685 break;
1686
1687 case ProgramOp::invoke_shader:
1688 if (!callbacks || !callbacks->appendShader(sk_bit_cast<intptr_t>(stage.ctx))) {
1689 return false;
1690 }
1691 resetBasePointer();
1692 break;
1693
1694 case ProgramOp::invoke_color_filter:
1695 if (!callbacks || !callbacks->appendColorFilter(sk_bit_cast<intptr_t>(stage.ctx))) {
1696 return false;
1697 }
1698 resetBasePointer();
1699 break;
1700
1701 case ProgramOp::invoke_blender:
1702 if (!callbacks || !callbacks->appendBlender(sk_bit_cast<intptr_t>(stage.ctx))) {
1703 return false;
1704 }
1705 resetBasePointer();
1706 break;
1707
1708 case ProgramOp::invoke_to_linear_srgb:
1709 if (!callbacks) {
1710 return false;
1711 }
1712 callbacks->toLinearSrgb(stage.ctx);
1713 // A ColorSpaceXform shouldn't ever alter the base pointer, so we don't need to call
1714 // resetBasePointer here.
1715 break;
1716
1717 case ProgramOp::invoke_from_linear_srgb:
1718 if (!callbacks) {
1719 return false;
1720 }
1721 callbacks->fromLinearSrgb(stage.ctx);
1722 // A ColorSpaceXform shouldn't ever alter the base pointer, so we don't need to call
1723 // resetBasePointer here.
1724 break;
1725
1726 case ProgramOp::label: {
1727 // Remember the absolute pipeline position of this label.
1728 int labelID = sk_bit_cast<intptr_t>(stage.ctx);
1729 SkASSERT(labelID >= 0 && labelID < fNumLabels);
1730 labelOffsets[labelID] = pipeline->getNumStages();
1731 break;
1732 }
1733 case ProgramOp::jump:
1734 case ProgramOp::branch_if_all_lanes_active:
1735 case ProgramOp::branch_if_any_lanes_active:
1736 case ProgramOp::branch_if_no_lanes_active:
1737 case ProgramOp::branch_if_no_active_lanes_eq: {
1738 // The branch context contain a valid label ID at this point.
1739 auto* branchCtx = static_cast<SkRasterPipeline_BranchCtx*>(stage.ctx);
1740 int labelID = branchCtx->offset;
1741 SkASSERT(labelID >= 0 && labelID < fNumLabels);
1742
1743 // Replace the label ID in the branch context with the absolute pipeline position.
1744 // We will go back over the branch targets at the end and fix them up.
1745 branchCtx->offset = pipeline->getNumStages();
1746
1747 SkASSERT(branchContexts.size() == branchGoesToLabel.size());
1748 branchContexts.push_back(branchCtx);
1749 branchGoesToLabel.push_back(labelID);
1750 [[fallthrough]];
1751 }
1752 default:
1753 // Append a regular op to the program.
1754 SkASSERT((int)stage.op < kNumRasterPipelineHighpOps);
1755 pipeline->append((SkRasterPipelineOp)stage.op, stage.ctx);
1756 break;
1757 }
1758 }
1759
1760 // Now that we have assembled the program and know the pipeline positions of each label and
1761 // branch, fix up every branch target.
1762 SkASSERT(branchContexts.size() == branchGoesToLabel.size());
1763 for (int index = 0; index < branchContexts.size(); ++index) {
1764 int branchFromIdx = branchContexts[index]->offset;
1765 int branchToIdx = labelOffsets[branchGoesToLabel[index]];
1766 branchContexts[index]->offset = branchToIdx - branchFromIdx;
1767 }
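// As a worked example of this fix-up (hypothetical stage numbers): a branch
// appended at stage #4 whose label landed at stage #10 gets its offset field
// rewritten to 10 - 4 = +6, a relative jump six stages forward; a backwards
// branch produces a negative offset in the same way.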
1768
1769 return true;
1770#endif
1771}
1772
1773void Program::makeStages(TArray<Stage>* pipeline,
1774 SkArenaAlloc* alloc,
1775 SkSpan<const float> uniforms,
1776 const SlotData& slots) const {
1777 SkASSERT(fNumUniformSlots == SkToInt(uniforms.size()));
1778
1779 const int N = SkOpts::raster_pipeline_highp_stride;
1780 int mostRecentRewind = 0;
1781
1782 // Assemble a map holding the current stack-top for each temporary stack. Position each temp
1783 // stack immediately after the previous temp stack; temp stacks are never allowed to overlap.
1784 int pos = 0;
1785 TArray<float*> tempStackMap;
1786 tempStackMap.resize(fTempStackMaxDepths.size());
1787 for (int idx = 0; idx < fTempStackMaxDepths.size(); ++idx) {
1788 tempStackMap[idx] = slots.stack.begin() + (pos * N);
1789 pos += fTempStackMaxDepths[idx];
1790 }
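// For instance, with a SIMD stride of N == 8 and fTempStackMaxDepths == {2, 3}
// (hypothetical values), stack 0 starts at float offset 0 and stack 1 at
// float offset 2 * 8 == 16; the per-stack slabs never overlap.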
1791
1792 // Track labels that we have reached in processing.
1793 SkBitSet labelsEncountered(fNumLabels);
1794
1795 auto EmitStackRewindForBackwardsBranch = [&](int labelID) {
1796 // If we have already encountered the label associated with this branch, this is a
1797 // backwards branch. Add a stack-rewind immediately before the branch to ensure that
1798 // long-running loops don't use an unbounded amount of stack space.
1799 if (labelsEncountered.test(labelID)) {
1800 this->appendStackRewind(pipeline);
1801 mostRecentRewind = pipeline->size();
1802 }
1803 };
1804
1805 auto* const basePtr = (std::byte*)slots.values.data();
1806 auto OffsetFromBase = [&](const void* ptr) -> SkRPOffset {
1807 return (SkRPOffset)((const std::byte*)ptr - basePtr);
1808 };
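// Note that SkRPOffset values are byte offsets from the slab base, so
// OffsetFromBase(slots.values.data()) == 0. Storing offsets instead of raw
// pointers keeps the packed contexts compact and independent of the slab's
// absolute address, which set_base_pointer re-establishes at runtime.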
1809
1810 // Copy all immutable values into the immutable slots.
1811 for (const Instruction& inst : fInstructions) {
1812 if (inst.fOp == BuilderOp::store_immutable_value) {
1813 slots.immutable[inst.fSlotA] = sk_bit_cast<float>(inst.fImmA);
1814 }
1815 }
1816
1817 // Write each BuilderOp to the pipeline array.
1818 pipeline->reserve_exact(pipeline->size() + fInstructions.size());
1819 for (const Instruction& inst : fInstructions) {
1820 auto ImmutableA = [&]() { return &slots.immutable[1 * inst.fSlotA]; };
1821 auto ImmutableB = [&]() { return &slots.immutable[1 * inst.fSlotB]; };
1822 auto SlotA = [&]() { return &slots.values[N * inst.fSlotA]; };
1823 auto SlotB = [&]() { return &slots.values[N * inst.fSlotB]; };
1824 auto UniformA = [&]() { return &uniforms[inst.fSlotA]; };
1825 auto AllocTraceContext = [&](auto* ctx) {
1826 // We pass `ctx` solely for its type; the value is unused.
1827 using ContextType = typename std::remove_reference<decltype(*ctx)>::type;
1828 ctx = alloc->make<ContextType>();
1829 ctx->traceMask = reinterpret_cast<int*>(tempStackMap[inst.fImmA] - N);
1830 ctx->traceHook = fTraceHook.get();
1831 return ctx;
1832 };
1833 float*& tempStackPtr = tempStackMap[inst.fStackID];
1834
1835 switch (inst.fOp) {
1836 case BuilderOp::label:
1837 SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
1838 labelsEncountered.set(inst.fImmA);
1839 pipeline->push_back({ProgramOp::label, context_bit_pun(inst.fImmA)});
1840 break;
1841
1842 case BuilderOp::jump:
1843 case BuilderOp::branch_if_any_lanes_active:
1844 case BuilderOp::branch_if_no_lanes_active: {
1845 SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
1846 EmitStackRewindForBackwardsBranch(inst.fImmA);
1847
1848 auto* ctx = alloc->make<SkRasterPipeline_BranchCtx>();
1849 ctx->offset = inst.fImmA;
1850 pipeline->push_back({(ProgramOp)inst.fOp, ctx});
1851 break;
1852 }
1853 case BuilderOp::branch_if_all_lanes_active: {
1854 SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
1855 EmitStackRewindForBackwardsBranch(inst.fImmA);
1856
1857 auto* ctx = alloc->make<SkRasterPipeline_BranchIfAllLanesActiveCtx>();
1858 ctx->offset = inst.fImmA;
1859 pipeline->push_back({ProgramOp::branch_if_all_lanes_active, ctx});
1860 break;
1861 }
1862 case BuilderOp::branch_if_no_active_lanes_eq: {
1863 SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
1864 EmitStackRewindForBackwardsBranch(inst.fImmA);
1865
1866 auto* ctx = alloc->make<SkRasterPipeline_BranchIfEqualCtx>();
1867 ctx->offset = inst.fImmA;
1868 ctx->value = inst.fImmB;
1869 ctx->ptr = reinterpret_cast<int*>(tempStackPtr - N);
1870 pipeline->push_back({ProgramOp::branch_if_no_active_lanes_eq, ctx});
1871 break;
1872 }
1873 case BuilderOp::init_lane_masks: {
1874 auto* ctx = alloc->make<SkRasterPipeline_InitLaneMasksCtx>();
1875 pipeline->push_back({ProgramOp::init_lane_masks, ctx});
1876 break;
1877 }
1878 case BuilderOp::store_src_rg:
1879 pipeline->push_back({ProgramOp::store_src_rg, SlotA()});
1880 break;
1881
1882 case BuilderOp::store_src:
1883 pipeline->push_back({ProgramOp::store_src, SlotA()});
1884 break;
1885
1886 case BuilderOp::store_dst:
1887 pipeline->push_back({ProgramOp::store_dst, SlotA()});
1888 break;
1889
1890 case BuilderOp::store_device_xy01:
1891 pipeline->push_back({ProgramOp::store_device_xy01, SlotA()});
1892 break;
1893
1893
1894 case BuilderOp::store_immutable_value:
1895 // The immutable slots were populated in an earlier pass.
1896 break;
1897
1898 case BuilderOp::load_src:
1899 pipeline->push_back({ProgramOp::load_src, SlotA()});
1900 break;
1901
1902 case BuilderOp::load_dst:
1903 pipeline->push_back({ProgramOp::load_dst, SlotA()});
1904 break;
1905
1906 case ALL_SINGLE_SLOT_UNARY_OP_CASES: {
1907 float* dst = tempStackPtr - (inst.fImmA * N);
1908 this->appendSingleSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
1909 break;
1910 }
1911 case ALL_MULTI_SLOT_UNARY_OP_CASES: {
1912 float* dst = tempStackPtr - (inst.fImmA * N);
1913 this->appendMultiSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
1914 break;
1915 }
1916 case ALL_IMMEDIATE_BINARY_OP_CASES: {
1917 float* dst = (inst.fSlotA == NA) ? tempStackPtr - (inst.fImmA * N)
1918 : SlotA();
1919
1920 this->appendImmediateBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
1921 OffsetFromBase(dst), inst.fImmB, inst.fImmA);
1922 break;
1923 }
1924 case ALL_N_WAY_BINARY_OP_CASES: {
1925 float* src = tempStackPtr - (inst.fImmA * N);
1926 float* dst = tempStackPtr - (inst.fImmA * 2 * N);
1927 this->appendAdjacentNWayBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
1928 OffsetFromBase(dst), OffsetFromBase(src),
1929 inst.fImmA);
1930 break;
1931 }
1932 case ALL_MULTI_SLOT_BINARY_OP_CASES: {
1933 float* src = tempStackPtr - (inst.fImmA * N);
1934 float* dst = tempStackPtr - (inst.fImmA * 2 * N);
1935 this->appendAdjacentMultiSlotBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
1936 basePtr,
1937 OffsetFromBase(dst),
1938 OffsetFromBase(src),
1939 inst.fImmA);
1940 break;
1941 }
1942 case ALL_N_WAY_TERNARY_OP_CASES: {
1943 float* src1 = tempStackPtr - (inst.fImmA * N);
1944 float* src0 = tempStackPtr - (inst.fImmA * 2 * N);
1945 float* dst = tempStackPtr - (inst.fImmA * 3 * N);
1946 this->appendAdjacentNWayTernaryOp(pipeline, alloc, (ProgramOp)inst.fOp, basePtr,
1947 OffsetFromBase(dst),
1948 OffsetFromBase(src0),
1949 OffsetFromBase(src1),
1950 inst.fImmA);
1951 break;
1952 }
1953 case ALL_MULTI_SLOT_TERNARY_OP_CASES: {
1954 float* src1 = tempStackPtr - (inst.fImmA * N);
1955 float* src0 = tempStackPtr - (inst.fImmA * 2 * N);
1956 float* dst = tempStackPtr - (inst.fImmA * 3 * N);
1957 this->appendAdjacentMultiSlotTernaryOp(pipeline, alloc, (ProgramOp)inst.fOp, basePtr,
1958 OffsetFromBase(dst),
1959 OffsetFromBase(src0),
1960 OffsetFromBase(src1),
1961 inst.fImmA);
1962 break;
1963 }
1964 case BuilderOp::select: {
1965 float* src = tempStackPtr - (inst.fImmA * N);
1966 float* dst = tempStackPtr - (inst.fImmA * 2 * N);
1967 this->appendCopySlotsMasked(pipeline, alloc,
1968 OffsetFromBase(dst),
1969 OffsetFromBase(src),
1970 inst.fImmA);
1971 break;
1972 }
1973 case BuilderOp::copy_slot_masked:
1974 this->appendCopySlotsMasked(pipeline, alloc,
1975 OffsetFromBase(SlotA()),
1976 OffsetFromBase(SlotB()),
1977 inst.fImmA);
1978 break;
1979
1980 case BuilderOp::copy_slot_unmasked:
1981 this->appendCopySlotsUnmasked(pipeline, alloc,
1982 OffsetFromBase(SlotA()),
1983 OffsetFromBase(SlotB()),
1984 inst.fImmA);
1985 break;
1986
1987 case BuilderOp::copy_immutable_unmasked:
1988 this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,
1989 OffsetFromBase(SlotA()),
1990 OffsetFromBase(ImmutableB()),
1991 inst.fImmA);
1992 break;
1993
1994 case BuilderOp::refract_4_floats: {
1995 float* dst = tempStackPtr - (9 * N);
1996 pipeline->push_back({ProgramOp::refract_4_floats, dst});
1997 break;
1998 }
1999 case BuilderOp::inverse_mat2:
2000 case BuilderOp::inverse_mat3:
2001 case BuilderOp::inverse_mat4: {
2002 float* dst = tempStackPtr - (inst.fImmA * N);
2003 pipeline->push_back({(ProgramOp)inst.fOp, dst});
2004 break;
2005 }
2006 case BuilderOp::dot_2_floats:
2007 case BuilderOp::dot_3_floats:
2008 case BuilderOp::dot_4_floats: {
2009 float* dst = tempStackPtr - (inst.fImmA * 2 * N);
2010 pipeline->push_back({(ProgramOp)inst.fOp, dst});
2011 break;
2012 }
2013 case BuilderOp::swizzle_1: {
2014 // A single-component swizzle just copies a slot and shrinks the stack; we can
2015 // slightly improve codegen by making that simplification here.
2016 int offset = inst.fImmB;
2017 SkASSERT(offset >= 0 && offset <= 15);
2018 float* dst = tempStackPtr - (inst.fImmA * N);
2019 float* src = dst + (offset * N);
2020 if (src != dst) {
2021 this->appendCopySlotsUnmasked(pipeline, alloc,
2022 OffsetFromBase(dst),
2023 OffsetFromBase(src),
2024 /*numSlots=*/1);
2025 }
2026 break;
2027 }
2028 case BuilderOp::swizzle_2:
2029 case BuilderOp::swizzle_3:
2030 case BuilderOp::swizzle_4: {
2031 SkRasterPipeline_SwizzleCtx ctx;
2032 ctx.dst = OffsetFromBase(tempStackPtr - (N * inst.fImmA));
2033 // Unpack component nybbles into byte-offsets pointing at stack slots.
2034 unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx.offsets));
2035 pipeline->push_back({(ProgramOp)inst.fOp, SkRPCtxUtils::Pack(ctx, alloc)});
2036 break;
2037 }
2038 case BuilderOp::shuffle: {
2039 int consumed = inst.fImmA;
2040 int generated = inst.fImmB;
2041
2042 auto* ctx = alloc->make<SkRasterPipeline_ShuffleCtx>();
2043 ctx->ptr = reinterpret_cast<int32_t*>(tempStackPtr) - (N * consumed);
2044 ctx->count = generated;
2045 // Unpack immB and immC from nybble form into the offset array.
2046 unpack_nybbles_to_offsets(inst.fImmC, SkSpan(&ctx->offsets[0], 8));
2047 unpack_nybbles_to_offsets(inst.fImmD, SkSpan(&ctx->offsets[8], 8));
2048 pipeline->push_back({ProgramOp::shuffle, ctx});
2049 break;
2050 }
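// A sketch of the nybble encoding used above: each 4-bit value k packed into
// immC/immD selects source slot k, and unpack_nybbles_to_offsets scales it to
// the byte offset k * N * sizeof(float) from ctx->ptr; e.g. a nybble of 0x3
// (hypothetically) addresses the fourth slot of the consumed range.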
2051 case BuilderOp::matrix_multiply_2:
2052 case BuilderOp::matrix_multiply_3:
2053 case BuilderOp::matrix_multiply_4: {
2054 int consumed = (inst.fImmB * inst.fImmC) + // result
2055 (inst.fImmA * inst.fImmB) + // left-matrix
2056 (inst.fImmC * inst.fImmD); // right-matrix
2057
2058 SkRasterPipeline_MatrixMultiplyCtx ctx;
2059 ctx.dst = OffsetFromBase(tempStackPtr - (N * consumed));
2060 ctx.leftColumns = inst.fImmA;
2061 ctx.leftRows = inst.fImmB;
2062 ctx.rightColumns = inst.fImmC;
2063 ctx.rightRows = inst.fImmD;
2064 pipeline->push_back({(ProgramOp)inst.fOp, SkRPCtxUtils::Pack(ctx, alloc)});
2065 break;
2066 }
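// For example, a 2x2 * 2x2 multiply (immA through immD all 2, hypothetically)
// consumes (2*2) + (2*2) + (2*2) == 12 stack slots; the four result slots sit
// lowest, with the left and right matrices stacked above them.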
2067 case BuilderOp::exchange_src: {
2068 float* dst = tempStackPtr - (4 * N);
2069 pipeline->push_back({ProgramOp::exchange_src, dst});
2070 break;
2071 }
2072 case BuilderOp::push_src_rgba: {
2073 float* dst = tempStackPtr;
2074 pipeline->push_back({ProgramOp::store_src, dst});
2075 break;
2076 }
2077 case BuilderOp::push_dst_rgba: {
2078 float* dst = tempStackPtr;
2079 pipeline->push_back({ProgramOp::store_dst, dst});
2080 break;
2081 }
2082 case BuilderOp::push_device_xy01: {
2083 float* dst = tempStackPtr;
2084 pipeline->push_back({ProgramOp::store_device_xy01, dst});
2085 break;
2086 }
2087 case BuilderOp::pop_src_rgba: {
2088 float* src = tempStackPtr - (4 * N);
2089 pipeline->push_back({ProgramOp::load_src, src});
2090 break;
2091 }
2092 case BuilderOp::pop_dst_rgba: {
2093 float* src = tempStackPtr - (4 * N);
2094 pipeline->push_back({ProgramOp::load_dst, src});
2095 break;
2096 }
2097 case BuilderOp::push_slots: {
2098 float* dst = tempStackPtr;
2099 this->appendCopySlotsUnmasked(pipeline, alloc,
2100 OffsetFromBase(dst),
2101 OffsetFromBase(SlotA()),
2102 inst.fImmA);
2103 break;
2104 }
2105 case BuilderOp::push_immutable: {
2106 float* dst = tempStackPtr;
2107 this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,
2108 OffsetFromBase(dst),
2109 OffsetFromBase(ImmutableA()),
2110 inst.fImmA);
2111 break;
2112 }
2113 case BuilderOp::push_slots_indirect:
2114 case BuilderOp::push_immutable_indirect:
2115 case BuilderOp::push_uniform_indirect:
2116 case BuilderOp::copy_stack_to_slots_indirect: {
2117 // SlotA: fixed-range start
2118 // SlotB: limit-range end
2119 // immA: number of slots to copy
2120 // immB: dynamic stack ID
2121 ProgramOp op;
2122 auto* ctx = alloc->make<SkRasterPipeline_CopyIndirectCtx>();
2123 ctx->indirectOffset =
2124 reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmB]) - (1 * N);
2125 ctx->indirectLimit = inst.fSlotB - inst.fSlotA - inst.fImmA;
2126 ctx->slots = inst.fImmA;
2127 if (inst.fOp == BuilderOp::push_slots_indirect) {
2128 op = ProgramOp::copy_from_indirect_unmasked;
2129 ctx->src = reinterpret_cast<const int32_t*>(SlotA());
2130 ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
2131 } else if (inst.fOp == BuilderOp::push_immutable_indirect) {
2132 // We reuse the indirect-uniform op for indirect copies of immutable data.
2133 op = ProgramOp::copy_from_indirect_uniform_unmasked;
2134 ctx->src = reinterpret_cast<const int32_t*>(ImmutableA());
2135 ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
2136 } else if (inst.fOp == BuilderOp::push_uniform_indirect) {
2137 op = ProgramOp::copy_from_indirect_uniform_unmasked;
2138 ctx->src = reinterpret_cast<const int32_t*>(UniformA());
2139 ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
2140 } else {
2141 op = ProgramOp::copy_to_indirect_masked;
2142 ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (ctx->slots * N);
2143 ctx->dst = reinterpret_cast<int32_t*>(SlotA());
2144 }
2145 pipeline->push_back({op, ctx});
2146 break;
2147 }
2148 case BuilderOp::push_uniform:
2149 case BuilderOp::copy_uniform_to_slots_unmasked: {
2150 const float* src = UniformA();
2151 float* dst = (inst.fOp == BuilderOp::push_uniform) ? tempStackPtr : SlotB();
2152
2153 for (int remaining = inst.fImmA; remaining > 0; remaining -= 4) {
2154 auto ctx = alloc->make<SkRasterPipeline_UniformCtx>();
2155 ctx->dst = reinterpret_cast<int32_t*>(dst);
2156 ctx->src = reinterpret_cast<const int32_t*>(src);
2157 switch (remaining) {
2158 case 1: pipeline->push_back({ProgramOp::copy_uniform, ctx}); break;
2159 case 2: pipeline->push_back({ProgramOp::copy_2_uniforms, ctx}); break;
2160 case 3: pipeline->push_back({ProgramOp::copy_3_uniforms, ctx}); break;
2161 default: pipeline->push_back({ProgramOp::copy_4_uniforms, ctx}); break;
2162 }
2163 dst += 4 * N;
2164 src += 4;
2165 }
2166 break;
2167 }
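// The loop above copies uniforms in chunks of at most four slots: e.g. a
// hypothetical six-slot uniform emits copy_4_uniforms followed by
// copy_2_uniforms, advancing dst by 4*N floats and src by 4 floats per chunk.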
2168 case BuilderOp::store_condition_mask: {
2169 float* dst = tempStackPtr;
2170 pipeline->push_back({ProgramOp::store_condition_mask, dst});
2171 break;
2172 }
2173 case BuilderOp::load_condition_mask: {
2174 float* src = tempStackPtr - (1 * N);
2175 pipeline->push_back({ProgramOp::load_condition_mask, src});
2176 break;
2177 }
2178 case BuilderOp::merge_condition_mask:
2179 case BuilderOp::merge_inv_condition_mask: {
2180 float* ptr = tempStackPtr - (2 * N);
2181 pipeline->push_back({(ProgramOp)inst.fOp, ptr});
2182 break;
2183 }
2184 case BuilderOp::store_loop_mask: {
2185 float* dst = tempStackPtr;
2186 pipeline->push_back({ProgramOp::store_loop_mask, dst});
2187 break;
2188 }
2189 case BuilderOp::load_loop_mask: {
2190 float* src = tempStackPtr - (1 * N);
2191 pipeline->push_back({ProgramOp::load_loop_mask, src});
2192 break;
2193 }
2194 case BuilderOp::pop_and_reenable_loop_mask: {
2195 float* src = tempStackPtr - (1 * N);
2196 pipeline->push_back({ProgramOp::reenable_loop_mask, src});
2197 break;
2198 }
2199 case BuilderOp::reenable_loop_mask:
2200 pipeline->push_back({ProgramOp::reenable_loop_mask, SlotA()});
2201 break;
2202
2203 case BuilderOp::mask_off_loop_mask:
2204 pipeline->push_back({ProgramOp::mask_off_loop_mask, nullptr});
2205 break;
2206
2207 case BuilderOp::merge_loop_mask: {
2208 float* src = tempStackPtr - (1 * N);
2209 pipeline->push_back({ProgramOp::merge_loop_mask, src});
2210 break;
2211 }
2212 case BuilderOp::store_return_mask: {
2213 float* dst = tempStackPtr;
2214 pipeline->push_back({ProgramOp::store_return_mask, dst});
2215 break;
2216 }
2217 case BuilderOp::load_return_mask: {
2218 float* src = tempStackPtr - (1 * N);
2219 pipeline->push_back({ProgramOp::load_return_mask, src});
2220 break;
2221 }
2222 case BuilderOp::mask_off_return_mask:
2223 pipeline->push_back({ProgramOp::mask_off_return_mask, nullptr});
2224 break;
2225
2226 case BuilderOp::copy_constant:
2227 case BuilderOp::push_constant: {
2228 float* dst = (inst.fOp == BuilderOp::copy_constant) ? SlotA() : tempStackPtr;
2229 // Splat constant values onto the stack.
2230 for (int remaining = inst.fImmA; remaining > 0; remaining -= 4) {
2231 SkRasterPipeline_ConstantCtx ctx;
2232 ctx.dst = OffsetFromBase(dst);
2233 ctx.value = inst.fImmB;
2234 void* ptr = SkRPCtxUtils::Pack(ctx, alloc);
2235 switch (remaining) {
2236 case 1: pipeline->push_back({ProgramOp::copy_constant, ptr}); break;
2237 case 2: pipeline->push_back({ProgramOp::splat_2_constants, ptr}); break;
2238 case 3: pipeline->push_back({ProgramOp::splat_3_constants, ptr}); break;
2239 default: pipeline->push_back({ProgramOp::splat_4_constants, ptr}); break;
2240 }
2241 dst += 4 * N;
2242 }
2243 break;
2244 }
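// The splat loop uses the same chunking scheme as the uniform copies above:
// a hypothetical five-slot constant emits splat_4_constants followed by a
// single-slot copy_constant.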
2245 case BuilderOp::copy_stack_to_slots: {
2246 float* src = tempStackPtr - (inst.fImmB * N);
2247 this->appendCopySlotsMasked(pipeline, alloc,
2248 OffsetFromBase(SlotA()),
2249 OffsetFromBase(src),
2250 inst.fImmA);
2251 break;
2252 }
2253 case BuilderOp::copy_stack_to_slots_unmasked: {
2254 float* src = tempStackPtr - (inst.fImmB * N);
2255 this->appendCopySlotsUnmasked(pipeline, alloc,
2256 OffsetFromBase(SlotA()),
2257 OffsetFromBase(src),
2258 inst.fImmA);
2259 break;
2260 }
2261 case BuilderOp::swizzle_copy_stack_to_slots: {
2262 // SlotA: fixed-range start
2263 // immA: number of swizzle components
2264 // immB: swizzle components
2265 // immC: offset from stack top
2266 auto stage = (ProgramOp)((int)ProgramOp::swizzle_copy_slot_masked + inst.fImmA - 1);
2267 auto* ctx = alloc->make<SkRasterPipeline_SwizzleCopyCtx>();
2268 ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (inst.fImmC * N);
2269 ctx->dst = reinterpret_cast<int32_t*>(SlotA());
2270 unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));
2271 pipeline->push_back({stage, ctx});
2272 break;
2273 }
2274 case BuilderOp::push_clone: {
2275 float* src = tempStackPtr - (inst.fImmB * N);
2276 float* dst = tempStackPtr;
2277 this->appendCopySlotsUnmasked(pipeline, alloc,
2278 OffsetFromBase(dst),
2279 OffsetFromBase(src),
2280 inst.fImmA);
2281 break;
2282 }
2283 case BuilderOp::push_clone_from_stack: {
2284 // immA: number of slots
2285 // immB: other stack ID
2286 // immC: offset from stack top
2287 float* sourceStackPtr = tempStackMap[inst.fImmB];
2288 float* src = sourceStackPtr - (inst.fImmC * N);
2289 float* dst = tempStackPtr;
2290 this->appendCopySlotsUnmasked(pipeline, alloc,
2291 OffsetFromBase(dst),
2292 OffsetFromBase(src),
2293 inst.fImmA);
2294 break;
2295 }
2296 case BuilderOp::push_clone_indirect_from_stack: {
2297 // immA: number of slots
2298 // immB: other stack ID
2299 // immC: offset from stack top
2300 // immD: dynamic stack ID
2301 float* sourceStackPtr = tempStackMap[inst.fImmB];
2302
2303 auto* ctx = alloc->make<SkRasterPipeline_CopyIndirectCtx>();
2304 ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
2305 ctx->src = reinterpret_cast<const int32_t*>(sourceStackPtr) - (inst.fImmC * N);
2306 ctx->indirectOffset =
2307 reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmD]) - (1 * N);
2308 ctx->indirectLimit = inst.fImmC - inst.fImmA;
2309 ctx->slots = inst.fImmA;
2310 pipeline->push_back({ProgramOp::copy_from_indirect_unmasked, ctx});
2311 break;
2312 }
2313 case BuilderOp::swizzle_copy_stack_to_slots_indirect: {
2314 // SlotA: fixed-range start
2315 // SlotB: limit-range end
2316 // immA: number of swizzle components
2317 // immB: swizzle components
2318 // immC: offset from stack top
2319 // immD: dynamic stack ID
2320 auto* ctx = alloc->make<SkRasterPipeline_SwizzleCopyIndirectCtx>();
2321 ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (inst.fImmC * N);
2322 ctx->dst = reinterpret_cast<int32_t*>(SlotA());
2323 ctx->indirectOffset =
2324 reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmD]) - (1 * N);
2325 ctx->indirectLimit =
2326 inst.fSlotB - inst.fSlotA - (max_packed_nybble(inst.fImmB, inst.fImmA) + 1);
2327 ctx->slots = inst.fImmA;
2328 unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));
2329 pipeline->push_back({ProgramOp::swizzle_copy_to_indirect_masked, ctx});
2330 break;
2331 }
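// Here indirectLimit clamps the dynamic index so the swizzled write stays in
// the limit range: e.g. for a hypothetical ten-slot range and a .w swizzle
// (highest packed component 3), the limit is 10 - (3 + 1) == 6.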
2332 case BuilderOp::case_op: {
2333 SkRasterPipeline_CaseOpCtx ctx;
2334 ctx.expectedValue = inst.fImmA;
2335 ctx.offset = OffsetFromBase(tempStackPtr - (2 * N));
2336 pipeline->push_back({ProgramOp::case_op, SkRPCtxUtils::Pack(ctx, alloc)});
2337 break;
2338 }
2339 case BuilderOp::continue_op:
2340 pipeline->push_back({ProgramOp::continue_op, tempStackMap[inst.fImmA] - (1 * N)});
2341 break;
2342
2343 case BuilderOp::discard_stack:
2344 case BuilderOp::pad_stack:
2345 break;
2346
2347 case BuilderOp::invoke_shader:
2348 case BuilderOp::invoke_color_filter:
2349 case BuilderOp::invoke_blender:
2350 pipeline->push_back({(ProgramOp)inst.fOp, context_bit_pun(inst.fImmA)});
2351 break;
2352
2353 case BuilderOp::invoke_to_linear_srgb:
2354 case BuilderOp::invoke_from_linear_srgb:
2355 pipeline->push_back({(ProgramOp)inst.fOp, tempStackMap[inst.fImmA] - (4 * N)});
2356 break;
2357
2358 case BuilderOp::trace_line: {
2359 auto* ctx = AllocTraceContext((SkRasterPipeline_TraceLineCtx*)nullptr);
2360 ctx->lineNumber = inst.fImmB;
2361 pipeline->push_back({ProgramOp::trace_line, ctx});
2362 break;
2363 }
2364 case BuilderOp::trace_scope: {
2365 auto* ctx = AllocTraceContext((SkRasterPipeline_TraceScopeCtx*)nullptr);
2366 ctx->delta = inst.fImmB;
2367 pipeline->push_back({ProgramOp::trace_scope, ctx});
2368 break;
2369 }
2370 case BuilderOp::trace_enter:
2371 case BuilderOp::trace_exit: {
2372 auto* ctx = AllocTraceContext((SkRasterPipeline_TraceFuncCtx*)nullptr);
2373 ctx->funcIdx = inst.fImmB;
2374 pipeline->push_back({(ProgramOp)inst.fOp, ctx});
2375 break;
2376 }
2377 case BuilderOp::trace_var:
2378 case BuilderOp::trace_var_indirect: {
2379 // SlotA: fixed-range start
2380 // SlotB: limit-range end
2381 // immA: trace-mask stack ID
2382 // immB: number of slots
2383 // immC: dynamic stack ID
2384 auto* ctx = AllocTraceContext((SkRasterPipeline_TraceVarCtx*)nullptr);
2385 ctx->slotIdx = inst.fSlotA;
2386 ctx->numSlots = inst.fImmB;
2387 ctx->data = reinterpret_cast<int*>(SlotA());
2388 if (inst.fOp == BuilderOp::trace_var_indirect) {
2389 ctx->indirectOffset =
2390 reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmC]) - (1 * N);
2391 ctx->indirectLimit = inst.fSlotB - inst.fSlotA - inst.fImmB;
2392 } else {
2393 ctx->indirectOffset = nullptr;
2394 ctx->indirectLimit = 0;
2395 }
2396 pipeline->push_back({ProgramOp::trace_var, ctx});
2397 break;
2398 }
2399 default:
2400 SkDEBUGFAILF("Raster Pipeline: unsupported instruction %d", (int)inst.fOp);
2401 break;
2402 }
2403
2404 int stackUsage = stack_usage(inst);
2405 if (stackUsage != 0) {
2406 tempStackPtr += stackUsage * N;
2407 SkASSERT(tempStackPtr >= slots.stack.begin());
2408 SkASSERT(tempStackPtr <= slots.stack.end());
2409 }
2410
2411 // Rewind the stack every 500 instructions. When SK_HAS_MUSTTAIL is set,
2412 // rewinds are not actually used; the appendStackRewind call becomes a no-op. On platforms
2413 // that don't support SK_HAS_MUSTTAIL, rewinding the stack periodically can prevent a
2414 // potential stack overflow when running a long program.
2415 int numPipelineStages = pipeline->size();
2416 if (numPipelineStages - mostRecentRewind > 500) {
2417 this->appendStackRewind(pipeline);
2418 mostRecentRewind = numPipelineStages;
2419 }
2420 }
2421}
2422
2423class Program::Dumper {
2424 public:
2425 Dumper(const Program& p) : fProgram(p) {}
2426
2427 void dump(SkWStream* out, bool writeInstructionCount);
2428
2429 // Finds the labels in the program, and keeps track of their offsets.
2430 void buildLabelToStageMap() {
2431 for (int index = 0; index < fStages.size(); ++index) {
2432 if (fStages[index].op == ProgramOp::label) {
2433 int labelID = sk_bit_cast<intptr_t>(fStages[index].ctx);
2434 SkASSERT(!fLabelToStageMap.find(labelID));
2435 fLabelToStageMap[labelID] = index;
2436 }
2437 }
2438 }
2439
2440 // Assign unique names to each variable slot; our trace might have multiple variables with the
2441 // same name, which can make a dump hard to read. We disambiguate them with subscripts.
2442 void buildUniqueSlotNameList() {
2443 if (fProgram.fDebugTrace) {
2444 fSlotNameList.reserve_exact(fProgram.fDebugTrace->fSlotInfo.size());
2445
2446 // The map consists of <variable name, <source position, unique name>>.
2447 THashMap<std::string_view, THashMap<int, std::string>> uniqueNameMap;
2448
2449 for (const SlotDebugInfo& slotInfo : fProgram.fDebugTrace->fSlotInfo) {
2450 // Look up this variable by its name and source position.
2451 int pos = slotInfo.pos.valid() ? slotInfo.pos.startOffset() : 0;
2452 THashMap<int, std::string>& positionMap = uniqueNameMap[slotInfo.name];
2453 std::string& uniqueName = positionMap[pos];
2454
2455 // Have we seen this variable name/position combination before?
2456 if (uniqueName.empty()) {
2457 // This is a unique name/position pair.
2458 uniqueName = slotInfo.name;
2459
2460 // But if it's not a unique _name_, it deserves a subscript to disambiguate it.
2461 int subscript = positionMap.count() - 1;
2462 if (subscript > 0) {
2463 for (char digit : std::to_string(subscript)) {
2464 // U+2080 through U+2089 (₀₁₂₃₄₅₆₇₈₉) in UTF8:
2465 uniqueName.push_back((char)0xE2);
2466 uniqueName.push_back((char)0x82);
2467 uniqueName.push_back((char)(0x80 + digit - '0'));
2468 }
2469 }
2470 }
2471
2472 fSlotNameList.push_back(uniqueName);
2473 }
2474 }
2475 }
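// For example, if two distinct variables were both named `x` (hypothetically),
// the second becomes `x₁`: subscript digit 1 is the UTF-8 byte sequence
// 0xE2 0x82 0x81 (U+2081).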
2476
2477 // Interprets the context value as a branch offset.
2478 std::string branchOffset(const SkRasterPipeline_BranchCtx* ctx, int index) const {
2479 // The context's offset field contains a label ID
2480 int labelID = ctx->offset;
2481 const int* targetIndex = fLabelToStageMap.find(labelID);
2482 SkASSERT(targetIndex);
2483 return SkSL::String::printf("%+d (label %d at #%d)", *targetIndex - index, labelID,
2484 *targetIndex + 1);
2485 }
2486
2487 // Prints a 32-bit immediate value of unknown type (int/float).
2488 std::string imm(float immFloat, bool showAsFloat = true) const {
2489 // Special case exact zero as "0" for readability (vs `0x00000000 (0.0)`).
2490 if (sk_bit_cast<int32_t>(immFloat) == 0) {
2491 return "0";
2492 }
2493 // Start with `0x3F800000` as a baseline.
2494 uint32_t immUnsigned;
2495 memcpy(&immUnsigned, &immFloat, sizeof(uint32_t));
2496 auto text = SkSL::String::printf("0x%08X", immUnsigned);
2497
2498 // Extend it to `0x3F800000 (1.0)` for finite floating point values.
2499 if (showAsFloat && std::isfinite(immFloat)) {
2500 text += " (";
2501 text += skstd::to_string(immFloat);
2502 text += ')';
2503 }
2504 return text;
2505 }
2506
2507 // Interprets the context pointer as a 32-bit immediate value of unknown type (int/float).
2508 std::string immCtx(const void* ctx, bool showAsFloat = true) const {
2509 float f;
2510 memcpy(&f, &ctx, sizeof(float));
2511 return this->imm(f, showAsFloat);
2512 }
2513
2514 // Prints `1` for single slots and `1..3` for ranges of slots.
2515 std::string asRange(int first, int count) const {
2516 std::string text = std::to_string(first);
2517 if (count > 1) {
2518 text += ".." + std::to_string(first + count - 1);
2519 }
2520 return text;
2521 }
2522
2523 // Generates a reasonable name for a range of slots or uniforms, e.g.:
2524 // `val`: slot range points at one variable, named val
2525 // `val(0..1)`: slot range points at the first and second slot of val (which has 3+ slots)
2526 // `foo, bar`: slot range fully covers two variables, named foo and bar
2527 // `foo(3), bar(0)`: slot range covers the fourth slot of foo and the first slot of bar
2528 std::string slotOrUniformName(SkSpan<const SlotDebugInfo> debugInfo,
2529 SkSpan<const std::string> names,
2530 SlotRange range) const {
2531 SkASSERT(range.index >= 0 && (range.index + range.count) <= (int)debugInfo.size());
2532
2533 std::string text;
2534 auto separator = SkSL::String::Separator();
2535 while (range.count > 0) {
2536 const SlotDebugInfo& slotInfo = debugInfo[range.index];
2537 text += separator();
2538 text += names.empty() ? slotInfo.name : names[range.index];
2539
2540 // Figure out how many slots we can chomp in this iteration.
2541 int entireVariable = slotInfo.columns * slotInfo.rows;
2542 int slotsToChomp = std::min(range.count, entireVariable - slotInfo.componentIndex);
2543 // If we aren't consuming an entire variable, from first slot to last...
2544 if (slotsToChomp != entireVariable) {
2545 // ... decorate it with a range suffix.
2546 text += '(' + this->asRange(slotInfo.componentIndex, slotsToChomp) + ')';
2547 }
2548 range.index += slotsToChomp;
2549 range.count -= slotsToChomp;
2550 }
2551
2552 return text;
2553 }
2554
2555 // Generates a reasonable name for a range of slots.
2556 std::string slotName(SlotRange range) const {
2557 return this->slotOrUniformName(fProgram.fDebugTrace->fSlotInfo, fSlotNameList, range);
2558 }
2559
2560 // Generates a reasonable name for a range of uniforms.
2561 std::string uniformName(SlotRange range) const {
2562 return this->slotOrUniformName(fProgram.fDebugTrace->fUniformInfo, /*names=*/{}, range);
2563 }
2564
2565 // Attempts to interpret the passed-in pointer as a uniform range.
2566 std::string uniformPtrCtx(const float* ptr, int numSlots) const {
2567 const float* end = ptr + numSlots;
2568 if (ptr >= fUniforms.begin() && end <= fUniforms.end()) {
2569 int uniformIdx = ptr - fUniforms.begin();
2570 if (fProgram.fDebugTrace) {
2571 // Handle pointers to named uniform slots.
2572 std::string name = this->uniformName({uniformIdx, numSlots});
2573 if (!name.empty()) {
2574 return name;
2575 }
2576 }
2577 // Handle pointers to uniforms (when no debug info exists).
2578 return 'u' + this->asRange(uniformIdx, numSlots);
2579 }
2580 return {};
2581 }
2582
2583 // Attempts to interpret the passed-in pointer as a value slot range.
2584 std::string valuePtrCtx(const float* ptr, int numSlots) const {
2585 const float* end = ptr + (N * numSlots);
2586 if (ptr >= fSlots.values.begin() && end <= fSlots.values.end()) {
2587 int valueIdx = ptr - fSlots.values.begin();
2588 SkASSERT((valueIdx % N) == 0);
2589 valueIdx /= N;
2590 if (fProgram.fDebugTrace) {
2591 // Handle pointers to named value slots.
2592 std::string name = this->slotName({valueIdx, numSlots});
2593 if (!name.empty()) {
2594 return name;
2595 }
2596 }
2597 // Handle pointers to value slots (when no debug info exists).
2598 return 'v' + this->asRange(valueIdx, numSlots);
2599 }
2600 return {};
2601 }
2602
2603 // Attempts to interpret the passed-in pointer as an immutable slot range.
2604 std::string immutablePtrCtx(const float* ptr, int numSlots) const {
2605 const float* end = ptr + numSlots;
2606 if (ptr >= fSlots.immutable.begin() && end <= fSlots.immutable.end()) {
2607 int index = ptr - fSlots.immutable.begin();
2608 return 'i' + this->asRange(index, numSlots) + ' ' +
2609 this->multiImmCtx(ptr, numSlots);
2610 }
2611 return {};
2612 }
2613
2614 // Interprets the context value as a pointer to `count` immediate values.
2615 std::string multiImmCtx(const float* ptr, int count) const {
2616 // If this is a uniform, print it by name.
2617 if (std::string text = this->uniformPtrCtx(ptr, count); !text.empty()) {
2618 return text;
2619 }
2620 // Emit a single bracketed immediate.
2621 if (count == 1) {
2622 return '[' + this->imm(*ptr) + ']';
2623 }
2624 // Emit a list like `[0x00000000 (0.0), 0x3F800000 (1.0)]`.
2625 std::string text = "[";
2626 auto separator = SkSL::String::Separator();
2627 while (count--) {
2628 text += separator();
2629 text += this->imm(*ptr++);
2630 }
2631 return text + ']';
2632 }
2633
2634 // Interprets the context value as a generic pointer.
2635 std::string ptrCtx(const void* ctx, int numSlots) const {
2636 const float *ctxAsSlot = static_cast<const float*>(ctx);
2637 // Check for uniform, value, and immutable pointers.
2638 if (std::string uniform = this->uniformPtrCtx(ctxAsSlot, numSlots); !uniform.empty()) {
2639 return uniform;
2640 }
2641 if (std::string value = this->valuePtrCtx(ctxAsSlot, numSlots); !value.empty()) {
2642 return value;
2643 }
2644 if (std::string value = this->immutablePtrCtx(ctxAsSlot, numSlots); !value.empty()) {
2645 return value;
2646 }
2647 // Handle pointers to temporary stack slots.
2648 if (ctxAsSlot >= fSlots.stack.begin() && ctxAsSlot < fSlots.stack.end()) {
2649 int stackIdx = ctxAsSlot - fSlots.stack.begin();
2650 SkASSERT((stackIdx % N) == 0);
2651 return '$' + this->asRange(stackIdx / N, numSlots);
2652 }
2653 // This pointer is outside our expected ranges; that generally shouldn't happen.
2654 return "ExternalPtr(" + this->asRange(0, numSlots) + ")";
2655 }
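// To summarize the dump notation built above: `u` prefixes uniform slots, `v`
// value slots, `i` immutable slots, and `$` temporary stack slots (e.g. the
// illustrative range `$0..3`); named variables are substituted whenever debug
// info is available.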
2656
2657 // Converts an SkRPOffset to a pointer into the value-slot range.
2658 std::byte* offsetToPtr(SkRPOffset offset) const {
2659 return (std::byte*)fSlots.values.data() + offset;
2660 }
2661
2662 // Interprets a slab offset as a slot range.
2663 std::string offsetCtx(SkRPOffset offset, int numSlots) const {
2664 return this->ptrCtx(this->offsetToPtr(offset), numSlots);
2665 }
2666
2667 // Interprets the context value as a packed ConstantCtx structure.
2668 std::tuple<std::string, std::string> constantCtx(const void* v,
2669 int slots,
2670 bool showAsFloat = true) const {
2671 auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_ConstantCtx*)v);
2672 return {this->offsetCtx(ctx.dst, slots),
2673 this->imm(sk_bit_cast<float>(ctx.value), showAsFloat)};
2674 }
2675
2676 // Interprets the context value as a BinaryOp structure for copy_n_slots (numSlots is dictated
2677 // by the op itself).
2678 std::tuple<std::string, std::string> binaryOpCtx(const void* v, int numSlots) const {
2679 auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_BinaryOpCtx*)v);
2680 return {this->offsetCtx(ctx.dst, numSlots),
2681 this->offsetCtx(ctx.src, numSlots)};
2682 }
2683
2684 // Interprets the context value as a BinaryOp structure for copy_n_uniforms (numSlots is
2685 // dictated by the op itself).
2686 std::tuple<std::string, std::string> copyUniformCtx(const void* v, int numSlots) const {
2687 const auto *ctx = static_cast<const SkRasterPipeline_UniformCtx*>(v);
2688 return {this->ptrCtx(ctx->dst, numSlots),
2689 this->multiImmCtx(reinterpret_cast<const float*>(ctx->src), numSlots)};
2690 }
2691
2692 // Interprets the context value as a pointer to two adjacent values.
2693 std::tuple<std::string, std::string> adjacentPtrCtx(const void* ctx, int numSlots) const {
2694 const float *ctxAsSlot = static_cast<const float*>(ctx);
2695 return std::make_tuple(this->ptrCtx(ctxAsSlot, numSlots),
2696 this->ptrCtx(ctxAsSlot + (N * numSlots), numSlots));
2697 }
2698
2699 // Interprets a slab offset as two adjacent slot ranges.
2700 std::tuple<std::string, std::string> adjacentOffsetCtx(SkRPOffset offset, int numSlots) const {
2701 return this->adjacentPtrCtx((std::byte*)fSlots.values.data() + offset, numSlots);
2702 }
2703
2704 // Interprets the context value as a BinaryOp structure (numSlots is inferred from the distance
2705 // between pointers).
2706 std::tuple<std::string, std::string> adjacentBinaryOpCtx(const void* v) const {
2707 auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_BinaryOpCtx*)v);
2708 int numSlots = (ctx.src - ctx.dst) / (N * sizeof(float));
2709 return this->adjacentOffsetCtx(ctx.dst, numSlots);
2710 }
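// The slot count falls out of the adjacent layout: the source operand starts
// immediately after the destination, so with a hypothetical stride of N == 8,
// a byte gap of ctx.src - ctx.dst == 96 implies 96 / (8 * sizeof(float)) == 3
// slots.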
2711
2712 // Interprets the context value as a pointer to three adjacent values.
2713 std::tuple<std::string, std::string, std::string> adjacent3PtrCtx(const void* ctx,
2714 int numSlots) const {
2715 const float *ctxAsSlot = static_cast<const float*>(ctx);
2716 return {this->ptrCtx(ctxAsSlot, numSlots),
2717 this->ptrCtx(ctxAsSlot + (N * numSlots), numSlots),
2718 this->ptrCtx(ctxAsSlot + (2 * N * numSlots), numSlots)};
2719 }
2720
2721 // Interprets a slab offset as three adjacent slot ranges.
2722 std::tuple<std::string, std::string, std::string> adjacent3OffsetCtx(SkRPOffset offset,
2723 int numSlots) const {
2724 return this->adjacent3PtrCtx((std::byte*)fSlots.values.data() + offset, numSlots);
2725 }
2726
2727 // Interprets the context value as a TernaryOp structure (numSlots is inferred from `delta`).
2728 std::tuple<std::string, std::string, std::string> adjacentTernaryOpCtx(const void* v) const {
2729 auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_TernaryOpCtx*)v);
2730 int numSlots = ctx.delta / (sizeof(float) * N);
2731 return this->adjacent3OffsetCtx(ctx.dst, numSlots);
2732 }
2733
2734 // Stringizes a span of swizzle offsets to the textual equivalent (`xyzw`).
2735 template <typename T>
2736 std::string swizzleOffsetSpan(SkSpan<T> offsets) const {
2737 std::string src;
2738 for (uint16_t offset : offsets) {
2739 if (offset == (0 * N * sizeof(float))) {
2740 src.push_back('x');
2741 } else if (offset == (1 * N * sizeof(float))) {
2742 src.push_back('y');
2743 } else if (offset == (2 * N * sizeof(float))) {
2744 src.push_back('z');
2745 } else if (offset == (3 * N * sizeof(float))) {
2746 src.push_back('w');
2747 } else {
2748 src.push_back('?');
2749 }
2750 }
2751 return src;
2752 }
2753
2754 // Determines the effective width of a swizzle op. When we decode a swizzle, we don't know the
2755 // slot width of the original value; that's not preserved in the instruction encoding. (e.g.,
2756 // myFloat4.y would be indistinguishable from myFloat2.y.) We do our best to make a readable
2757 // dump using the data we have.
2758 template <typename T>
2759 size_t swizzleWidth(SkSpan<T> offsets) const {
2760 size_t highestComponent = *std::max_element(offsets.begin(), offsets.end()) /
2761 (N * sizeof(float));
2762 size_t swizzleWidth = offsets.size();
2763 return std::max(swizzleWidth, highestComponent + 1);
2764 }
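// E.g. a two-component swizzle whose offsets select `y` and `w`
// (hypothetically) has highestComponent == 3, so the effective width is
// max(2, 3 + 1) == 4 and the source is printed as a four-slot range.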
2765
2766 // Stringizes a swizzled pointer.
2767 template <typename T>
2768 std::string swizzlePtr(const void* ptr, SkSpan<T> offsets) const {
2769 return "(" + this->ptrCtx(ptr, this->swizzleWidth(SkSpan(offsets))) + ")." +
2770 this->swizzleOffsetSpan(SkSpan(offsets));
2771 }
2772
2773 // Interprets the context value as a SwizzleCtx structure.
2774 std::tuple<std::string, std::string> swizzleCtx(ProgramOp op, const void* v) const {
2775 auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_SwizzleCtx*)v);
2776 int destSlots = (int)op - (int)BuilderOp::swizzle_1 + 1;
2777 return {this->offsetCtx(ctx.dst, destSlots),
2778 this->swizzlePtr(this->offsetToPtr(ctx.dst), SkSpan(ctx.offsets, destSlots))};
2779 }
2780
2781 // Interprets the context value as a SwizzleCopyCtx structure.
2782 std::tuple<std::string, std::string> swizzleCopyCtx(ProgramOp op, const void* v) const {
2783 const auto* ctx = static_cast<const SkRasterPipeline_SwizzleCopyCtx*>(v);
2784 int destSlots = (int)op - (int)BuilderOp::swizzle_copy_slot_masked + 1;
2785
2786 return {this->swizzlePtr(ctx->dst, SkSpan(ctx->offsets, destSlots)),
2787 this->ptrCtx(ctx->src, destSlots)};
2788 }
2789
2790 // Interprets the context value as a ShuffleCtx structure.
2791 std::tuple<std::string, std::string> shuffleCtx(const void* v) const {
2792 const auto* ctx = static_cast<const SkRasterPipeline_ShuffleCtx*>(v);
2793
2794 std::string dst = this->ptrCtx(ctx->ptr, ctx->count);
2795 std::string src = "(" + dst + ")[";
2796 for (int index = 0; index < ctx->count; ++index) {
2797 if (ctx->offsets[index] % (N * sizeof(float))) {
2798 src.push_back('?');
2799 } else {
2800 src += std::to_string(ctx->offsets[index] / (N * sizeof(float)));
2801 }
2802 src.push_back(' ');
2803 }
2804 src.back() = ']';
2805 return std::make_tuple(dst, src);
2806 }
2807
2808 // Interprets the context value as a packed MatrixMultiplyCtx structure.
2809 std::tuple<std::string, std::string, std::string> matrixMultiply(const void* v) const {
2810 auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_MatrixMultiplyCtx*)v);
2811 int leftMatrix = ctx.leftColumns * ctx.leftRows;
2812 int rightMatrix = ctx.rightColumns * ctx.rightRows;
2813 int resultMatrix = ctx.rightColumns * ctx.leftRows;
2814 SkRPOffset leftOffset = ctx.dst + (ctx.rightColumns * ctx.leftRows * sizeof(float) * N);
2815 SkRPOffset rightOffset = leftOffset + (ctx.leftColumns * ctx.leftRows * sizeof(float) * N);
2816 return {SkSL::String::printf("mat%dx%d(%s)",
2817 ctx.rightColumns,
2818 ctx.leftRows,
2819 this->offsetCtx(ctx.dst, resultMatrix).c_str()),
2820 SkSL::String::printf("mat%dx%d(%s)",
2821 ctx.leftColumns,
2822 ctx.leftRows,
2823 this->offsetCtx(leftOffset, leftMatrix).c_str()),
2824 SkSL::String::printf("mat%dx%d(%s)",
2825 ctx.rightColumns,
2826 ctx.rightRows,
2827 this->offsetCtx(rightOffset, rightMatrix).c_str())};
2828 }
2829
2830private:
2831 const int N = SkOpts::raster_pipeline_highp_stride;
2832 const Program& fProgram;
2833 TArray<Stage> fStages;
2834 TArray<std::string> fSlotNameList;
2835 THashMap<int, int> fLabelToStageMap; // <label ID, stage index>
2836 SlotData fSlots;
2837 SkSpan<float> fUniforms;
2838};
2839
2840void Program::Dumper::dump(SkWStream* out, bool writeInstructionCount) {
2841 using POp = ProgramOp;
2842
2843 // Allocate memory for the slot and uniform data, even though the program won't ever be
2844 // executed. The program requires pointer ranges for managing its data, and ASAN will report
2845 // errors if those pointers are pointing at unallocated memory.
2846 SkArenaAlloc alloc(/*firstHeapAllocation=*/1000);
2847 fSlots = fProgram.allocateSlotData(&alloc);
2848 float* uniformPtr = alloc.makeArray<float>(fProgram.fNumUniformSlots);
2849 fUniforms = SkSpan(uniformPtr, fProgram.fNumUniformSlots);
2850
2851 // Turn this program into an array of Raster Pipeline stages.
2852 fProgram.makeStages(&fStages, &alloc, fUniforms, fSlots);
2853
2854 // Assemble lookup tables for program labels and slot names.
2855 this->buildLabelToStageMap();
2856 this->buildUniqueSlotNameList();
2857
2858 // Emit the program's instruction count.
2859 if (writeInstructionCount) {
2860 int invocationCount = 0, instructionCount = 0;
2861 for (const Stage& stage : fStages) {
2862 switch (stage.op) {
2863 case POp::label:
2864 // consumes zero instructions
2865 break;
2866
2867 case POp::invoke_shader:
2868 case POp::invoke_color_filter:
2869 case POp::invoke_blender:
2870 case POp::invoke_to_linear_srgb:
2871 case POp::invoke_from_linear_srgb:
2872 ++invocationCount;
2873 break;
2874
2875 default:
2876 ++instructionCount;
2877 break;
2878 }
2879 }
2880
2881 out->writeText(std::to_string(instructionCount).c_str());
2882 out->writeText(" instructions");
2883 if (invocationCount > 0) {
2884 out->writeText(", ");
2885 out->writeText(std::to_string(invocationCount).c_str());
2886 out->writeText(" invocations");
2887 }
2888 out->writeText("\n\n");
2889 }
2890
2891 // Emit all of the program's immutable data.
2892 const char* header = "[immutable slots]\n";
2893 const char* footer = "";
2894 for (const Instruction& inst : fProgram.fInstructions) {
2895 if (inst.fOp == BuilderOp::store_immutable_value) {
2896 out->writeText(header);
2897 out->writeText("i");
2898 out->writeText(std::to_string(inst.fSlotA).c_str());
2899 out->writeText(" = ");
2900 out->writeText(this->imm(sk_bit_cast<float>(inst.fImmA)).c_str());
2901 out->writeText("\n");
2902
2903 header = "";
2904 footer = "\n";
2905 }
2906 }
2907 out->writeText(footer);
2908
2909 // Emit the program's instruction list.
2910 for (int index = 0; index < fStages.size(); ++index) {
2911 const Stage& stage = fStages[index];
2912
2913 std::string opArg1, opArg2, opArg3, opSwizzle;
2914 switch (stage.op) {
2915 case POp::label:
2916 case POp::invoke_shader:
2917 case POp::invoke_color_filter:
2918 case POp::invoke_blender:
2919 opArg1 = this->immCtx(stage.ctx, /*showAsFloat=*/false);
2920 break;
2921
2922 case POp::case_op: {
2923 auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_CaseOpCtx*)stage.ctx);
2924 opArg1 = this->offsetCtx(ctx.offset, 1);
2925 opArg2 = this->offsetCtx(ctx.offset + sizeof(int32_t) * N, 1);
2926 opArg3 = this->imm(sk_bit_cast<float>(ctx.expectedValue), /*showAsFloat=*/false);
2927 break;
2928 }
2929 case POp::swizzle_1:
2930 case POp::swizzle_2:
2931 case POp::swizzle_3:
2932 case POp::swizzle_4:
2933 std::tie(opArg1, opArg2) = this->swizzleCtx(stage.op, stage.ctx);
2934 break;
2935
2936 case POp::swizzle_copy_slot_masked:
2937 case POp::swizzle_copy_2_slots_masked:
2938 case POp::swizzle_copy_3_slots_masked:
2939 case POp::swizzle_copy_4_slots_masked:
2940 std::tie(opArg1, opArg2) = this->swizzleCopyCtx(stage.op, stage.ctx);
2941 break;
2942
2943 case POp::refract_4_floats:
2944 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 4);
2945 opArg3 = this->ptrCtx((const float*)(stage.ctx) + (8 * N), 1);
2946 break;
2947
2948 case POp::dot_2_floats:
2949 opArg1 = this->ptrCtx(stage.ctx, 1);
2950 std::tie(opArg2, opArg3) = this->adjacentPtrCtx(stage.ctx, 2);
2951 break;
2952
2953 case POp::dot_3_floats:
2954 opArg1 = this->ptrCtx(stage.ctx, 1);
2955 std::tie(opArg2, opArg3) = this->adjacentPtrCtx(stage.ctx, 3);
2956 break;
2957
2958 case POp::dot_4_floats:
2959 opArg1 = this->ptrCtx(stage.ctx, 1);
2960 std::tie(opArg2, opArg3) = this->adjacentPtrCtx(stage.ctx, 4);
2961 break;
2962
2963 case POp::shuffle:
2964 std::tie(opArg1, opArg2) = this->shuffleCtx(stage.ctx);
2965 break;
2966
2967 case POp::matrix_multiply_2:
2968 case POp::matrix_multiply_3:
2969 case POp::matrix_multiply_4:
2970 std::tie(opArg1, opArg2, opArg3) = this->matrixMultiply(stage.ctx);
2971 break;
2972
2973 case POp::load_condition_mask:
2974 case POp::store_condition_mask:
2975 case POp::load_loop_mask:
2976 case POp::store_loop_mask:
2977 case POp::merge_loop_mask:
2978 case POp::reenable_loop_mask:
2979 case POp::load_return_mask:
2980 case POp::store_return_mask:
2981 case POp::continue_op:
2982 case POp::cast_to_float_from_int: case POp::cast_to_float_from_uint:
2983 case POp::cast_to_int_from_float: case POp::cast_to_uint_from_float:
2984 case POp::abs_int:
2985 case POp::acos_float:
2986 case POp::asin_float:
2987 case POp::atan_float:
2988 case POp::ceil_float:
2989 case POp::cos_float:
2990 case POp::exp_float:
2991 case POp::exp2_float:
2992 case POp::log_float:
2993 case POp::log2_float:
2994 case POp::floor_float:
2995 case POp::invsqrt_float:
2996 case POp::sin_float:
2997 case POp::sqrt_float:
2998 case POp::tan_float:
2999 opArg1 = this->ptrCtx(stage.ctx, 1);
3000 break;
3001
3002 case POp::store_src_rg:
3003 case POp::cast_to_float_from_2_ints: case POp::cast_to_float_from_2_uints:
3004 case POp::cast_to_int_from_2_floats: case POp::cast_to_uint_from_2_floats:
3005 case POp::abs_2_ints:
3006 case POp::ceil_2_floats:
3007 case POp::floor_2_floats:
3008 case POp::invsqrt_2_floats:
3009 opArg1 = this->ptrCtx(stage.ctx, 2);
3010 break;
3011
3012 case POp::cast_to_float_from_3_ints: case POp::cast_to_float_from_3_uints:
3013 case POp::cast_to_int_from_3_floats: case POp::cast_to_uint_from_3_floats:
3014 case POp::abs_3_ints:
3015 case POp::ceil_3_floats:
3016 case POp::floor_3_floats:
3017 case POp::invsqrt_3_floats:
3018 opArg1 = this->ptrCtx(stage.ctx, 3);
3019 break;
3020
3021 case POp::load_src:
3022 case POp::load_dst:
3023 case POp::exchange_src:
3024 case POp::store_src:
3025 case POp::store_dst:
3026 case POp::store_device_xy01:
3027 case POp::invoke_to_linear_srgb:
3028 case POp::invoke_from_linear_srgb:
3029 case POp::cast_to_float_from_4_ints: case POp::cast_to_float_from_4_uints:
3030 case POp::cast_to_int_from_4_floats: case POp::cast_to_uint_from_4_floats:
3031 case POp::abs_4_ints:
3032 case POp::ceil_4_floats:
3033 case POp::floor_4_floats:
3034 case POp::invsqrt_4_floats:
3035 case POp::inverse_mat2:
3036 opArg1 = this->ptrCtx(stage.ctx, 4);
3037 break;
3038
3039 case POp::inverse_mat3:
3040 opArg1 = this->ptrCtx(stage.ctx, 9);
3041 break;
3042
3043 case POp::inverse_mat4:
3044 opArg1 = this->ptrCtx(stage.ctx, 16);
3045 break;
3046
3047 case POp::copy_constant:
3048 case POp::add_imm_float:
3049 case POp::mul_imm_float:
3050 case POp::cmple_imm_float:
3051 case POp::cmplt_imm_float:
3052 case POp::cmpeq_imm_float:
3053 case POp::cmpne_imm_float:
3054 case POp::min_imm_float:
3055 case POp::max_imm_float:
3056 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 1);
3057 break;
3058
3059 case POp::add_imm_int:
3060 case POp::mul_imm_int:
3061 case POp::bitwise_and_imm_int:
3062 case POp::bitwise_xor_imm_int:
3063 case POp::cmple_imm_int:
3064 case POp::cmple_imm_uint:
3065 case POp::cmplt_imm_int:
3066 case POp::cmplt_imm_uint:
3067 case POp::cmpeq_imm_int:
3068 case POp::cmpne_imm_int:
3069 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 1, /*showAsFloat=*/false);
3070 break;
3071
3072 case POp::splat_2_constants:
3073 case POp::bitwise_and_imm_2_ints:
3074 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 2);
3075 break;
3076
3077 case POp::splat_3_constants:
3078 case POp::bitwise_and_imm_3_ints:
3079 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 3);
3080 break;
3081
3082 case POp::splat_4_constants:
3083 case POp::bitwise_and_imm_4_ints:
3084 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 4);
3085 break;
3086
3087 case POp::copy_uniform:
3088 std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 1);
3089 break;
3090
3091 case POp::copy_2_uniforms:
3092 std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 2);
3093 break;
3094
3095 case POp::copy_3_uniforms:
3096 std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 3);
3097 break;
3098
3099 case POp::copy_4_uniforms:
3100 std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 4);
3101 break;
3102
3103 case POp::copy_slot_masked:
3104 case POp::copy_slot_unmasked:
3105 case POp::copy_immutable_unmasked:
3106 std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 1);
3107 break;
3108
3109 case POp::copy_2_slots_masked:
3110 case POp::copy_2_slots_unmasked:
3111 case POp::copy_2_immutables_unmasked:
3112 std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 2);
3113 break;
3114
3115 case POp::copy_3_slots_masked:
3116 case POp::copy_3_slots_unmasked:
3117 case POp::copy_3_immutables_unmasked:
3118 std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 3);
3119 break;
3120
3121 case POp::copy_4_slots_masked:
3122 case POp::copy_4_slots_unmasked:
3123 case POp::copy_4_immutables_unmasked:
3124 std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 4);
3125 break;
3126
3127 case POp::copy_from_indirect_uniform_unmasked:
3128 case POp::copy_from_indirect_unmasked:
3129 case POp::copy_to_indirect_masked: {
3130 const auto* ctx = static_cast<SkRasterPipeline_CopyIndirectCtx*>(stage.ctx);
3131 // We don't incorporate the indirect-limit in the output.
3132 opArg1 = this->ptrCtx(ctx->dst, ctx->slots);
3133 opArg2 = this->ptrCtx(ctx->src, ctx->slots);
3134 opArg3 = this->ptrCtx(ctx->indirectOffset, 1);
3135 break;
3136 }
3137 case POp::swizzle_copy_to_indirect_masked: {
3138 const auto* ctx = static_cast<SkRasterPipeline_SwizzleCopyIndirectCtx*>(stage.ctx);
3139 opArg1 = this->ptrCtx(ctx->dst, this->swizzleWidth(SkSpan(ctx->offsets,
3140 ctx->slots)));
3141 opArg2 = this->ptrCtx(ctx->src, ctx->slots);
3142 opArg3 = this->ptrCtx(ctx->indirectOffset, 1);
3143 opSwizzle = this->swizzleOffsetSpan(SkSpan(ctx->offsets, ctx->slots));
3144 break;
3145 }
3146 case POp::merge_condition_mask:
3147 case POp::merge_inv_condition_mask:
3148 case POp::add_float: case POp::add_int:
3149 case POp::sub_float: case POp::sub_int:
3150 case POp::mul_float: case POp::mul_int:
3151 case POp::div_float: case POp::div_int: case POp::div_uint:
3152 case POp::bitwise_and_int:
3153 case POp::bitwise_or_int:
3154 case POp::bitwise_xor_int:
3155 case POp::mod_float:
3156 case POp::min_float: case POp::min_int: case POp::min_uint:
3157 case POp::max_float: case POp::max_int: case POp::max_uint:
3158 case POp::cmplt_float: case POp::cmplt_int: case POp::cmplt_uint:
3159 case POp::cmple_float: case POp::cmple_int: case POp::cmple_uint:
3160 case POp::cmpeq_float: case POp::cmpeq_int:
3161 case POp::cmpne_float: case POp::cmpne_int:
3162 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 1);
3163 break;
3164
3165 case POp::mix_float: case POp::mix_int:
3166 std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 1);
3167 break;
3168
3169 case POp::add_2_floats: case POp::add_2_ints:
3170 case POp::sub_2_floats: case POp::sub_2_ints:
3171 case POp::mul_2_floats: case POp::mul_2_ints:
3172 case POp::div_2_floats: case POp::div_2_ints: case POp::div_2_uints:
3173 case POp::bitwise_and_2_ints:
3174 case POp::bitwise_or_2_ints:
3175 case POp::bitwise_xor_2_ints:
3176 case POp::mod_2_floats:
3177 case POp::min_2_floats: case POp::min_2_ints: case POp::min_2_uints:
3178 case POp::max_2_floats: case POp::max_2_ints: case POp::max_2_uints:
3179 case POp::cmplt_2_floats: case POp::cmplt_2_ints: case POp::cmplt_2_uints:
3180 case POp::cmple_2_floats: case POp::cmple_2_ints: case POp::cmple_2_uints:
3181 case POp::cmpeq_2_floats: case POp::cmpeq_2_ints:
3182 case POp::cmpne_2_floats: case POp::cmpne_2_ints:
3183 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 2);
3184 break;
3185
3186 case POp::mix_2_floats: case POp::mix_2_ints:
3187 std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 2);
3188 break;
3189
3190 case POp::add_3_floats: case POp::add_3_ints:
3191 case POp::sub_3_floats: case POp::sub_3_ints:
3192 case POp::mul_3_floats: case POp::mul_3_ints:
3193 case POp::div_3_floats: case POp::div_3_ints: case POp::div_3_uints:
3194 case POp::bitwise_and_3_ints:
3195 case POp::bitwise_or_3_ints:
3196 case POp::bitwise_xor_3_ints:
3197 case POp::mod_3_floats:
3198 case POp::min_3_floats: case POp::min_3_ints: case POp::min_3_uints:
3199 case POp::max_3_floats: case POp::max_3_ints: case POp::max_3_uints:
3200 case POp::cmplt_3_floats: case POp::cmplt_3_ints: case POp::cmplt_3_uints:
3201 case POp::cmple_3_floats: case POp::cmple_3_ints: case POp::cmple_3_uints:
3202 case POp::cmpeq_3_floats: case POp::cmpeq_3_ints:
3203 case POp::cmpne_3_floats: case POp::cmpne_3_ints:
3204 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 3);
3205 break;
3206
3207 case POp::mix_3_floats: case POp::mix_3_ints:
3208 std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 3);
3209 break;
3210
3211 case POp::add_4_floats: case POp::add_4_ints:
3212 case POp::sub_4_floats: case POp::sub_4_ints:
3213 case POp::mul_4_floats: case POp::mul_4_ints:
3214 case POp::div_4_floats: case POp::div_4_ints: case POp::div_4_uints:
3215 case POp::bitwise_and_4_ints:
3216 case POp::bitwise_or_4_ints:
3217 case POp::bitwise_xor_4_ints:
3218 case POp::mod_4_floats:
3219 case POp::min_4_floats: case POp::min_4_ints: case POp::min_4_uints:
3220 case POp::max_4_floats: case POp::max_4_ints: case POp::max_4_uints:
3221 case POp::cmplt_4_floats: case POp::cmplt_4_ints: case POp::cmplt_4_uints:
3222 case POp::cmple_4_floats: case POp::cmple_4_ints: case POp::cmple_4_uints:
3223 case POp::cmpeq_4_floats: case POp::cmpeq_4_ints:
3224 case POp::cmpne_4_floats: case POp::cmpne_4_ints:
3225 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 4);
3226 break;
3227
3228 case POp::mix_4_floats: case POp::mix_4_ints:
3229 std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 4);
3230 break;
3231
3232 case POp::add_n_floats: case POp::add_n_ints:
3233 case POp::sub_n_floats: case POp::sub_n_ints:
3234 case POp::mul_n_floats: case POp::mul_n_ints:
3235 case POp::div_n_floats: case POp::div_n_ints: case POp::div_n_uints:
3236 case POp::bitwise_and_n_ints:
3237 case POp::bitwise_or_n_ints:
3238 case POp::bitwise_xor_n_ints:
3239 case POp::mod_n_floats:
3240 case POp::min_n_floats: case POp::min_n_ints: case POp::min_n_uints:
3241 case POp::max_n_floats: case POp::max_n_ints: case POp::max_n_uints:
3242 case POp::cmplt_n_floats: case POp::cmplt_n_ints: case POp::cmplt_n_uints:
3243 case POp::cmple_n_floats: case POp::cmple_n_ints: case POp::cmple_n_uints:
3244 case POp::cmpeq_n_floats: case POp::cmpeq_n_ints:
3245 case POp::cmpne_n_floats: case POp::cmpne_n_ints:
3246 case POp::atan2_n_floats:
3247 case POp::pow_n_floats:
3248 std::tie(opArg1, opArg2) = this->adjacentBinaryOpCtx(stage.ctx);
3249 break;
3250
3251 case POp::mix_n_floats: case POp::mix_n_ints:
3252 case POp::smoothstep_n_floats:
3253 std::tie(opArg1, opArg2, opArg3) = this->adjacentTernaryOpCtx(stage.ctx);
3254 break;
3255
3256 case POp::jump:
3257 case POp::branch_if_all_lanes_active:
3258 case POp::branch_if_any_lanes_active:
3259 case POp::branch_if_no_lanes_active:
3260 opArg1 = this->branchOffset(static_cast<SkRasterPipeline_BranchCtx*>(stage.ctx),
3261 index);
3262 break;
3263
3264 case POp::branch_if_no_active_lanes_eq: {
3265 const auto* ctx = static_cast<SkRasterPipeline_BranchIfEqualCtx*>(stage.ctx);
3266 opArg1 = this->branchOffset(ctx, index);
3267 opArg2 = this->ptrCtx(ctx->ptr, 1);
3268 opArg3 = this->imm(sk_bit_cast<float>(ctx->value));
3269 break;
3270 }
3271 case POp::trace_var: {
3272 const auto* ctx = static_cast<SkRasterPipeline_TraceVarCtx*>(stage.ctx);
3273 opArg1 = this->ptrCtx(ctx->traceMask, 1);
3274 opArg2 = this->ptrCtx(ctx->data, ctx->numSlots);
3275 if (ctx->indirectOffset != nullptr) {
3276 opArg3 = " + " + this->ptrCtx(ctx->indirectOffset, 1);
3277 }
3278 break;
3279 }
3280 case POp::trace_line: {
3281 const auto* ctx = static_cast<SkRasterPipeline_TraceLineCtx*>(stage.ctx);
3282 opArg1 = this->ptrCtx(ctx->traceMask, 1);
3283 opArg2 = std::to_string(ctx->lineNumber);
3284 break;
3285 }
3286 case POp::trace_enter:
3287 case POp::trace_exit: {
3288 const auto* ctx = static_cast<SkRasterPipeline_TraceFuncCtx*>(stage.ctx);
3289 opArg1 = this->ptrCtx(ctx->traceMask, 1);
3290 opArg2 = (fProgram.fDebugTrace &&
3291 ctx->funcIdx >= 0 &&
3292 ctx->funcIdx < (int)fProgram.fDebugTrace->fFuncInfo.size())
3293 ? fProgram.fDebugTrace->fFuncInfo[ctx->funcIdx].name
3294 : "???";
3295 break;
3296 }
3297 case POp::trace_scope: {
3298 const auto* ctx = static_cast<SkRasterPipeline_TraceScopeCtx*>(stage.ctx);
3299 opArg1 = this->ptrCtx(ctx->traceMask, 1);
3300 opArg2 = SkSL::String::printf("%+d", ctx->delta);
3301 break;
3302 }
3303 default:
3304 break;
3305 }
3306
3307 std::string_view opName;
3308 switch (stage.op) {
3309 #define M(x) case POp::x: opName = #x; break;
3310 SK_RASTER_PIPELINE_OPS_ALL(M)
3311 SKRP_EXTENDED_OPS(M)
3312 #undef M
3313 }
3314
3315 std::string opText;
3316 switch (stage.op) {
3317 case POp::trace_var:
3318 opText = "TraceVar(" + opArg2 + opArg3 + ") when " + opArg1 + " is true";
3319 break;
3320
3321 case POp::trace_line:
3322 opText = "TraceLine(" + opArg2 + ") when " + opArg1 + " is true";
3323 break;
3324
3325 case POp::trace_enter:
3326 opText = "TraceEnter(" + opArg2 + ") when " + opArg1 + " is true";
3327 break;
3328
3329 case POp::trace_exit:
3330 opText = "TraceExit(" + opArg2 + ") when " + opArg1 + " is true";
3331 break;
3332
3333 case POp::trace_scope:
3334 opText = "TraceScope(" + opArg2 + ") when " + opArg1 + " is true";
3335 break;
3336
3337 case POp::init_lane_masks:
3338 opText = "CondMask = LoopMask = RetMask = true";
3339 break;
3340
3341 case POp::load_condition_mask:
3342 opText = "CondMask = " + opArg1;
3343 break;
3344
3345 case POp::store_condition_mask:
3346 opText = opArg1 + " = CondMask";
3347 break;
3348
3349 case POp::merge_condition_mask:
3350 opText = "CondMask = " + opArg1 + " & " + opArg2;
3351 break;
3352
3353 case POp::merge_inv_condition_mask:
3354 opText = "CondMask = " + opArg1 + " & ~" + opArg2;
3355 break;
3356
3357 case POp::load_loop_mask:
3358 opText = "LoopMask = " + opArg1;
3359 break;
3360
3361 case POp::store_loop_mask:
3362 opText = opArg1 + " = LoopMask";
3363 break;
3364
3365 case POp::mask_off_loop_mask:
3366 opText = "LoopMask &= ~(CondMask & LoopMask & RetMask)";
3367 break;
3368
3369 case POp::reenable_loop_mask:
3370 opText = "LoopMask |= " + opArg1;
3371 break;
3372
3373 case POp::merge_loop_mask:
3374 opText = "LoopMask &= " + opArg1;
3375 break;
3376
3377 case POp::load_return_mask:
3378 opText = "RetMask = " + opArg1;
3379 break;
3380
3381 case POp::store_return_mask:
3382 opText = opArg1 + " = RetMask";
3383 break;
3384
3385 case POp::mask_off_return_mask:
3386 opText = "RetMask &= ~(CondMask & LoopMask & RetMask)";
3387 break;
3388
3389 case POp::store_src_rg:
3390 opText = opArg1 + " = src.rg";
3391 break;
3392
3393 case POp::exchange_src:
3394 opText = "swap(src.rgba, " + opArg1 + ")";
3395 break;
3396
3397 case POp::store_src:
3398 opText = opArg1 + " = src.rgba";
3399 break;
3400
3401 case POp::store_dst:
3402 opText = opArg1 + " = dst.rgba";
3403 break;
3404
3405 case POp::store_device_xy01:
3406 opText = opArg1 + " = DeviceCoords.xy01";
3407 break;
3408
3409 case POp::load_src:
3410 opText = "src.rgba = " + opArg1;
3411 break;
3412
3413 case POp::load_dst:
3414 opText = "dst.rgba = " + opArg1;
3415 break;
3416
3417 case POp::bitwise_and_int:
3418 case POp::bitwise_and_2_ints:
3419 case POp::bitwise_and_3_ints:
3420 case POp::bitwise_and_4_ints:
3421 case POp::bitwise_and_n_ints:
3422 case POp::bitwise_and_imm_int:
3423 case POp::bitwise_and_imm_2_ints:
3424 case POp::bitwise_and_imm_3_ints:
3425 case POp::bitwise_and_imm_4_ints:
3426 opText = opArg1 + " &= " + opArg2;
3427 break;
3428
3429 case POp::bitwise_or_int:
3430 case POp::bitwise_or_2_ints:
3431 case POp::bitwise_or_3_ints:
3432 case POp::bitwise_or_4_ints:
3433 case POp::bitwise_or_n_ints:
3434 opText = opArg1 + " |= " + opArg2;
3435 break;
3436
3437 case POp::bitwise_xor_int:
3438 case POp::bitwise_xor_2_ints:
3439 case POp::bitwise_xor_3_ints:
3440 case POp::bitwise_xor_4_ints:
3441 case POp::bitwise_xor_n_ints:
3442 case POp::bitwise_xor_imm_int:
3443 opText = opArg1 + " ^= " + opArg2;
3444 break;
3445
3446 case POp::cast_to_float_from_int:
3447 case POp::cast_to_float_from_2_ints:
3448 case POp::cast_to_float_from_3_ints:
3449 case POp::cast_to_float_from_4_ints:
3450 opText = opArg1 + " = IntToFloat(" + opArg1 + ")";
3451 break;
3452
3453 case POp::cast_to_float_from_uint:
3454 case POp::cast_to_float_from_2_uints:
3455 case POp::cast_to_float_from_3_uints:
3456 case POp::cast_to_float_from_4_uints:
3457 opText = opArg1 + " = UintToFloat(" + opArg1 + ")";
3458 break;
3459
3460 case POp::cast_to_int_from_float:
3461 case POp::cast_to_int_from_2_floats:
3462 case POp::cast_to_int_from_3_floats:
3463 case POp::cast_to_int_from_4_floats:
3464 opText = opArg1 + " = FloatToInt(" + opArg1 + ")";
3465 break;
3466
3467 case POp::cast_to_uint_from_float:
3468 case POp::cast_to_uint_from_2_floats:
3469 case POp::cast_to_uint_from_3_floats:
3470 case POp::cast_to_uint_from_4_floats:
3471 opText = opArg1 + " = FloatToUint(" + opArg1 + ")";
3472 break;
3473
3474 case POp::copy_slot_masked: case POp::copy_2_slots_masked:
3475 case POp::copy_3_slots_masked: case POp::copy_4_slots_masked:
3476 case POp::swizzle_copy_slot_masked: case POp::swizzle_copy_2_slots_masked:
3477 case POp::swizzle_copy_3_slots_masked: case POp::swizzle_copy_4_slots_masked:
3478 opText = opArg1 + " = Mask(" + opArg2 + ")";
3479 break;
3480
3481 case POp::copy_uniform: case POp::copy_2_uniforms:
3482 case POp::copy_3_uniforms: case POp::copy_4_uniforms:
3483 case POp::copy_slot_unmasked: case POp::copy_2_slots_unmasked:
3484 case POp::copy_3_slots_unmasked: case POp::copy_4_slots_unmasked:
3485 case POp::copy_immutable_unmasked: case POp::copy_2_immutables_unmasked:
3486 case POp::copy_3_immutables_unmasked: case POp::copy_4_immutables_unmasked:
3487 case POp::copy_constant: case POp::splat_2_constants:
3488 case POp::splat_3_constants: case POp::splat_4_constants:
3489 case POp::swizzle_1: case POp::swizzle_2:
3490 case POp::swizzle_3: case POp::swizzle_4:
3491 case POp::shuffle:
3492 opText = opArg1 + " = " + opArg2;
3493 break;
3494
3495 case POp::copy_from_indirect_unmasked:
3496 case POp::copy_from_indirect_uniform_unmasked:
3497 opText = opArg1 + " = Indirect(" + opArg2 + " + " + opArg3 + ")";
3498 break;
3499
3500 case POp::copy_to_indirect_masked:
3501 opText = "Indirect(" + opArg1 + " + " + opArg3 + ") = Mask(" + opArg2 + ")";
3502 break;
3503
3504 case POp::swizzle_copy_to_indirect_masked:
3505 opText = "Indirect(" + opArg1 + " + " + opArg3 + ")." + opSwizzle + " = Mask(" +
3506 opArg2 + ")";
3507 break;
3508
3509 case POp::abs_int:
3510 case POp::abs_2_ints:
3511 case POp::abs_3_ints:
3512 case POp::abs_4_ints:
3513 opText = opArg1 + " = abs(" + opArg1 + ")";
3514 break;
3515
3516 case POp::acos_float:
3517 opText = opArg1 + " = acos(" + opArg1 + ")";
3518 break;
3519
3520 case POp::asin_float:
3521 opText = opArg1 + " = asin(" + opArg1 + ")";
3522 break;
3523
3524 case POp::atan_float:
3525 opText = opArg1 + " = atan(" + opArg1 + ")";
3526 break;
3527
3528 case POp::atan2_n_floats:
3529 opText = opArg1 + " = atan2(" + opArg1 + ", " + opArg2 + ")";
3530 break;
3531
3532 case POp::ceil_float:
3533 case POp::ceil_2_floats:
3534 case POp::ceil_3_floats:
3535 case POp::ceil_4_floats:
3536 opText = opArg1 + " = ceil(" + opArg1 + ")";
3537 break;
3538
3539 case POp::cos_float:
3540 opText = opArg1 + " = cos(" + opArg1 + ")";
3541 break;
3542
3543 case POp::refract_4_floats:
3544 opText = opArg1 + " = refract(" + opArg1 + ", " + opArg2 + ", " + opArg3 + ")";
3545 break;
3546
3547 case POp::dot_2_floats:
3548 case POp::dot_3_floats:
3549 case POp::dot_4_floats:
3550 opText = opArg1 + " = dot(" + opArg2 + ", " + opArg3 + ")";
3551 break;
3552
3553 case POp::exp_float:
3554 opText = opArg1 + " = exp(" + opArg1 + ")";
3555 break;
3556
3557 case POp::exp2_float:
3558 opText = opArg1 + " = exp2(" + opArg1 + ")";
3559 break;
3560
3561 case POp::log_float:
3562 opText = opArg1 + " = log(" + opArg1 + ")";
3563 break;
3564
3565 case POp::log2_float:
3566 opText = opArg1 + " = log2(" + opArg1 + ")";
3567 break;
3568
3569 case POp::pow_n_floats:
3570 opText = opArg1 + " = pow(" + opArg1 + ", " + opArg2 + ")";
3571 break;
3572
3573 case POp::sin_float:
3574 opText = opArg1 + " = sin(" + opArg1 + ")";
3575 break;
3576
3577 case POp::sqrt_float:
3578 opText = opArg1 + " = sqrt(" + opArg1 + ")";
3579 break;
3580
3581 case POp::tan_float:
3582 opText = opArg1 + " = tan(" + opArg1 + ")";
3583 break;
3584
3585 case POp::floor_float:
3586 case POp::floor_2_floats:
3587 case POp::floor_3_floats:
3588 case POp::floor_4_floats:
3589 opText = opArg1 + " = floor(" + opArg1 + ")";
3590 break;
3591
3592 case POp::invsqrt_float:
3593 case POp::invsqrt_2_floats:
3594 case POp::invsqrt_3_floats:
3595 case POp::invsqrt_4_floats:
3596 opText = opArg1 + " = inversesqrt(" + opArg1 + ")";
3597 break;
3598
3599 case POp::inverse_mat2:
3600 case POp::inverse_mat3:
3601 case POp::inverse_mat4:
3602 opText = opArg1 + " = inverse(" + opArg1 + ")";
3603 break;
3604
3605 case POp::add_float: case POp::add_int:
3606 case POp::add_2_floats: case POp::add_2_ints:
3607 case POp::add_3_floats: case POp::add_3_ints:
3608 case POp::add_4_floats: case POp::add_4_ints:
3609 case POp::add_n_floats: case POp::add_n_ints:
3610 case POp::add_imm_float: case POp::add_imm_int:
3611 opText = opArg1 + " += " + opArg2;
3612 break;
3613
3614 case POp::sub_float: case POp::sub_int:
3615 case POp::sub_2_floats: case POp::sub_2_ints:
3616 case POp::sub_3_floats: case POp::sub_3_ints:
3617 case POp::sub_4_floats: case POp::sub_4_ints:
3618 case POp::sub_n_floats: case POp::sub_n_ints:
3619 opText = opArg1 + " -= " + opArg2;
3620 break;
3621
3622 case POp::mul_float: case POp::mul_int:
3623 case POp::mul_2_floats: case POp::mul_2_ints:
3624 case POp::mul_3_floats: case POp::mul_3_ints:
3625 case POp::mul_4_floats: case POp::mul_4_ints:
3626 case POp::mul_n_floats: case POp::mul_n_ints:
3627 case POp::mul_imm_float: case POp::mul_imm_int:
3628 opText = opArg1 + " *= " + opArg2;
3629 break;
3630
3631 case POp::div_float: case POp::div_int: case POp::div_uint:
3632 case POp::div_2_floats: case POp::div_2_ints: case POp::div_2_uints:
3633 case POp::div_3_floats: case POp::div_3_ints: case POp::div_3_uints:
3634 case POp::div_4_floats: case POp::div_4_ints: case POp::div_4_uints:
3635 case POp::div_n_floats: case POp::div_n_ints: case POp::div_n_uints:
3636 opText = opArg1 + " /= " + opArg2;
3637 break;
3638
3639 case POp::matrix_multiply_2:
3640 case POp::matrix_multiply_3:
3641 case POp::matrix_multiply_4:
3642 opText = opArg1 + " = " + opArg2 + " * " + opArg3;
3643 break;
3644
3645 case POp::mod_float:
3646 case POp::mod_2_floats:
3647 case POp::mod_3_floats:
3648 case POp::mod_4_floats:
3649 case POp::mod_n_floats:
3650 opText = opArg1 + " = mod(" + opArg1 + ", " + opArg2 + ")";
3651 break;
3652
3653 case POp::min_float: case POp::min_int: case POp::min_uint:
3654 case POp::min_2_floats: case POp::min_2_ints: case POp::min_2_uints:
3655 case POp::min_3_floats: case POp::min_3_ints: case POp::min_3_uints:
3656 case POp::min_4_floats: case POp::min_4_ints: case POp::min_4_uints:
3657 case POp::min_n_floats: case POp::min_n_ints: case POp::min_n_uints:
3658 case POp::min_imm_float:
3659 opText = opArg1 + " = min(" + opArg1 + ", " + opArg2 + ")";
3660 break;
3661
3662 case POp::max_float: case POp::max_int: case POp::max_uint:
3663 case POp::max_2_floats: case POp::max_2_ints: case POp::max_2_uints:
3664 case POp::max_3_floats: case POp::max_3_ints: case POp::max_3_uints:
3665 case POp::max_4_floats: case POp::max_4_ints: case POp::max_4_uints:
3666 case POp::max_n_floats: case POp::max_n_ints: case POp::max_n_uints:
3667 case POp::max_imm_float:
3668 opText = opArg1 + " = max(" + opArg1 + ", " + opArg2 + ")";
3669 break;
3670
3671 case POp::cmplt_float: case POp::cmplt_int: case POp::cmplt_uint:
3672 case POp::cmplt_2_floats: case POp::cmplt_2_ints: case POp::cmplt_2_uints:
3673 case POp::cmplt_3_floats: case POp::cmplt_3_ints: case POp::cmplt_3_uints:
3674 case POp::cmplt_4_floats: case POp::cmplt_4_ints: case POp::cmplt_4_uints:
3675 case POp::cmplt_n_floats: case POp::cmplt_n_ints: case POp::cmplt_n_uints:
3676 case POp::cmplt_imm_float: case POp::cmplt_imm_int: case POp::cmplt_imm_uint:
3677 opText = opArg1 + " = lessThan(" + opArg1 + ", " + opArg2 + ")";
3678 break;
3679
3680 case POp::cmple_float: case POp::cmple_int: case POp::cmple_uint:
3681 case POp::cmple_2_floats: case POp::cmple_2_ints: case POp::cmple_2_uints:
3682 case POp::cmple_3_floats: case POp::cmple_3_ints: case POp::cmple_3_uints:
3683 case POp::cmple_4_floats: case POp::cmple_4_ints: case POp::cmple_4_uints:
3684 case POp::cmple_n_floats: case POp::cmple_n_ints: case POp::cmple_n_uints:
3685 case POp::cmple_imm_float: case POp::cmple_imm_int: case POp::cmple_imm_uint:
3686 opText = opArg1 + " = lessThanEqual(" + opArg1 + ", " + opArg2 + ")";
3687 break;
3688
3689 case POp::cmpeq_float: case POp::cmpeq_int:
3690 case POp::cmpeq_2_floats: case POp::cmpeq_2_ints:
3691 case POp::cmpeq_3_floats: case POp::cmpeq_3_ints:
3692 case POp::cmpeq_4_floats: case POp::cmpeq_4_ints:
3693 case POp::cmpeq_n_floats: case POp::cmpeq_n_ints:
3694 case POp::cmpeq_imm_float: case POp::cmpeq_imm_int:
3695 opText = opArg1 + " = equal(" + opArg1 + ", " + opArg2 + ")";
3696 break;
3697
3698 case POp::cmpne_float: case POp::cmpne_int:
3699 case POp::cmpne_2_floats: case POp::cmpne_2_ints:
3700 case POp::cmpne_3_floats: case POp::cmpne_3_ints:
3701 case POp::cmpne_4_floats: case POp::cmpne_4_ints:
3702 case POp::cmpne_n_floats: case POp::cmpne_n_ints:
3703 case POp::cmpne_imm_float: case POp::cmpne_imm_int:
3704 opText = opArg1 + " = notEqual(" + opArg1 + ", " + opArg2 + ")";
3705 break;
3706
3707 case POp::mix_float: case POp::mix_int:
3708 case POp::mix_2_floats: case POp::mix_2_ints:
3709 case POp::mix_3_floats: case POp::mix_3_ints:
3710 case POp::mix_4_floats: case POp::mix_4_ints:
3711 case POp::mix_n_floats: case POp::mix_n_ints:
3712 opText = opArg1 + " = mix(" + opArg2 + ", " + opArg3 + ", " + opArg1 + ")";
3713 break;
3714
3715 case POp::smoothstep_n_floats:
3716 opText = opArg1 + " = smoothstep(" + opArg1 + ", " + opArg2 + ", " + opArg3 + ")";
3717 break;
3718
3719 case POp::jump:
3720 case POp::branch_if_all_lanes_active:
3721 case POp::branch_if_any_lanes_active:
3722 case POp::branch_if_no_lanes_active:
3723 case POp::invoke_shader:
3724 case POp::invoke_color_filter:
3725 case POp::invoke_blender:
3726 opText = std::string(opName) + " " + opArg1;
3727 break;
3728
3729 case POp::invoke_to_linear_srgb:
3730 opText = opArg1 + " = toLinearSrgb(" + opArg1 + ")";
3731 break;
3732
3733 case POp::invoke_from_linear_srgb:
3734 opText = opArg1 + " = fromLinearSrgb(" + opArg1 + ")";
3735 break;
3736
3737 case POp::branch_if_no_active_lanes_eq:
3738 opText = "branch " + opArg1 + " if no lanes of " + opArg2 + " == " + opArg3;
3739 break;
3740
3741 case POp::label:
3742 opText = "label " + opArg1;
3743 break;
3744
3745 case POp::case_op:
3746 opText = "if (" + opArg1 + " == " + opArg3 +
3747 ") { LoopMask = true; " + opArg2 + " = false; }";
3748 break;
3749
3750 case POp::continue_op:
3751 opText = opArg1 +
3752 " |= Mask(0xFFFFFFFF); LoopMask &= ~(CondMask & LoopMask & RetMask)";
3753 break;
3754
3755 default:
3756 break;
3757 }
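// Ops that fall through to `default:` leave opText empty and are printed as a
// bare op name; either way the name column is clamped to 30 characters so the
// disassembly stays aligned.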
3758
3759 opName = opName.substr(0, 30);
3760 if (!opText.empty()) {
3761 out->writeText(SkSL::String::printf("%-30.*s %s\n",
3762 (int)opName.size(), opName.data(),
3763 opText.c_str()).c_str());
3764 } else {
3765 out->writeText(SkSL::String::printf("%.*s\n",
3766 (int)opName.size(), opName.data()).c_str());
3767 }
3768 }
3769}
3770
3771void Program::dump(SkWStream* out, bool writeInstructionCount) const {
3772 Dumper(*this).dump(out, writeInstructionCount);
3773}
3774
3775} // namespace SkSL::RP
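For reference, the disassembly format implemented above is reached through Program::dump. The sketch below is a minimal, hypothetical usage example, assuming a finished Program was obtained elsewhere (e.g. from Builder::finish); it routes the output through an SkDynamicMemoryWStream, though any SkWStream target works.

#include "include/core/SkData.h"
#include "include/core/SkStream.h"

#include <string>

// Hypothetical helper: disassemble a finished RP program into a std::string.
static std::string disassemble(const SkSL::RP::Program& program) {
    SkDynamicMemoryWStream buffer;                          // any SkWStream works here
    program.dump(&buffer, /*writeInstructionCount=*/true);  // one line per stage
    sk_sp<SkData> data = buffer.detachAsData();             // collect the written bytes
    return std::string(static_cast<const char*>(data->data()), data->size());
}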