df/d36/SkSLRasterPipelineBuilder_8cpp_source.html

/*

 * Copyright 2022 Google Inc.

 *

 * Use of this source code is governed by a BSD-style license that can be

 * found in the LICENSE file.

 */


#include "src/sksl/codegen/SkSLRasterPipelineBuilder.h"


#include "include/core/SkStream.h"

#include "include/private/base/SkMalloc.h"

#include "include/private/base/SkTo.h"

#include "src/base/SkArenaAlloc.h"

#include "src/core/SkOpts.h"

#include "src/core/SkRasterPipelineContextUtils.h"

#include "src/core/SkRasterPipelineOpContexts.h"

#include "src/core/SkRasterPipelineOpList.h"

#include "src/core/SkTHash.h"

#include "src/sksl/SkSLPosition.h"

#include "src/sksl/SkSLString.h"

#include "src/sksl/tracing/SkSLDebugTracePriv.h"

#include "src/sksl/tracing/SkSLTraceHook.h"

#include "src/utils/SkBitSet.h"


#if !defined(SKSL_STANDALONE)

#include "src/core/SkRasterPipeline.h"

#endif


#include <algorithm>

#include <cmath>

#include <cstddef>

#include <cstring>

#include <iterator>

#include <string>

#include <string_view>

#include <tuple>

#include <utility>

#include <vector>


using namespace skia_private;


namespace SkSL::RP {


#define ALL_SINGLE_SLOT_UNARY_OP_CASES  \

         BuilderOp::acos_float:         \

    case BuilderOp::asin_float:         \

    case BuilderOp::atan_float:         \

    case BuilderOp::cos_float:          \

    case BuilderOp::exp_float:          \

    case BuilderOp::exp2_float:         \

    case BuilderOp::log_float:          \

    case BuilderOp::log2_float:         \

    case BuilderOp::sin_float:          \

    case BuilderOp::sqrt_float:         \

    case BuilderOp::tan_float


#define ALL_MULTI_SLOT_UNARY_OP_CASES        \

         BuilderOp::abs_int:                 \

    case BuilderOp::cast_to_float_from_int:  \

    case BuilderOp::cast_to_float_from_uint: \

    case BuilderOp::cast_to_int_from_float:  \

    case BuilderOp::cast_to_uint_from_float: \

    case BuilderOp::ceil_float:              \

    case BuilderOp::floor_float:             \

    case BuilderOp::invsqrt_float


#define ALL_N_WAY_BINARY_OP_CASES   \

         BuilderOp::atan2_n_floats: \

    case BuilderOp::pow_n_floats


#define ALL_MULTI_SLOT_BINARY_OP_CASES  \

         BuilderOp::add_n_floats:       \

    case BuilderOp::add_n_ints:         \

    case BuilderOp::sub_n_floats:       \

    case BuilderOp::sub_n_ints:         \

    case BuilderOp::mul_n_floats:       \

    case BuilderOp::mul_n_ints:         \

    case BuilderOp::div_n_floats:       \

    case BuilderOp::div_n_ints:         \

    case BuilderOp::div_n_uints:        \

    case BuilderOp::bitwise_and_n_ints: \

    case BuilderOp::bitwise_or_n_ints:  \

    case BuilderOp::bitwise_xor_n_ints: \

    case BuilderOp::mod_n_floats:       \

    case BuilderOp::min_n_floats:       \

    case BuilderOp::min_n_ints:         \

    case BuilderOp::min_n_uints:        \

    case BuilderOp::max_n_floats:       \

    case BuilderOp::max_n_ints:         \

    case BuilderOp::max_n_uints:        \

    case BuilderOp::cmple_n_floats:     \

    case BuilderOp::cmple_n_ints:       \

    case BuilderOp::cmple_n_uints:      \

    case BuilderOp::cmplt_n_floats:     \

    case BuilderOp::cmplt_n_ints:       \

    case BuilderOp::cmplt_n_uints:      \

    case BuilderOp::cmpeq_n_floats:     \

    case BuilderOp::cmpeq_n_ints:       \

    case BuilderOp::cmpne_n_floats:     \

    case BuilderOp::cmpne_n_ints


#define ALL_IMMEDIATE_BINARY_OP_CASES    \

         BuilderOp::add_imm_float:       \

    case BuilderOp::add_imm_int:         \

    case BuilderOp::mul_imm_float:       \

    case BuilderOp::mul_imm_int:         \

    case BuilderOp::bitwise_and_imm_int: \

    case BuilderOp::bitwise_xor_imm_int: \

    case BuilderOp::min_imm_float:       \

    case BuilderOp::max_imm_float:       \

    case BuilderOp::cmple_imm_float:     \

    case BuilderOp::cmple_imm_int:       \

    case BuilderOp::cmple_imm_uint:      \

    case BuilderOp::cmplt_imm_float:     \

    case BuilderOp::cmplt_imm_int:       \

    case BuilderOp::cmplt_imm_uint:      \

    case BuilderOp::cmpeq_imm_float:     \

    case BuilderOp::cmpeq_imm_int:       \

    case BuilderOp::cmpne_imm_float:     \

    case BuilderOp::cmpne_imm_int


#define ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES \

         BuilderOp::bitwise_and_imm_int


#define ALL_N_WAY_TERNARY_OP_CASES       \

         BuilderOp::smoothstep_n_floats


#define ALL_MULTI_SLOT_TERNARY_OP_CASES \

         BuilderOp::mix_n_floats:       \

    case BuilderOp::mix_n_ints


static bool is_immediate_op(BuilderOp op) {

    switch (op) {

        case ALL_IMMEDIATE_BINARY_OP_CASES: return true;

        default:                            return false;

    }

}


static bool is_multi_slot_immediate_op(BuilderOp op) {

    switch (op) {

        case ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES: return true;

        default:                                       return false;

    }

}


static BuilderOp convert_n_way_op_to_immediate(BuilderOp op, int slots, int32_t* constantValue) {

    // We rely on the exact ordering of SkRP ops here; the immediate-mode op must always come

    // directly before the n-way op. (If we have more than one, the increasing-slot variations

    // continue backwards from there.)

    BuilderOp immOp = (BuilderOp)((int)op - 1);


    // Some immediate ops support multiple slots.

    if (is_multi_slot_immediate_op(immOp)) {

        return immOp;

    }


    // Most immediate ops only directly support a single slot. However, it's still faster to execute

    // `add_imm_int, add_imm_int` instead of `splat_2_ints, add_2_ints`, so we allow those

    // conversions as well.

    if (slots <= 2) {

        if (is_immediate_op(immOp)) {

            return immOp;

        }


        // We also allow for immediate-mode subtraction, by adding a negative value.

        switch (op) {

            case BuilderOp::sub_n_ints:

                *constantValue *= -1;

                return BuilderOp::add_imm_int;


            case BuilderOp::sub_n_floats: {

                // This negates the floating-point value by inverting its sign bit.

                *constantValue ^= 0x80000000;

                return BuilderOp::add_imm_float;

            }

            default:

                break;

        }

    }


    // We don't have an immediate-mode version of this op.

    return op;

}


void Builder::appendInstruction(BuilderOp op, SlotList slots,

                                int immA, int immB, int immC, int immD) {

    fInstructions.push_back({op, slots.fSlotA, slots.fSlotB,

                             immA, immB, immC, immD, fCurrentStackID});

}


Instruction* Builder::lastInstruction(int fromBack) {

    if (fInstructions.size() <= fromBack) {

        return nullptr;

    }

    Instruction* inst = &fInstructions.fromBack(fromBack);

    if (inst->fStackID != fCurrentStackID) {

        return nullptr;

    }

    return inst;

}


Instruction* Builder::lastInstructionOnAnyStack(int fromBack) {

    if (fInstructions.size() <= fromBack) {

        return nullptr;

    }

    return &fInstructions.fromBack(fromBack);

}


void Builder::unary_op(BuilderOp op, int32_t slots) {

    switch (op) {

        case ALL_SINGLE_SLOT_UNARY_OP_CASES:

        case ALL_MULTI_SLOT_UNARY_OP_CASES:

            this->appendInstruction(op, {}, slots);

            break;


        default:

            SkDEBUGFAIL("not a unary op");

            break;

    }

}


void Builder::binary_op(BuilderOp op, int32_t slots) {

    if (Instruction* lastInstruction = this->lastInstruction()) {

        // If we just pushed or splatted a constant onto the stack...

        if (lastInstruction->fOp == BuilderOp::push_constant &&

            lastInstruction->fImmA >= slots) {

            // ... and this op has an immediate-mode equivalent...

            int32_t constantValue = lastInstruction->fImmB;

            BuilderOp immOp = convert_n_way_op_to_immediate(op, slots, &constantValue);

            if (immOp != op) {

                // ... discard the constants from the stack, and use an immediate-mode op.

                this->discard_stack(slots);

                this->appendInstruction(immOp, {}, slots, constantValue);

                return;

            }

        }

    }


    switch (op) {

        case ALL_N_WAY_BINARY_OP_CASES:

        case ALL_MULTI_SLOT_BINARY_OP_CASES:

            this->appendInstruction(op, {}, slots);

            break;


        default:

            SkDEBUGFAIL("not a binary op");

            break;

    }

}


void Builder::ternary_op(BuilderOp op, int32_t slots) {

    switch (op) {

        case ALL_N_WAY_TERNARY_OP_CASES:

        case ALL_MULTI_SLOT_TERNARY_OP_CASES:

            this->appendInstruction(op, {}, slots);

            break;


        default:

            SkDEBUGFAIL("not a ternary op");

            break;

    }

}


void Builder::dot_floats(int32_t slots) {

    switch (slots) {

        case 1: this->appendInstruction(BuilderOp::mul_n_floats, {}, slots); break;

        case 2: this->appendInstruction(BuilderOp::dot_2_floats, {}, slots); break;

        case 3: this->appendInstruction(BuilderOp::dot_3_floats, {}, slots); break;

        case 4: this->appendInstruction(BuilderOp::dot_4_floats, {}, slots); break;


        default:

            SkDEBUGFAIL("invalid number of slots");

            break;

    }

}


void Builder::refract_floats() {

    this->appendInstruction(BuilderOp::refract_4_floats, {});

}


void Builder::inverse_matrix(int32_t n) {

    switch (n) {

        case 2:  this->appendInstruction(BuilderOp::inverse_mat2, {}, 4);  break;

        case 3:  this->appendInstruction(BuilderOp::inverse_mat3, {}, 9);  break;

        case 4:  this->appendInstruction(BuilderOp::inverse_mat4, {}, 16); break;

        default: SkUNREACHABLE;

    }

}


void Builder::pad_stack(int32_t count) {

    if (count > 0) {

        this->appendInstruction(BuilderOp::pad_stack, {}, count);

    }

}


bool Builder::simplifyImmediateUnmaskedOp() {

    if (fInstructions.size() < 3) {

        return false;

    }


    // If we detect a pattern of 'push, immediate-op, unmasked pop', then we can

    // convert it into an immediate-op directly onto the value slots and take the

    // stack entirely out of the equation.

    Instruction* popInstruction  = this->lastInstruction(/*fromBack=*/0);

    Instruction* immInstruction  = this->lastInstruction(/*fromBack=*/1);

    Instruction* pushInstruction = this->lastInstruction(/*fromBack=*/2);


    // If the last instruction is an unmasked pop...

    if (popInstruction && immInstruction && pushInstruction &&

        popInstruction->fOp == BuilderOp::copy_stack_to_slots_unmasked) {

        // ... and the prior instruction was an immediate-mode op, with the same number of slots...

        if (is_immediate_op(immInstruction->fOp) &&

            immInstruction->fImmA == popInstruction->fImmA) {

            // ... and we support multiple-slot immediates (if this op calls for it)...

            if (immInstruction->fImmA == 1 || is_multi_slot_immediate_op(immInstruction->fOp)) {

                // ... and the prior instruction was `push_slots` or `push_immutable` of at least

                // that many slots...

                if ((pushInstruction->fOp == BuilderOp::push_slots ||

                     pushInstruction->fOp == BuilderOp::push_immutable) &&

                    pushInstruction->fImmA >= popInstruction->fImmA) {

                    // ... onto the same slot range...

                    Slot immSlot = popInstruction->fSlotA + popInstruction->fImmA;

                    Slot pushSlot = pushInstruction->fSlotA + pushInstruction->fImmA;

                    if (immSlot == pushSlot) {

                        // ... we can shrink the push, eliminate the pop, and perform the immediate

                        // op in-place instead.

                        pushInstruction->fImmA -= immInstruction->fImmA;

                        immInstruction->fSlotA = immSlot - immInstruction->fImmA;

                        fInstructions.pop_back();

                        return true;

                    }

                }

            }

        }

    }


    return false;

}


void Builder::discard_stack(int32_t count, int stackID) {

    // If we pushed something onto the stack and then immediately discarded part of it, we can

    // shrink or eliminate the push.

    while (count > 0) {

        Instruction* lastInstruction = this->lastInstructionOnAnyStack();

        if (!lastInstruction || lastInstruction->fStackID != stackID) {

            break;

        }


        switch (lastInstruction->fOp) {

            case BuilderOp::discard_stack:

                // Our last op was actually a separate discard_stack; combine the discards.

                lastInstruction->fImmA += count;

                return;


            case BuilderOp::push_clone:

            case BuilderOp::push_clone_from_stack:

            case BuilderOp::push_clone_indirect_from_stack:

            case BuilderOp::push_constant:

            case BuilderOp::push_immutable:

            case BuilderOp::push_immutable_indirect:

            case BuilderOp::push_slots:

            case BuilderOp::push_slots_indirect:

            case BuilderOp::push_uniform:

            case BuilderOp::push_uniform_indirect:

            case BuilderOp::pad_stack: {

                // Our last op was a multi-slot push; these cancel out. Eliminate the op if its

                // count reached zero.

                int cancelOut = std::min(count, lastInstruction->fImmA);

                count                  -= cancelOut;

                lastInstruction->fImmA -= cancelOut;

                if (lastInstruction->fImmA == 0) {

                    fInstructions.pop_back();

                }

                continue;

            }

            case BuilderOp::push_condition_mask:

            case BuilderOp::push_loop_mask:

            case BuilderOp::push_return_mask:

                // Our last op was a single-slot push; cancel out one discard and eliminate the op.

                --count;

                fInstructions.pop_back();

                continue;


            case BuilderOp::copy_stack_to_slots_unmasked: {

                // Look for a pattern of `push, immediate-ops, pop` and simplify it down to an

                // immediate-op directly to the value slot.

                if (count == 1) {

                    if (this->simplifyImmediateUnmaskedOp()) {

                        return;

                    }

                }


                // A `copy_stack_to_slots_unmasked` op, followed immediately by a `discard_stack`

                // op with an equal number of slots, is interpreted as an unmasked stack pop.

                // We can simplify pops in a variety of ways. First, temporarily get rid of

                // `copy_stack_to_slots_unmasked`.

                if (count == lastInstruction->fImmA) {

                    SlotRange dst{lastInstruction->fSlotA, lastInstruction->fImmA};

                    fInstructions.pop_back();


                    // See if we can write this pop in a simpler way.

                    this->simplifyPopSlotsUnmasked(&dst);


                    // If simplification consumed the entire range, we're done!

                    if (dst.count == 0) {

                        return;

                    }


                    // Simplification did not consume the entire range. We are still responsible for

                    // copying-back and discarding any remaining slots.

                    this->copy_stack_to_slots_unmasked(dst);

                    count = dst.count;

                }

                break;

            }

            default:

                break;

        }


        // This instruction wasn't a push.

        break;

    }


    if (count > 0) {

        this->appendInstruction(BuilderOp::discard_stack, {}, count);

    }

}


void Builder::label(int labelID) {

    SkASSERT(labelID >= 0 && labelID < fNumLabels);


    // If the previous instruction was a branch to this label, it's a no-op; jumping to the very

    // next instruction is effectively meaningless.

    while (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {

        switch (lastInstruction->fOp) {

            case BuilderOp::jump:

            case BuilderOp::branch_if_all_lanes_active:

            case BuilderOp::branch_if_any_lanes_active:

            case BuilderOp::branch_if_no_lanes_active:

            case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal:

                if (lastInstruction->fImmA == labelID) {

                    fInstructions.pop_back();

                    continue;

                }

                break;


            default:

                break;

        }

        break;

    }

    this->appendInstruction(BuilderOp::label, {}, labelID);

}


void Builder::jump(int labelID) {

    SkASSERT(labelID >= 0 && labelID < fNumLabels);

    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {

        if (lastInstruction->fOp == BuilderOp::jump) {

            // The previous instruction was also `jump`, so this branch could never possibly occur.

            return;

        }

    }

    this->appendInstruction(BuilderOp::jump, {}, labelID);

}


void Builder::branch_if_any_lanes_active(int labelID) {

    if (!this->executionMaskWritesAreEnabled()) {

        this->jump(labelID);

        return;

    }


    SkASSERT(labelID >= 0 && labelID < fNumLabels);

    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {

        if (lastInstruction->fOp == BuilderOp::branch_if_any_lanes_active ||

            lastInstruction->fOp == BuilderOp::jump) {

            // The previous instruction was `jump` or `branch_if_any_lanes_active`, so this branch

            // could never possibly occur.

            return;

        }

    }

    this->appendInstruction(BuilderOp::branch_if_any_lanes_active, {}, labelID);

}


void Builder::branch_if_all_lanes_active(int labelID) {

    if (!this->executionMaskWritesAreEnabled()) {

        this->jump(labelID);

        return;

    }


    SkASSERT(labelID >= 0 && labelID < fNumLabels);

    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {

        if (lastInstruction->fOp == BuilderOp::branch_if_all_lanes_active ||

            lastInstruction->fOp == BuilderOp::jump) {

            // The previous instruction was `jump` or `branch_if_all_lanes_active`, so this branch

            // could never possibly occur.

            return;

        }

    }

    this->appendInstruction(BuilderOp::branch_if_all_lanes_active, {}, labelID);

}


void Builder::branch_if_no_lanes_active(int labelID) {

    if (!this->executionMaskWritesAreEnabled()) {

        return;

    }


    SkASSERT(labelID >= 0 && labelID < fNumLabels);

    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {

        if (lastInstruction->fOp == BuilderOp::branch_if_no_lanes_active ||

            lastInstruction->fOp == BuilderOp::jump) {

            // The previous instruction was `jump` or `branch_if_no_lanes_active`, so this branch

            // could never possibly occur.

            return;

        }

    }

    this->appendInstruction(BuilderOp::branch_if_no_lanes_active, {}, labelID);

}


void Builder::branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID) {

    SkASSERT(labelID >= 0 && labelID < fNumLabels);

    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {

        if (lastInstruction->fOp == BuilderOp::jump ||

            (lastInstruction->fOp == BuilderOp::branch_if_no_active_lanes_on_stack_top_equal &&

             lastInstruction->fImmB == value)) {

            // The previous instruction was `jump` or `branch_if_no_active_lanes_on_stack_top_equal`

            // (checking against the same value), so this branch could never possibly occur.

            return;

        }

    }

    this->appendInstruction(BuilderOp::branch_if_no_active_lanes_on_stack_top_equal,

                            {}, labelID, value);

}


void Builder::push_slots_or_immutable(SlotRange src, BuilderOp op) {

    SkASSERT(src.count >= 0);

    if (Instruction* lastInstruction = this->lastInstruction()) {

        // If the previous instruction was pushing slots contiguous to this range, we can collapse

        // the two pushes into one larger push.

        if (lastInstruction->fOp == op &&

            lastInstruction->fSlotA + lastInstruction->fImmA == src.index) {

            lastInstruction->fImmA += src.count;

            src.count = 0;

        }

    }


    if (src.count > 0) {

        this->appendInstruction(op, {src.index}, src.count);

    }


    // Look for a sequence of "copy stack to X, discard stack, copy X to stack". This is a common

    // pattern when multiple operations in a row affect the same variable. When we see this, we can

    // eliminate both the discard and the push.

    if (fInstructions.size() >= 3) {

        const Instruction* pushInst        = this->lastInstruction(/*fromBack=*/0);

        const Instruction* discardInst     = this->lastInstruction(/*fromBack=*/1);

        const Instruction* copyToSlotsInst = this->lastInstruction(/*fromBack=*/2);


        if (pushInst && discardInst && copyToSlotsInst && pushInst->fOp == BuilderOp::push_slots) {

            int pushIndex = pushInst->fSlotA;

            int pushCount = pushInst->fImmA;


            // Look for a `discard_stack` matching our push count.

            if (discardInst->fOp == BuilderOp::discard_stack && discardInst->fImmA == pushCount) {

                // Look for a `copy_stack_to_slots` matching our push.

                if ((copyToSlotsInst->fOp == BuilderOp::copy_stack_to_slots ||

                     copyToSlotsInst->fOp == BuilderOp::copy_stack_to_slots_unmasked) &&

                    copyToSlotsInst->fSlotA == pushIndex && copyToSlotsInst->fImmA == pushCount) {

                    // We found a matching sequence. Remove the discard and push.

                    fInstructions.pop_back();

                    fInstructions.pop_back();

                    return;

                }

            }

        }

    }

}


void Builder::push_slots_or_immutable_indirect(SlotRange fixedRange,

                                               int dynamicStackID,

                                               SlotRange limitRange,

                                               BuilderOp op) {

    // SlotA: fixed-range start

    // SlotB: limit-range end

    // immA: number of slots

    // immB: dynamic stack ID

    this->appendInstruction(op,

                            {fixedRange.index, limitRange.index + limitRange.count},

                            fixedRange.count,

                            dynamicStackID);

}


void Builder::push_uniform(SlotRange src) {

    SkASSERT(src.count >= 0);

    if (Instruction* lastInstruction = this->lastInstruction()) {

        // If the previous instruction was pushing uniforms contiguous to this range, we can

        // collapse the two pushes into one larger push.

        if (lastInstruction->fOp == BuilderOp::push_uniform &&

            lastInstruction->fSlotA + lastInstruction->fImmA == src.index) {

            lastInstruction->fImmA += src.count;

            return;

        }

    }


    if (src.count > 0) {

        this->appendInstruction(BuilderOp::push_uniform, {src.index}, src.count);

    }

}


void Builder::push_uniform_indirect(SlotRange fixedRange,

                                    int dynamicStackID,

                                    SlotRange limitRange) {

    // SlotA: fixed-range start

    // SlotB: limit-range end

    // immA: number of slots

    // immB: dynamic stack ID

    this->appendInstruction(BuilderOp::push_uniform_indirect,

                            {fixedRange.index, limitRange.index + limitRange.count},

                            fixedRange.count,

                            dynamicStackID);

}


void Builder::trace_var_indirect(int traceMaskStackID,

                                 SlotRange fixedRange,

                                 int dynamicStackID,

                                 SlotRange limitRange) {

    // SlotA: fixed-range start

    // SlotB: limit-range end

    // immA: trace-mask stack ID

    // immB: number of slots

    // immC: dynamic stack ID

    this->appendInstruction(BuilderOp::trace_var_indirect,

                            {fixedRange.index, limitRange.index + limitRange.count},

                            traceMaskStackID,

                            fixedRange.count,

                            dynamicStackID);

}


void Builder::push_constant_i(int32_t val, int count) {

    SkASSERT(count >= 0);

    if (count > 0) {

        if (Instruction* lastInstruction = this->lastInstruction()) {

            // If the previous op is pushing the same value, we can just push more of them.

            if (lastInstruction->fOp == BuilderOp::push_constant && lastInstruction->fImmB == val) {

                lastInstruction->fImmA += count;

                return;

            }

        }

        this->appendInstruction(BuilderOp::push_constant, {}, count, val);

    }

}


void Builder::push_duplicates(int count) {

    if (Instruction* lastInstruction = this->lastInstruction()) {

        // If the previous op is pushing a constant, we can just push more of them.

        if (lastInstruction->fOp == BuilderOp::push_constant) {

            lastInstruction->fImmA += count;

            return;

        }

    }

    SkASSERT(count >= 0);

    if (count >= 3) {

        // Use a swizzle to splat the input into a 4-slot value.

        this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0});

        count -= 3;

    }

    for (; count >= 4; count -= 4) {

        // Clone the splatted value four slots at a time.

        this->push_clone(/*numSlots=*/4);

    }

    // Use a swizzle or clone to handle the trailing items.

    switch (count) {

        case 3:  this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0}); break;

        case 2:  this->swizzle(/*consumedSlots=*/1, {0, 0, 0});    break;

        case 1:  this->push_clone(/*numSlots=*/1);                 break;

        default: break;

    }

}


void Builder::push_clone(int numSlots, int offsetFromStackTop) {

    // If we are cloning the stack top...

    if (numSlots == 1 && offsetFromStackTop == 0) {

        // ... and the previous op is pushing a constant...

        if (Instruction* lastInstruction = this->lastInstruction()) {

            if (lastInstruction->fOp == BuilderOp::push_constant) {

                // ... we can just push more of them.

                lastInstruction->fImmA += 1;

                return;

            }

        }

    }

    this->appendInstruction(BuilderOp::push_clone, {}, numSlots, numSlots + offsetFromStackTop);

}


void Builder::push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop) {

    // immA: number of slots

    // immB: other stack ID

    // immC: offset from stack top

    offsetFromStackTop -= range.index;


    if (Instruction* lastInstruction = this->lastInstruction()) {

        // If the previous op is also pushing a clone...

        if (lastInstruction->fOp == BuilderOp::push_clone_from_stack &&

            // ... from the same stack...

            lastInstruction->fImmB == otherStackID &&

            // ... and this clone starts at the same place that the last clone ends...

            lastInstruction->fImmC - lastInstruction->fImmA == offsetFromStackTop) {

            // ... just extend the existing clone-op.

            lastInstruction->fImmA += range.count;

            return;

        }

    }


    this->appendInstruction(BuilderOp::push_clone_from_stack, {},

                            range.count, otherStackID, offsetFromStackTop);

}


void Builder::push_clone_indirect_from_stack(SlotRange fixedOffset,

                                             int dynamicStackID,

                                             int otherStackID,

                                             int offsetFromStackTop) {

    // immA: number of slots

    // immB: other stack ID

    // immC: offset from stack top

    // immD: dynamic stack ID

    offsetFromStackTop -= fixedOffset.index;


    this->appendInstruction(BuilderOp::push_clone_indirect_from_stack, {},

                            fixedOffset.count, otherStackID, offsetFromStackTop, dynamicStackID);

}


void Builder::pop_slots(SlotRange dst) {

    if (!this->executionMaskWritesAreEnabled()) {

        this->pop_slots_unmasked(dst);

        return;

    }


    this->copy_stack_to_slots(dst);

    this->discard_stack(dst.count);

}


void Builder::simplifyPopSlotsUnmasked(SlotRange* dst) {

    if (!dst->count) {

        // There's nothing left to simplify.

        return;

    }

    Instruction* lastInstruction = this->lastInstruction();

    if (!lastInstruction) {

        // There's nothing left to simplify.

        return;

    }

    BuilderOp lastOp = lastInstruction->fOp;


    // If the last instruction is pushing a constant, we can simplify it by copying the constant

    // directly into the destination slot.

    if (lastOp == BuilderOp::push_constant) {

        // Get the last slot.

        int32_t value = lastInstruction->fImmB;

        lastInstruction->fImmA--;

        if (lastInstruction->fImmA == 0) {

            fInstructions.pop_back();

        }


        // Consume one destination slot.

        dst->count--;

        Slot destinationSlot = dst->index + dst->count;


        // Continue simplifying if possible.

        this->simplifyPopSlotsUnmasked(dst);


        // Write the constant directly to the destination slot.

        this->copy_constant(destinationSlot, value);

        return;

    }


    // If the last instruction is pushing a uniform, we can simplify it by copying the uniform

    // directly into the destination slot.

    if (lastOp == BuilderOp::push_uniform) {

        // Get the last slot.

        Slot sourceSlot = lastInstruction->fSlotA + lastInstruction->fImmA - 1;

        lastInstruction->fImmA--;

        if (lastInstruction->fImmA == 0) {

            fInstructions.pop_back();

        }


        // Consume one destination slot.

        dst->count--;

        Slot destinationSlot = dst->index + dst->count;


        // Continue simplifying if possible.

        this->simplifyPopSlotsUnmasked(dst);


        // Write the constant directly to the destination slot.

        this->copy_uniform_to_slots_unmasked({destinationSlot, 1}, {sourceSlot, 1});

        return;

    }


    // If the last instruction is pushing a slot or immutable, we can just copy that slot.

    if (lastOp == BuilderOp::push_slots || lastOp == BuilderOp::push_immutable) {

        // Get the last slot.

        Slot sourceSlot = lastInstruction->fSlotA + lastInstruction->fImmA - 1;

        lastInstruction->fImmA--;

        if (lastInstruction->fImmA == 0) {

            fInstructions.pop_back();

        }


        // Consume one destination slot.

        dst->count--;

        Slot destinationSlot = dst->index + dst->count;


        // Try once more.

        this->simplifyPopSlotsUnmasked(dst);


        // Copy the slot directly.

        if (lastOp == BuilderOp::push_slots) {

            if (destinationSlot != sourceSlot) {

                this->copy_slots_unmasked({destinationSlot, 1}, {sourceSlot, 1});

            } else {

                // Copying from a value-slot into the same value-slot is a no-op.

            }

        } else {

            // Copy from immutable data directly to the destination slot.

            this->copy_immutable_unmasked({destinationSlot, 1}, {sourceSlot, 1});

        }

        return;

    }

}


void Builder::pop_slots_unmasked(SlotRange dst) {

    SkASSERT(dst.count >= 0);

    this->copy_stack_to_slots_unmasked(dst);

    this->discard_stack(dst.count);

}


void Builder::exchange_src() {

    if (Instruction* lastInstruction = this->lastInstruction()) {

        // If the previous op is also an exchange-src...

        if (lastInstruction->fOp == BuilderOp::exchange_src) {

            // ... both ops can be eliminated. A double-swap is a no-op.

            fInstructions.pop_back();

            return;

        }

    }


    this->appendInstruction(BuilderOp::exchange_src, {});

}


void Builder::pop_src_rgba() {

    if (Instruction* lastInstruction = this->lastInstruction()) {

        // If the previous op is exchanging src.rgba with the stack...

        if (lastInstruction->fOp == BuilderOp::exchange_src) {

            // ... both ops can be eliminated. It's just sliding the color back and forth.

            fInstructions.pop_back();

            this->discard_stack(4);

            return;

        }

    }


    this->appendInstruction(BuilderOp::pop_src_rgba, {});

}


void Builder::copy_stack_to_slots(SlotRange dst, int offsetFromStackTop) {

    // If the execution mask is known to be all-true, then we can ignore the write mask.

    if (!this->executionMaskWritesAreEnabled()) {

        this->copy_stack_to_slots_unmasked(dst, offsetFromStackTop);

        return;

    }


    // If the last instruction copied the previous stack slots, just extend it.

    if (Instruction* lastInstruction = this->lastInstruction()) {

        // If the last op is copy-stack-to-slots...

        if (lastInstruction->fOp == BuilderOp::copy_stack_to_slots &&

            // and this op's destination is immediately after the last copy-slots-op's destination

            lastInstruction->fSlotA + lastInstruction->fImmA == dst.index &&

            // and this op's source is immediately after the last copy-slots-op's source

            lastInstruction->fImmB - lastInstruction->fImmA == offsetFromStackTop) {

            // then we can just extend the copy!

            lastInstruction->fImmA += dst.count;

            return;

        }

    }


    this->appendInstruction(BuilderOp::copy_stack_to_slots, {dst.index},

                            dst.count, offsetFromStackTop);

}


void Builder::copy_stack_to_slots_indirect(SlotRange fixedRange,

                                           int dynamicStackID,

                                           SlotRange limitRange) {

    // SlotA: fixed-range start

    // SlotB: limit-range end

    // immA: number of slots

    // immB: dynamic stack ID

    this->appendInstruction(BuilderOp::copy_stack_to_slots_indirect,

                            {fixedRange.index, limitRange.index + limitRange.count},

                            fixedRange.count,

                            dynamicStackID);

}


static bool slot_ranges_overlap(SlotRange x, SlotRange y) {

    return x.index < y.index + y.count &&

           y.index < x.index + x.count;

}


void Builder::copy_constant(Slot slot, int constantValue) {

    // If the last instruction copied the same constant, just extend it.

    if (Instruction* lastInstr = this->lastInstruction()) {

        // If the last op is copy-constant...

        if (lastInstr->fOp == BuilderOp::copy_constant &&

            // ... and has the same value...

            lastInstr->fImmB == constantValue &&

            // ... and the slot is immediately after the last copy-constant's destination...

            lastInstr->fSlotA + lastInstr->fImmA == slot) {

            // ... then we can extend the copy!

            lastInstr->fImmA += 1;

            return;

        }

    }


    this->appendInstruction(BuilderOp::copy_constant, {slot}, 1, constantValue);

}


void Builder::copy_slots_unmasked(SlotRange dst, SlotRange src) {

    // If the last instruction copied adjacent slots, just extend it.

    if (Instruction* lastInstr = this->lastInstruction()) {

        // If the last op is a match...

        if (lastInstr->fOp == BuilderOp::copy_slot_unmasked &&

            // and this op's destination is immediately after the last copy-slots-op's destination

            lastInstr->fSlotA + lastInstr->fImmA == dst.index &&

            // and this op's source is immediately after the last copy-slots-op's source

            lastInstr->fSlotB + lastInstr->fImmA == src.index &&

            // and the source/dest ranges will not overlap

            !slot_ranges_overlap({lastInstr->fSlotB, lastInstr->fImmA + dst.count},

                                 {lastInstr->fSlotA, lastInstr->fImmA + dst.count})) {

            // then we can just extend the copy!

            lastInstr->fImmA += dst.count;

            return;

        }

    }


    SkASSERT(dst.count == src.count);

    this->appendInstruction(BuilderOp::copy_slot_unmasked, {dst.index, src.index}, dst.count);

}


void Builder::copy_immutable_unmasked(SlotRange dst, SlotRange src) {

    // If the last instruction copied adjacent immutable data, just extend it.

    if (Instruction* lastInstr = this->lastInstruction()) {

        // If the last op is a match...

        if (lastInstr->fOp == BuilderOp::copy_immutable_unmasked &&

            // and this op's destination is immediately after the last copy-slots-op's destination

            lastInstr->fSlotA + lastInstr->fImmA == dst.index &&

            // and this op's source is immediately after the last copy-slots-op's source

            lastInstr->fSlotB + lastInstr->fImmA == src.index) {

            // then we can just extend the copy!

            lastInstr->fImmA += dst.count;

            return;

        }

    }


    SkASSERT(dst.count == src.count);

    this->appendInstruction(BuilderOp::copy_immutable_unmasked, {dst.index, src.index}, dst.count);

}


void Builder::copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src) {

    // If the last instruction copied adjacent uniforms, just extend it.

    if (Instruction* lastInstr = this->lastInstruction()) {

        // If the last op is copy-constant...

        if (lastInstr->fOp == BuilderOp::copy_uniform_to_slots_unmasked &&

            // and this op's destination is immediately after the last copy-constant's destination

            lastInstr->fSlotB + lastInstr->fImmA == dst.index &&

            // and this op's source is immediately after the last copy-constant's source

            lastInstr->fSlotA + lastInstr->fImmA == src.index) {

            // then we can just extend the copy!

            lastInstr->fImmA += dst.count;

            return;

        }

    }


    SkASSERT(dst.count == src.count);

    this->appendInstruction(BuilderOp::copy_uniform_to_slots_unmasked, {src.index, dst.index},

                            dst.count);

}


void Builder::copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop) {

    // If the last instruction copied the previous stack slots, just extend it.

    if (Instruction* lastInstr = this->lastInstruction()) {

        // If the last op is copy-stack-to-slots-unmasked...

        if (lastInstr->fOp == BuilderOp::copy_stack_to_slots_unmasked &&

            // and this op's destination is immediately after the last copy-slots-op's destination

            lastInstr->fSlotA + lastInstr->fImmA == dst.index &&

            // and this op's source is immediately after the last copy-slots-op's source

            lastInstr->fImmB - lastInstr->fImmA == offsetFromStackTop) {

            // then we can just extend the copy!

            lastInstr->fImmA += dst.count;

            return;

        }

    }


    this->appendInstruction(BuilderOp::copy_stack_to_slots_unmasked, {dst.index},

                            dst.count, offsetFromStackTop);

}


void Builder::pop_return_mask() {

    SkASSERT(this->executionMaskWritesAreEnabled());


    // This instruction is going to overwrite the return mask. If the previous instruction was

    // masking off the return mask, that's wasted work and it can be eliminated.

    if (Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {

        if (lastInstruction->fOp == BuilderOp::mask_off_return_mask) {

            fInstructions.pop_back();

        }

    }


    this->appendInstruction(BuilderOp::pop_return_mask, {});

}


void Builder::push_condition_mask() {

    SkASSERT(this->executionMaskWritesAreEnabled());


    // If the previous instruction is popping the condition mask, we can restore it onto the stack

    // "for free" instead of copying it.

    if (Instruction* lastInstruction = this->lastInstruction()) {

        if (lastInstruction->fOp == BuilderOp::pop_condition_mask) {

            this->pad_stack(1);

            return;

        }

    }

    this->appendInstruction(BuilderOp::push_condition_mask, {});

}


void Builder::merge_condition_mask() {

    SkASSERT(this->executionMaskWritesAreEnabled());


    // This instruction is going to overwrite the condition mask. If the previous instruction was

    // loading the condition mask, that's wasted work and it can be eliminated.

    if (Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {

        if (lastInstruction->fOp == BuilderOp::pop_condition_mask) {

            int stackID = lastInstruction->fStackID;

            fInstructions.pop_back();

            this->discard_stack(/*count=*/1, stackID);

        }

    }


    this->appendInstruction(BuilderOp::merge_condition_mask, {});

}


void Builder::zero_slots_unmasked(SlotRange dst) {

    if (Instruction* lastInstruction = this->lastInstruction()) {

        if (lastInstruction->fOp == BuilderOp::copy_constant && lastInstruction->fImmB == 0) {

            if (lastInstruction->fSlotA + lastInstruction->fImmA == dst.index) {

                // The previous instruction was zeroing the range immediately before this range.

                // Combine the ranges.

                lastInstruction->fImmA += dst.count;

                return;

            }


            if (lastInstruction->fSlotA == dst.index + dst.count) {

                // The previous instruction was zeroing the range immediately after this range.

                // Combine the ranges.

                lastInstruction->fSlotA = dst.index;

                lastInstruction->fImmA += dst.count;

                return;

            }

        }

    }


    this->appendInstruction(BuilderOp::copy_constant, {dst.index}, dst.count, 0);

}


static int pack_nybbles(SkSpan<const int8_t> components) {

    // Pack up to 8 elements into nybbles, in reverse order.

    int packed = 0;

    for (auto iter = components.rbegin(); iter != components.rend(); ++iter) {

        SkASSERT(*iter >= 0 && *iter <= 0xF);

        packed <<= 4;

        packed |= *iter;

    }

    return packed;

}


template <typename T>

static void unpack_nybbles_to_offsets(uint32_t components, SkSpan<T> offsets) {

    // Unpack component nybbles into byte-offsets pointing at stack slots.

    for (size_t index = 0; index < offsets.size(); ++index) {

        offsets[index] = (components & 0xF) * SkOpts::raster_pipeline_highp_stride * sizeof(float);

        components >>= 4;

    }

}


static int max_packed_nybble(uint32_t components, size_t numComponents) {

    int largest = 0;

    for (size_t index = 0; index < numComponents; ++index) {

        largest = std::max<int>(largest, components & 0xF);

        components >>= 4;

    }

    return largest;

}


void Builder::swizzle_copy_stack_to_slots(SlotRange dst,

                                          SkSpan<const int8_t> components,

                                          int offsetFromStackTop) {

    // When the execution-mask writes-enabled flag is off, we could squeeze out a little bit of

    // extra speed here by implementing and using an unmasked version of this op.


    // SlotA: fixed-range start

    // immA: number of swizzle components

    // immB: swizzle components

    // immC: offset from stack top

    this->appendInstruction(BuilderOp::swizzle_copy_stack_to_slots, {dst.index},

                            (int)components.size(),

                            pack_nybbles(components),

                            offsetFromStackTop);

}


void Builder::swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange,

                                                   int dynamicStackID,

                                                   SlotRange limitRange,

                                                   SkSpan<const int8_t> components,

                                                   int offsetFromStackTop) {

    // When the execution-mask writes-enabled flag is off, we could squeeze out a little bit of

    // extra speed here by implementing and using an unmasked version of this op.


    // SlotA: fixed-range start

    // SlotB: limit-range end

    // immA: number of swizzle components

    // immB: swizzle components

    // immC: offset from stack top

    // immD: dynamic stack ID

    this->appendInstruction(BuilderOp::swizzle_copy_stack_to_slots_indirect,

                            {fixedRange.index, limitRange.index + limitRange.count},

                            (int)components.size(),

                            pack_nybbles(components),

                            offsetFromStackTop,

                            dynamicStackID);

}


void Builder::swizzle(int consumedSlots, SkSpan<const int8_t> components) {

    // Consumes `consumedSlots` elements on the stack, then generates `elementSpan.size()` elements.

    SkASSERT(consumedSlots >= 0);


    // We only allow up to 16 elements, and they can only reach 0-15 slots, due to nybble packing.

    int numElements = components.size();

    SkASSERT(numElements <= 16);

    SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e >= 0; }));

    SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e <= 0xF; }));


    // Make a local copy of the element array.

    int8_t elements[16] = {};

    std::copy(components.begin(), components.end(), std::begin(elements));


    while (numElements > 0) {

        // If the first element of the swizzle is zero...

        if (elements[0] != 0) {

            break;

        }

        // ...and zero isn't used elsewhere in the swizzle...

        if (std::any_of(&elements[1], &elements[numElements], [](int8_t e) { return e == 0; })) {

            break;

        }

        // We can omit the first slot from the swizzle entirely.

        // Slide everything forward by one slot, and reduce the element index by one.

        for (int index = 1; index < numElements; ++index) {

            elements[index - 1] = elements[index] - 1;

        }

        elements[numElements - 1] = 0;

        --consumedSlots;

        --numElements;

    }


    // A completely empty swizzle is a discard.

    if (numElements == 0) {

        this->discard_stack(consumedSlots);

        return;

    }


    if (consumedSlots <= 4 && numElements <= 4) {

        // We can fit everything into a little swizzle.

        int op = (int)BuilderOp::swizzle_1 + numElements - 1;

        this->appendInstruction((BuilderOp)op, {}, consumedSlots,

                                pack_nybbles(SkSpan(elements, numElements)));

        return;

    }


    // This is a big swizzle. We use the `shuffle` op to handle these. immA counts the consumed

    // slots. immB counts the generated slots. immC and immD hold packed-nybble shuffle values.

    this->appendInstruction(BuilderOp::shuffle, {},

                            consumedSlots, numElements,

                            pack_nybbles(SkSpan(&elements[0], 8)),

                            pack_nybbles(SkSpan(&elements[8], 8)));

}


void Builder::transpose(int columns, int rows) {

    // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).

    int8_t elements[16] = {};

    size_t index = 0;

    for (int r = 0; r < rows; ++r) {

        for (int c = 0; c < columns; ++c) {

            elements[index++] = (c * rows) + r;

        }

    }

    this->swizzle(/*consumedSlots=*/columns * rows, SkSpan(elements, index));

}


void Builder::diagonal_matrix(int columns, int rows) {

    // Generates a CxR diagonal matrix from the top two scalars on the stack.

    int8_t elements[16] = {};

    size_t index = 0;

    for (int c = 0; c < columns; ++c) {

        for (int r = 0; r < rows; ++r) {

            elements[index++] = (c == r) ? 1 : 0;

        }

    }

    this->swizzle(/*consumedSlots=*/2, SkSpan(elements, index));

}


void Builder::matrix_resize(int origColumns, int origRows, int newColumns, int newRows) {

    // Resizes a CxR matrix at the top of the stack to C'xR'.

    int8_t elements[16] = {};

    size_t index = 0;


    size_t consumedSlots = origColumns * origRows;

    size_t zeroOffset = 0, oneOffset = 0;


    for (int c = 0; c < newColumns; ++c) {

        for (int r = 0; r < newRows; ++r) {

            if (c < origColumns && r < origRows) {

                // Push an element from the original matrix.

                elements[index++] = (c * origRows) + r;

            } else {

                // This element is outside the original matrix; push 1 or 0.

                if (c == r) {

                    // We need to synthesize a literal 1.

                    if (oneOffset == 0) {

                        this->push_constant_f(1.0f);

                        oneOffset = consumedSlots++;

                    }

                    elements[index++] = oneOffset;

                } else {

                    // We need to synthesize a literal 0.

                    if (zeroOffset == 0) {

                        this->push_constant_f(0.0f);

                        zeroOffset = consumedSlots++;

                    }

                    elements[index++] = zeroOffset;

                }

            }

        }

    }

    this->swizzle(consumedSlots, SkSpan(elements, index));

}


void Builder::matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows) {

    BuilderOp op;

    switch (leftColumns) {

        case 2:  op = BuilderOp::matrix_multiply_2; break;

        case 3:  op = BuilderOp::matrix_multiply_3; break;

        case 4:  op = BuilderOp::matrix_multiply_4; break;

        default: SkDEBUGFAIL("unsupported matrix dimensions"); return;

    }


    this->appendInstruction(op, {}, leftColumns, leftRows, rightColumns, rightRows);

}


std::unique_ptr<Program> Builder::finish(int numValueSlots,

                                         int numUniformSlots,

                                         int numImmutableSlots,

                                         DebugTracePriv* debugTrace) {

    // Verify that calls to enableExecutionMaskWrites and disableExecutionMaskWrites are balanced.

    SkASSERT(fExecutionMaskWritesEnabled == 0);


    return std::make_unique<Program>(std::move(fInstructions), numValueSlots, numUniformSlots,

                                     numImmutableSlots, fNumLabels, debugTrace);

}


void Program::optimize() {

    // TODO(johnstiles): perform any last-minute cleanup of the instruction stream here

}


static int stack_usage(const Instruction& inst) {

    switch (inst.fOp) {

        case BuilderOp::push_condition_mask:

        case BuilderOp::push_loop_mask:

        case BuilderOp::push_return_mask:

            return 1;


        case BuilderOp::push_src_rgba:

        case BuilderOp::push_dst_rgba:

        case BuilderOp::push_device_xy01:

            return 4;


        case BuilderOp::push_immutable:

        case BuilderOp::push_immutable_indirect:

        case BuilderOp::push_constant:

        case BuilderOp::push_slots:

        case BuilderOp::push_slots_indirect:

        case BuilderOp::push_uniform:

        case BuilderOp::push_uniform_indirect:

        case BuilderOp::push_clone:

        case BuilderOp::push_clone_from_stack:

        case BuilderOp::push_clone_indirect_from_stack:

        case BuilderOp::pad_stack:

            return inst.fImmA;


        case BuilderOp::pop_condition_mask:

        case BuilderOp::pop_loop_mask:

        case BuilderOp::pop_and_reenable_loop_mask:

        case BuilderOp::pop_return_mask:

            return -1;


        case BuilderOp::pop_src_rgba:

        case BuilderOp::pop_dst_rgba:

            return -4;


        case ALL_N_WAY_BINARY_OP_CASES:

        case ALL_MULTI_SLOT_BINARY_OP_CASES:

        case BuilderOp::discard_stack:

        case BuilderOp::select:

            return -inst.fImmA;


        case ALL_N_WAY_TERNARY_OP_CASES:

        case ALL_MULTI_SLOT_TERNARY_OP_CASES:

            return 2 * -inst.fImmA;


        case BuilderOp::swizzle_1:

            return 1 - inst.fImmA;  // consumes immA slots and emits a scalar

        case BuilderOp::swizzle_2:

            return 2 - inst.fImmA;  // consumes immA slots and emits a 2-slot vector

        case BuilderOp::swizzle_3:

            return 3 - inst.fImmA;  // consumes immA slots and emits a 3-slot vector

        case BuilderOp::swizzle_4:

            return 4 - inst.fImmA;  // consumes immA slots and emits a 4-slot vector


        case BuilderOp::dot_2_floats:

            return -3;  // consumes two 2-slot vectors and emits one scalar

        case BuilderOp::dot_3_floats:

            return -5;  // consumes two 3-slot vectors and emits one scalar

        case BuilderOp::dot_4_floats:

            return -7;  // consumes two 4-slot vectors and emits one scalar


        case BuilderOp::refract_4_floats:

            return -5;  // consumes nine slots (N + I + eta) and emits a 4-slot vector (R)


        case BuilderOp::matrix_multiply_2:

        case BuilderOp::matrix_multiply_3:

        case BuilderOp::matrix_multiply_4:

            // consumes the left- and right-matrices; emits result over existing padding slots

            return -(inst.fImmA * inst.fImmB + inst.fImmC * inst.fImmD);


        case BuilderOp::shuffle: {

            int consumed = inst.fImmA;

            int generated = inst.fImmB;

            return generated - consumed;

        }

        case ALL_SINGLE_SLOT_UNARY_OP_CASES:

        case ALL_MULTI_SLOT_UNARY_OP_CASES:

        case ALL_IMMEDIATE_BINARY_OP_CASES:

        default:

            return 0;

    }

}


Program::StackDepths Program::tempStackMaxDepths() const {

    // Count the number of separate temp stacks that the program uses.

    int numStacks = 1;

    for (const Instruction& inst : fInstructions) {

        numStacks = std::max(numStacks, inst.fStackID + 1);

    }


    // Walk the program and calculate how deep each stack can potentially get.

    StackDepths largest, current;

    largest.push_back_n(numStacks, 0);

    current.push_back_n(numStacks, 0);


    for (const Instruction& inst : fInstructions) {

        int stackID = inst.fStackID;

        current[stackID] += stack_usage(inst);

        largest[stackID] = std::max(current[stackID], largest[stackID]);

        // If we assert here, the generated program has popped off the top of the stack.

        SkASSERTF(current[stackID] >= 0, "unbalanced temp stack push/pop on stack %d", stackID);

    }


    // Ensure that when the program is complete, our stacks are fully balanced.

    for (int stackID = 0; stackID < numStacks; ++stackID) {

        // If we assert here, the generated program has pushed more data than it has popped.

        SkASSERTF(current[stackID] == 0, "unbalanced temp stack push/pop on stack %d", stackID);

    }


    return largest;

}


Program::Program(TArray<Instruction> instrs,

                 int numValueSlots,

                 int numUniformSlots,

                 int numImmutableSlots,

                 int numLabels,

                 DebugTracePriv* debugTrace)

        : fInstructions(std::move(instrs))

        , fNumValueSlots(numValueSlots)

        , fNumUniformSlots(numUniformSlots)

        , fNumImmutableSlots(numImmutableSlots)

        , fNumLabels(numLabels)

        , fDebugTrace(debugTrace) {

    this->optimize();


    fTempStackMaxDepths = this->tempStackMaxDepths();


    fNumTempStackSlots = 0;

    for (const int depth : fTempStackMaxDepths) {

        fNumTempStackSlots += depth;

    }


    if (fDebugTrace) {

        fTraceHook = SkSL::Tracer::Make(&fDebugTrace->fTraceInfo);

    }

}


Program::~Program() = default;


static bool immutable_data_is_splattable(int32_t* immutablePtr, int numSlots) {

    // If every value between `immutablePtr[0]` and `immutablePtr[numSlots]` is bit-identical, we

    // can use a splat.

    for (int index = 1; index < numSlots; ++index) {

        if (immutablePtr[0] != immutablePtr[index]) {

            return false;

        }

    }

    return true;

}


void Program::appendCopy(TArray<Stage>* pipeline,

                         SkArenaAlloc* alloc,

                         std::byte* basePtr,  // only used for immutable-value copies

                         ProgramOp baseStage,

                         SkRPOffset dst, int dstStride,

                         SkRPOffset src, int srcStride,

                         int numSlots) const {

    SkASSERT(numSlots >= 0);

    while (numSlots > 4) {

        // If we are appending a large copy, split it up into groups of four at a time.

        this->appendCopy(pipeline, alloc, basePtr,

                         baseStage,

                         dst, dstStride,

                         src, srcStride,

                         /*numSlots=*/4);

        dst += 4 * dstStride * sizeof(float);

        src += 4 * srcStride * sizeof(float);

        numSlots -= 4;

    }


    SkASSERT(numSlots <= 4);


    if (numSlots > 0) {

        // If we are copying immutable data, it might be representable by a splat; this is

        // preferable, since splats are a tiny bit faster than regular copies.

        if (basePtr) {

            SkASSERT(srcStride == 1);

            int32_t* immutablePtr = reinterpret_cast<int32_t*>(basePtr + src);

            if (immutable_data_is_splattable(immutablePtr, numSlots)) {

                auto stage = (ProgramOp)((int)ProgramOp::copy_constant + numSlots - 1);

                SkRasterPipeline_ConstantCtx ctx;

                ctx.dst = dst;

                ctx.value = *immutablePtr;

                pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});

                return;

            }

        }


        // We can't use a splat, so emit the requested copy op.

        auto stage = (ProgramOp)((int)baseStage + numSlots - 1);

        SkRasterPipeline_BinaryOpCtx ctx;

        ctx.dst = dst;

        ctx.src = src;

        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});

    }

}


void Program::appendCopySlotsUnmasked(TArray<Stage>* pipeline,

                                      SkArenaAlloc* alloc,

                                      SkRPOffset dst,

                                      SkRPOffset src,

                                      int numSlots) const {

    this->appendCopy(pipeline, alloc, /*basePtr=*/nullptr,

                     ProgramOp::copy_slot_unmasked,

                     dst, SkOpts::raster_pipeline_highp_stride,

                     src, SkOpts::raster_pipeline_highp_stride,

                     numSlots);

}


void Program::appendCopyImmutableUnmasked(TArray<Stage>* pipeline,

                                          SkArenaAlloc* alloc,

                                          std::byte* basePtr,

                                          SkRPOffset dst,

                                          SkRPOffset src,

                                          int numSlots) const {

    this->appendCopy(pipeline, alloc, basePtr,

                     ProgramOp::copy_immutable_unmasked,

                     dst, SkOpts::raster_pipeline_highp_stride,

                     src, 1,

                     numSlots);

}


void Program::appendCopySlotsMasked(TArray<Stage>* pipeline,

                                    SkArenaAlloc* alloc,

                                    SkRPOffset dst,

                                    SkRPOffset src,

                                    int numSlots) const {

    this->appendCopy(pipeline, alloc, /*basePtr=*/nullptr,

                     ProgramOp::copy_slot_masked,

                     dst, SkOpts::raster_pipeline_highp_stride,

                     src, SkOpts::raster_pipeline_highp_stride,

                     numSlots);

}


void Program::appendSingleSlotUnaryOp(TArray<Stage>* pipeline, ProgramOp stage,

                                      float* dst, int numSlots) const {

    SkASSERT(numSlots >= 0);

    while (numSlots--) {

        pipeline->push_back({stage, dst});

        dst += SkOpts::raster_pipeline_highp_stride;

    }

}


void Program::appendMultiSlotUnaryOp(TArray<Stage>* pipeline, ProgramOp baseStage,

                                     float* dst, int numSlots) const {

    SkASSERT(numSlots >= 0);

    while (numSlots > 0) {

        int currentSlots = std::min(numSlots, 4);

        auto stage = (ProgramOp)((int)baseStage + currentSlots - 1);

        pipeline->push_back({stage, dst});


        dst += 4 * SkOpts::raster_pipeline_highp_stride;

        numSlots -= 4;

    }

}


void Program::appendImmediateBinaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,

                                      ProgramOp baseStage,

                                      SkRPOffset dst, int32_t value, int numSlots) const {

    SkASSERT(is_immediate_op((BuilderOp)baseStage));

    int slotsPerStage = is_multi_slot_immediate_op((BuilderOp)baseStage) ? 4 : 1;


    SkRasterPipeline_ConstantCtx ctx;

    ctx.dst = dst;

    ctx.value = value;


    SkASSERT(numSlots >= 0);

    while (numSlots > 0) {

        int currentSlots = std::min(numSlots, slotsPerStage);

        auto stage = (ProgramOp)((int)baseStage - (currentSlots - 1));

        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});


        ctx.dst += slotsPerStage * SkOpts::raster_pipeline_highp_stride * sizeof(float);

        numSlots -= slotsPerStage;

    }

}


void Program::appendAdjacentNWayBinaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,

                                         ProgramOp stage,

                                         SkRPOffset dst, SkRPOffset src, int numSlots) const {

    // The source and destination must be directly next to one another.

    SkASSERT(numSlots >= 0);

    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src);


    if (numSlots > 0) {

        SkRasterPipeline_BinaryOpCtx ctx;

        ctx.dst = dst;

        ctx.src = src;

        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});

    }

}


void Program::appendAdjacentMultiSlotBinaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,

                                              ProgramOp baseStage, std::byte* basePtr,

                                              SkRPOffset dst, SkRPOffset src, int numSlots) const {

    // The source and destination must be directly next to one another.

    SkASSERT(numSlots >= 0);

    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src);


    if (numSlots > 4) {

        this->appendAdjacentNWayBinaryOp(pipeline, alloc, baseStage, dst, src, numSlots);

        return;

    }

    if (numSlots > 0) {

        auto specializedStage = (ProgramOp)((int)baseStage + numSlots);

        pipeline->push_back({specializedStage, basePtr + dst});

    }

}


void Program::appendAdjacentNWayTernaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,

                                          ProgramOp stage, std::byte* basePtr, SkRPOffset dst,

                                          SkRPOffset src0, SkRPOffset src1, int numSlots) const {

    // The float pointers must all be immediately adjacent to each other.

    SkASSERT(numSlots >= 0);

    SkASSERT((dst  + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src0);

    SkASSERT((src0 + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src1);


    if (numSlots > 0) {

        SkRasterPipeline_TernaryOpCtx ctx;

        ctx.dst = dst;

        ctx.delta = src0 - dst;

        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});

    }

}


void Program::appendAdjacentMultiSlotTernaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,

                                               ProgramOp baseStage, std::byte* basePtr,

                                               SkRPOffset dst, SkRPOffset src0, SkRPOffset src1,

                                               int numSlots) const {

    // The float pointers must all be immediately adjacent to each other.

    SkASSERT(numSlots >= 0);

    SkASSERT((dst  + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src0);

    SkASSERT((src0 + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src1);


    if (numSlots > 4) {

        this->appendAdjacentNWayTernaryOp(pipeline, alloc, baseStage, basePtr,

                                          dst, src0, src1, numSlots);

        return;

    }

    if (numSlots > 0) {

        auto specializedStage = (ProgramOp)((int)baseStage + numSlots);

        pipeline->push_back({specializedStage, basePtr + dst});

    }

}


void Program::appendStackRewind(TArray<Stage>* pipeline) const {

#if defined(SKSL_STANDALONE) || !SK_HAS_MUSTTAIL

    pipeline->push_back({ProgramOp::stack_rewind, nullptr});

#endif

}


static void* context_bit_pun(intptr_t val) {

    return sk_bit_cast<void*>(val);

}


Program::SlotData Program::allocateSlotData(SkArenaAlloc* alloc) const {

    // Allocate a contiguous slab of slot data for immutables, values, and stack entries.

    const int N = SkOpts::raster_pipeline_highp_stride;

    const int scalarWidth = 1 * sizeof(float);

    const int vectorWidth = N * sizeof(float);

    const int allocSize = vectorWidth * (fNumValueSlots + fNumTempStackSlots) +

                          scalarWidth * fNumImmutableSlots;

    float* slotPtr = static_cast<float*>(alloc->makeBytesAlignedTo(allocSize, vectorWidth));

    sk_bzero(slotPtr, allocSize);


    // Store the temp stack immediately after the values, and immutable data after the stack.

    SlotData s;

    s.values    = SkSpan{slotPtr,        N * fNumValueSlots};

    s.stack     = SkSpan{s.values.end(), N * fNumTempStackSlots};

    s.immutable = SkSpan{s.stack.end(),  1 * fNumImmutableSlots};

    return s;

}


bool Program::appendStages(SkRasterPipeline* pipeline,

                           SkArenaAlloc* alloc,

                           RP::Callbacks* callbacks,

                           SkSpan<const float> uniforms) const {

#if defined(SKSL_STANDALONE)

    return false;

#else

    // Convert our Instruction list to an array of ProgramOps.

    TArray<Stage> stages;

    SlotData slotData = this->allocateSlotData(alloc);

    this->makeStages(&stages, alloc, uniforms, slotData);


    // Allocate buffers for branch targets and labels; these are needed to convert labels into

    // actual offsets into the pipeline and fix up branches.

    TArray<SkRasterPipeline_BranchCtx*> branchContexts;

    branchContexts.reserve_exact(fNumLabels);

    TArray<int> labelOffsets;

    labelOffsets.push_back_n(fNumLabels, -1);

    TArray<int> branchGoesToLabel;

    branchGoesToLabel.reserve_exact(fNumLabels);


    auto resetBasePointer = [&]() {

        // Whenever we hand off control to another shader, we have to assume that it might overwrite

        // the base pointer (if it uses SkSL, it will!), so we reset it on return.

        pipeline->append(SkRasterPipelineOp::set_base_pointer, slotData.values.data());

    };


    resetBasePointer();


    for (const Stage& stage : stages) {

        switch (stage.op) {

            case ProgramOp::stack_rewind:

                pipeline->appendStackRewind();

                break;


            case ProgramOp::invoke_shader:

                if (!callbacks || !callbacks->appendShader(sk_bit_cast<intptr_t>(stage.ctx))) {

                    return false;

                }

                resetBasePointer();

                break;


            case ProgramOp::invoke_color_filter:

                if (!callbacks || !callbacks->appendColorFilter(sk_bit_cast<intptr_t>(stage.ctx))) {

                    return false;

                }

                resetBasePointer();

                break;


            case ProgramOp::invoke_blender:

                if (!callbacks || !callbacks->appendBlender(sk_bit_cast<intptr_t>(stage.ctx))) {

                    return false;

                }

                resetBasePointer();

                break;


            case ProgramOp::invoke_to_linear_srgb:

                if (!callbacks) {

                    return false;

                }

                callbacks->toLinearSrgb(stage.ctx);

                // A ColorSpaceXform shouldn't ever alter the base pointer, so we don't need to call

                // resetBasePointer here.

                break;


            case ProgramOp::invoke_from_linear_srgb:

                if (!callbacks) {

                    return false;

                }

                callbacks->fromLinearSrgb(stage.ctx);

                // A ColorSpaceXform shouldn't ever alter the base pointer, so we don't need to call

                // resetBasePointer here.

                break;


            case ProgramOp::label: {

                // Remember the absolute pipeline position of this label.

                int labelID = sk_bit_cast<intptr_t>(stage.ctx);

                SkASSERT(labelID >= 0 && labelID < fNumLabels);

                labelOffsets[labelID] = pipeline->getNumStages();

                break;

            }

            case ProgramOp::jump:

            case ProgramOp::branch_if_all_lanes_active:

            case ProgramOp::branch_if_any_lanes_active:

            case ProgramOp::branch_if_no_lanes_active:

            case ProgramOp::branch_if_no_active_lanes_eq: {

                // The branch context contain a valid label ID at this point.

                auto* branchCtx = static_cast<SkRasterPipeline_BranchCtx*>(stage.ctx);

                int labelID = branchCtx->offset;

                SkASSERT(labelID >= 0 && labelID < fNumLabels);


                // Replace the label ID in the branch context with the absolute pipeline position.

                // We will go back over the branch targets at the end and fix them up.

                branchCtx->offset = pipeline->getNumStages();


                SkASSERT(branchContexts.size() == branchGoesToLabel.size());

                branchContexts.push_back(branchCtx);

                branchGoesToLabel.push_back(labelID);

                [[fallthrough]];

            }

            default:

                // Append a regular op to the program.

                SkASSERT((int)stage.op < kNumRasterPipelineHighpOps);

                pipeline->append((SkRasterPipelineOp)stage.op, stage.ctx);

                break;

        }

    }


    // Now that we have assembled the program and know the pipeline positions of each label and

    // branch, fix up every branch target.

    SkASSERT(branchContexts.size() == branchGoesToLabel.size());

    for (int index = 0; index < branchContexts.size(); ++index) {

        int branchFromIdx = branchContexts[index]->offset;

        int branchToIdx = labelOffsets[branchGoesToLabel[index]];

        branchContexts[index]->offset = branchToIdx - branchFromIdx;

    }


    return true;

#endif

}


void Program::makeStages(TArray<Stage>* pipeline,

                         SkArenaAlloc* alloc,

                         SkSpan<const float> uniforms,

                         const SlotData& slots) const {

    SkASSERT(fNumUniformSlots == SkToInt(uniforms.size()));


    const int N = SkOpts::raster_pipeline_highp_stride;

    int mostRecentRewind = 0;


    // Assemble a map holding the current stack-top for each temporary stack. Position each temp

    // stack immediately after the previous temp stack; temp stacks are never allowed to overlap.

    int pos = 0;

    TArray<float*> tempStackMap;

    tempStackMap.resize(fTempStackMaxDepths.size());

    for (int idx = 0; idx < fTempStackMaxDepths.size(); ++idx) {

        tempStackMap[idx] = slots.stack.begin() + (pos * N);

        pos += fTempStackMaxDepths[idx];

    }


    // Track labels that we have reached in processing.

    SkBitSet labelsEncountered(fNumLabels);


    auto EmitStackRewindForBackwardsBranch = [&](int labelID) {

        // If we have already encountered the label associated with this branch, this is a

        // backwards branch. Add a stack-rewind immediately before the branch to ensure that

        // long-running loops don't use an unbounded amount of stack space.

        if (labelsEncountered.test(labelID)) {

            this->appendStackRewind(pipeline);

            mostRecentRewind = pipeline->size();

        }

    };


    auto* const basePtr = (std::byte*)slots.values.data();

    auto OffsetFromBase = [&](const void* ptr) -> SkRPOffset {

        return (SkRPOffset)((const std::byte*)ptr - basePtr);

    };


    // Copy all immutable values into the immutable slots.

    for (const Instruction& inst : fInstructions) {

        if (inst.fOp == BuilderOp::store_immutable_value) {

            slots.immutable[inst.fSlotA] = sk_bit_cast<float>(inst.fImmA);

        }

    }


    // Write each BuilderOp to the pipeline array.

    pipeline->reserve_exact(pipeline->size() + fInstructions.size());

    for (const Instruction& inst : fInstructions) {

        auto ImmutableA = [&]() { return &slots.immutable[1 * inst.fSlotA]; };

        auto ImmutableB = [&]() { return &slots.immutable[1 * inst.fSlotB]; };

        auto SlotA      = [&]() { return &slots.values[N * inst.fSlotA]; };

        auto SlotB      = [&]() { return &slots.values[N * inst.fSlotB]; };

        auto UniformA   = [&]() { return &uniforms[inst.fSlotA]; };

        auto AllocTraceContext = [&](auto* ctx) {

            // We pass `ctx` solely for its type; the value is unused.

            using ContextType = typename std::remove_reference<decltype(*ctx)>::type;

            ctx = alloc->make<ContextType>();

            ctx->traceMask = reinterpret_cast<int*>(tempStackMap[inst.fImmA] - N);

            ctx->traceHook = fTraceHook.get();

            return ctx;

        };

        float*& tempStackPtr = tempStackMap[inst.fStackID];


        switch (inst.fOp) {

            case BuilderOp::label:

                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);

                labelsEncountered.set(inst.fImmA);

                pipeline->push_back({ProgramOp::label, context_bit_pun(inst.fImmA)});

                break;


            case BuilderOp::jump:

            case BuilderOp::branch_if_any_lanes_active:

            case BuilderOp::branch_if_no_lanes_active: {

                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);

                EmitStackRewindForBackwardsBranch(inst.fImmA);


                auto* ctx = alloc->make<SkRasterPipeline_BranchCtx>();

                ctx->offset = inst.fImmA;

                pipeline->push_back({(ProgramOp)inst.fOp, ctx});

                break;

            }

            case BuilderOp::branch_if_all_lanes_active: {

                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);

                EmitStackRewindForBackwardsBranch(inst.fImmA);


                auto* ctx = alloc->make<SkRasterPipeline_BranchIfAllLanesActiveCtx>();

                ctx->offset = inst.fImmA;

                pipeline->push_back({ProgramOp::branch_if_all_lanes_active, ctx});

                break;

            }

            case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal: {

                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);

                EmitStackRewindForBackwardsBranch(inst.fImmA);


                auto* ctx = alloc->make<SkRasterPipeline_BranchIfEqualCtx>();

                ctx->offset = inst.fImmA;

                ctx->value = inst.fImmB;

                ctx->ptr = reinterpret_cast<int*>(tempStackPtr - N);

                pipeline->push_back({ProgramOp::branch_if_no_active_lanes_eq, ctx});

                break;

            }

            case BuilderOp::init_lane_masks: {

                auto* ctx = alloc->make<SkRasterPipeline_InitLaneMasksCtx>();

                pipeline->push_back({ProgramOp::init_lane_masks, ctx});

                break;

            }

            case BuilderOp::store_src_rg:

                pipeline->push_back({ProgramOp::store_src_rg, SlotA()});

                break;


            case BuilderOp::store_src:

                pipeline->push_back({ProgramOp::store_src, SlotA()});

                break;


            case BuilderOp::store_dst:

                pipeline->push_back({ProgramOp::store_dst, SlotA()});

                break;


            case BuilderOp::store_device_xy01:

                pipeline->push_back({ProgramOp::store_device_xy01, SlotA()});

                break;


            case BuilderOp::store_immutable_value:

                // The immutable slots were populated in an earlier pass.

                break;


            case BuilderOp::load_src:

                pipeline->push_back({ProgramOp::load_src, SlotA()});

                break;


            case BuilderOp::load_dst:

                pipeline->push_back({ProgramOp::load_dst, SlotA()});

                break;


            case ALL_SINGLE_SLOT_UNARY_OP_CASES: {

                float* dst = tempStackPtr - (inst.fImmA * N);

                this->appendSingleSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);

                break;

            }

            case ALL_MULTI_SLOT_UNARY_OP_CASES: {

                float* dst = tempStackPtr - (inst.fImmA * N);

                this->appendMultiSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);

                break;

            }

            case ALL_IMMEDIATE_BINARY_OP_CASES: {

                float* dst = (inst.fSlotA == NA) ? tempStackPtr - (inst.fImmA * N)

                                                 : SlotA();


                this->appendImmediateBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,

                                              OffsetFromBase(dst), inst.fImmB, inst.fImmA);

                break;

            }

            case ALL_N_WAY_BINARY_OP_CASES: {

                float* src = tempStackPtr - (inst.fImmA * N);

                float* dst = tempStackPtr - (inst.fImmA * 2 * N);

                this->appendAdjacentNWayBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,

                                                 OffsetFromBase(dst), OffsetFromBase(src),

                                                 inst.fImmA);

                break;

            }

            case ALL_MULTI_SLOT_BINARY_OP_CASES: {

                float* src = tempStackPtr - (inst.fImmA * N);

                float* dst = tempStackPtr - (inst.fImmA * 2 * N);

                this->appendAdjacentMultiSlotBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,

                                                      basePtr,

                                                      OffsetFromBase(dst),

                                                      OffsetFromBase(src),

                                                      inst.fImmA);

                break;

            }

            case ALL_N_WAY_TERNARY_OP_CASES: {

                float* src1 = tempStackPtr - (inst.fImmA * N);

                float* src0 = tempStackPtr - (inst.fImmA * 2 * N);

                float* dst  = tempStackPtr - (inst.fImmA * 3 * N);

                this->appendAdjacentNWayTernaryOp(pipeline, alloc, (ProgramOp)inst.fOp, basePtr,

                                                  OffsetFromBase(dst),

                                                  OffsetFromBase(src0),

                                                  OffsetFromBase(src1),

                                                  inst.fImmA);

                break;

            }

            case ALL_MULTI_SLOT_TERNARY_OP_CASES: {

                float* src1 = tempStackPtr - (inst.fImmA * N);

                float* src0 = tempStackPtr - (inst.fImmA * 2 * N);

                float* dst  = tempStackPtr - (inst.fImmA * 3 * N);

                this->appendAdjacentMultiSlotTernaryOp(pipeline, alloc,(ProgramOp)inst.fOp, basePtr,

                                                       OffsetFromBase(dst),

                                                       OffsetFromBase(src0),

                                                       OffsetFromBase(src1),

                                                       inst.fImmA);

                break;

            }

            case BuilderOp::select: {

                float* src = tempStackPtr - (inst.fImmA * N);

                float* dst = tempStackPtr - (inst.fImmA * 2 * N);

                this->appendCopySlotsMasked(pipeline, alloc,

                                            OffsetFromBase(dst),

                                            OffsetFromBase(src),

                                            inst.fImmA);

                break;

            }

            case BuilderOp::copy_slot_masked:

                this->appendCopySlotsMasked(pipeline, alloc,

                                            OffsetFromBase(SlotA()),

                                            OffsetFromBase(SlotB()),

                                            inst.fImmA);

                break;


            case BuilderOp::copy_slot_unmasked:

                this->appendCopySlotsUnmasked(pipeline, alloc,

                                              OffsetFromBase(SlotA()),

                                              OffsetFromBase(SlotB()),

                                              inst.fImmA);

                break;


            case BuilderOp::copy_immutable_unmasked:

                this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,

                                                  OffsetFromBase(SlotA()),

                                                  OffsetFromBase(ImmutableB()),

                                                  inst.fImmA);

                break;


            case BuilderOp::refract_4_floats: {

                float* dst = tempStackPtr - (9 * N);

                pipeline->push_back({ProgramOp::refract_4_floats, dst});

                break;

            }

            case BuilderOp::inverse_mat2:

            case BuilderOp::inverse_mat3:

            case BuilderOp::inverse_mat4: {

                float* dst = tempStackPtr - (inst.fImmA * N);

                pipeline->push_back({(ProgramOp)inst.fOp, dst});

                break;

            }

            case BuilderOp::dot_2_floats:

            case BuilderOp::dot_3_floats:

            case BuilderOp::dot_4_floats: {

                float* dst = tempStackPtr - (inst.fImmA * 2 * N);

                pipeline->push_back({(ProgramOp)inst.fOp, dst});

                break;

            }

            case BuilderOp::swizzle_1: {

                // A single-component swizzle just copies a slot and shrinks the stack; we can

                // slightly improve codegen by making that simplification here.

                int offset = inst.fImmB;

                SkASSERT(offset >= 0 && offset <= 15);

                float* dst = tempStackPtr - (inst.fImmA * N);

                float* src = dst + (offset * N);

                if (src != dst) {

                    this->appendCopySlotsUnmasked(pipeline, alloc,

                                                  OffsetFromBase(dst),

                                                  OffsetFromBase(src),

                                                  /*numSlots=*/1);

                }

                break;

            }

            case BuilderOp::swizzle_2:

            case BuilderOp::swizzle_3:

            case BuilderOp::swizzle_4: {

                SkRasterPipeline_SwizzleCtx ctx;

                ctx.dst = OffsetFromBase(tempStackPtr - (N * inst.fImmA));

                // Unpack component nybbles into byte-offsets pointing at stack slots.

                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx.offsets));

                pipeline->push_back({(ProgramOp)inst.fOp, SkRPCtxUtils::Pack(ctx, alloc)});

                break;

            }

            case BuilderOp::shuffle: {

                int consumed = inst.fImmA;

                int generated = inst.fImmB;


                auto* ctx = alloc->make<SkRasterPipeline_ShuffleCtx>();

                ctx->ptr = reinterpret_cast<int32_t*>(tempStackPtr) - (N * consumed);

                ctx->count = generated;

                // Unpack immB and immC from nybble form into the offset array.

                unpack_nybbles_to_offsets(inst.fImmC, SkSpan(&ctx->offsets[0], 8));

                unpack_nybbles_to_offsets(inst.fImmD, SkSpan(&ctx->offsets[8], 8));

                pipeline->push_back({ProgramOp::shuffle, ctx});

                break;

            }

            case BuilderOp::matrix_multiply_2:

            case BuilderOp::matrix_multiply_3:

            case BuilderOp::matrix_multiply_4: {

                int consumed = (inst.fImmB * inst.fImmC) +  // result

                               (inst.fImmA * inst.fImmB) +  // left-matrix

                               (inst.fImmC * inst.fImmD);   // right-matrix


                SkRasterPipeline_MatrixMultiplyCtx ctx;

                ctx.dst = OffsetFromBase(tempStackPtr - (N * consumed));

                ctx.leftColumns  = inst.fImmA;

                ctx.leftRows     = inst.fImmB;

                ctx.rightColumns = inst.fImmC;

                ctx.rightRows    = inst.fImmD;

                pipeline->push_back({(ProgramOp)inst.fOp, SkRPCtxUtils::Pack(ctx, alloc)});

                break;

            }

            case BuilderOp::exchange_src: {

                float* dst = tempStackPtr - (4 * N);

                pipeline->push_back({ProgramOp::exchange_src, dst});

                break;

            }

            case BuilderOp::push_src_rgba: {

                float* dst = tempStackPtr;

                pipeline->push_back({ProgramOp::store_src, dst});

                break;

            }

            case BuilderOp::push_dst_rgba: {

                float* dst = tempStackPtr;

                pipeline->push_back({ProgramOp::store_dst, dst});

                break;

            }

            case BuilderOp::push_device_xy01: {

                float* dst = tempStackPtr;

                pipeline->push_back({ProgramOp::store_device_xy01, dst});

                break;

            }

            case BuilderOp::pop_src_rgba: {

                float* src = tempStackPtr - (4 * N);

                pipeline->push_back({ProgramOp::load_src, src});

                break;

            }

            case BuilderOp::pop_dst_rgba: {

                float* src = tempStackPtr - (4 * N);

                pipeline->push_back({ProgramOp::load_dst, src});

                break;

            }

            case BuilderOp::push_slots: {

                float* dst = tempStackPtr;

                this->appendCopySlotsUnmasked(pipeline, alloc,

                                              OffsetFromBase(dst),

                                              OffsetFromBase(SlotA()),

                                              inst.fImmA);

                break;

            }

            case BuilderOp::push_immutable: {

                float* dst = tempStackPtr;

                this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,

                                                  OffsetFromBase(dst),

                                                  OffsetFromBase(ImmutableA()),

                                                  inst.fImmA);

                break;

            }

            case BuilderOp::copy_stack_to_slots_indirect:

            case BuilderOp::push_immutable_indirect:

            case BuilderOp::push_slots_indirect:

            case BuilderOp::push_uniform_indirect: {

                // SlotA: fixed-range start

                // SlotB: limit-range end

                //  immA: number of slots to copy

                //  immB: dynamic stack ID

                ProgramOp op;

                auto* ctx = alloc->make<SkRasterPipeline_CopyIndirectCtx>();

                ctx->indirectOffset =

                        reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmB]) - (1 * N);

                ctx->indirectLimit = inst.fSlotB - inst.fSlotA - inst.fImmA;

                ctx->slots = inst.fImmA;

                if (inst.fOp == BuilderOp::push_slots_indirect) {

                    op = ProgramOp::copy_from_indirect_unmasked;

                    ctx->src = reinterpret_cast<const int32_t*>(SlotA());

                    ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);

                } else if (inst.fOp == BuilderOp::push_immutable_indirect) {

                    // We reuse the indirect-uniform op for indirect copies of immutable data.

                    op = ProgramOp::copy_from_indirect_uniform_unmasked;

                    ctx->src = reinterpret_cast<const int32_t*>(ImmutableA());

                    ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);

                } else if (inst.fOp == BuilderOp::push_uniform_indirect) {

                    op = ProgramOp::copy_from_indirect_uniform_unmasked;

                    ctx->src = reinterpret_cast<const int32_t*>(UniformA());

                    ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);

                } else {

                    op = ProgramOp::copy_to_indirect_masked;

                    ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (ctx->slots * N);

                    ctx->dst = reinterpret_cast<int32_t*>(SlotA());

                }

                pipeline->push_back({op, ctx});

                break;

            }

            case BuilderOp::push_uniform:

            case BuilderOp::copy_uniform_to_slots_unmasked: {

                const float* src = UniformA();

                float* dst = (inst.fOp == BuilderOp::push_uniform) ? tempStackPtr : SlotB();


                for (int remaining = inst.fImmA; remaining > 0; remaining -= 4) {

                    auto ctx = alloc->make<SkRasterPipeline_UniformCtx>();

                    ctx->dst = reinterpret_cast<int32_t*>(dst);

                    ctx->src = reinterpret_cast<const int32_t*>(src);

                    switch (remaining) {

                        case 1:  pipeline->push_back({ProgramOp::copy_uniform,    ctx}); break;

                        case 2:  pipeline->push_back({ProgramOp::copy_2_uniforms, ctx}); break;

                        case 3:  pipeline->push_back({ProgramOp::copy_3_uniforms, ctx}); break;

                        default: pipeline->push_back({ProgramOp::copy_4_uniforms, ctx}); break;

                    }

                    dst += 4 * N;

                    src += 4;

                }

                break;

            }

            case BuilderOp::push_condition_mask: {

                float* dst = tempStackPtr;

                pipeline->push_back({ProgramOp::store_condition_mask, dst});

                break;

            }

            case BuilderOp::pop_condition_mask: {

                float* src = tempStackPtr - (1 * N);

                pipeline->push_back({ProgramOp::load_condition_mask, src});

                break;

            }

            case BuilderOp::merge_condition_mask:

            case BuilderOp::merge_inv_condition_mask: {

                float* ptr = tempStackPtr - (2 * N);

                pipeline->push_back({(ProgramOp)inst.fOp, ptr});

                break;

            }

            case BuilderOp::push_loop_mask: {

                float* dst = tempStackPtr;

                pipeline->push_back({ProgramOp::store_loop_mask, dst});

                break;

            }

            case BuilderOp::pop_loop_mask: {

                float* src = tempStackPtr - (1 * N);

                pipeline->push_back({ProgramOp::load_loop_mask, src});

                break;

            }

            case BuilderOp::pop_and_reenable_loop_mask: {

                float* src = tempStackPtr - (1 * N);

                pipeline->push_back({ProgramOp::reenable_loop_mask, src});

                break;

            }

            case BuilderOp::reenable_loop_mask:

                pipeline->push_back({ProgramOp::reenable_loop_mask, SlotA()});

                break;


            case BuilderOp::mask_off_loop_mask:

                pipeline->push_back({ProgramOp::mask_off_loop_mask, nullptr});

                break;


            case BuilderOp::merge_loop_mask: {

                float* src = tempStackPtr - (1 * N);

                pipeline->push_back({ProgramOp::merge_loop_mask, src});

                break;

            }

            case BuilderOp::push_return_mask: {

                float* dst = tempStackPtr;

                pipeline->push_back({ProgramOp::store_return_mask, dst});

                break;

            }

            case BuilderOp::pop_return_mask: {

                float* src = tempStackPtr - (1 * N);

                pipeline->push_back({ProgramOp::load_return_mask, src});

                break;

            }

            case BuilderOp::mask_off_return_mask:

                pipeline->push_back({ProgramOp::mask_off_return_mask, nullptr});

                break;


            case BuilderOp::copy_constant:

            case BuilderOp::push_constant: {

                float* dst = (inst.fOp == BuilderOp::copy_constant) ? SlotA() : tempStackPtr;

                // Splat constant values onto the stack.

                for (int remaining = inst.fImmA; remaining > 0; remaining -= 4) {

                    SkRasterPipeline_ConstantCtx ctx;

                    ctx.dst = OffsetFromBase(dst);

                    ctx.value = inst.fImmB;

                    void* ptr = SkRPCtxUtils::Pack(ctx, alloc);

                    switch (remaining) {

                        case 1:  pipeline->push_back({ProgramOp::copy_constant,     ptr}); break;

                        case 2:  pipeline->push_back({ProgramOp::splat_2_constants, ptr}); break;

                        case 3:  pipeline->push_back({ProgramOp::splat_3_constants, ptr}); break;

                        default: pipeline->push_back({ProgramOp::splat_4_constants, ptr}); break;

                    }

                    dst += 4 * N;

                }

                break;

            }

            case BuilderOp::copy_stack_to_slots: {

                float* src = tempStackPtr - (inst.fImmB * N);

                this->appendCopySlotsMasked(pipeline, alloc,

                                            OffsetFromBase(SlotA()),

                                            OffsetFromBase(src),

                                            inst.fImmA);

                break;

            }

            case BuilderOp::copy_stack_to_slots_unmasked: {

                float* src = tempStackPtr - (inst.fImmB * N);

                this->appendCopySlotsUnmasked(pipeline, alloc,

                                              OffsetFromBase(SlotA()),

                                              OffsetFromBase(src),

                                              inst.fImmA);

                break;

            }

            case BuilderOp::swizzle_copy_stack_to_slots: {

                // SlotA: fixed-range start

                // immA: number of swizzle components

                // immB: swizzle components

                // immC: offset from stack top

                auto stage = (ProgramOp)((int)ProgramOp::swizzle_copy_slot_masked + inst.fImmA - 1);

                auto* ctx = alloc->make<SkRasterPipeline_SwizzleCopyCtx>();

                ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (inst.fImmC * N);

                ctx->dst = reinterpret_cast<int32_t*>(SlotA());

                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));

                pipeline->push_back({stage, ctx});

                break;

            }

            case BuilderOp::push_clone: {

                float* src = tempStackPtr - (inst.fImmB * N);

                float* dst = tempStackPtr;

                this->appendCopySlotsUnmasked(pipeline, alloc,

                                              OffsetFromBase(dst),

                                              OffsetFromBase(src),

                                              inst.fImmA);

                break;

            }

            case BuilderOp::push_clone_from_stack: {

                // immA: number of slots

                // immB: other stack ID

                // immC: offset from stack top

                float* sourceStackPtr = tempStackMap[inst.fImmB];

                float* src = sourceStackPtr - (inst.fImmC * N);

                float* dst = tempStackPtr;

                this->appendCopySlotsUnmasked(pipeline, alloc,

                                              OffsetFromBase(dst),

                                              OffsetFromBase(src),

                                              inst.fImmA);

                break;

            }

            case BuilderOp::push_clone_indirect_from_stack: {

                // immA: number of slots

                // immB: other stack ID

                // immC: offset from stack top

                // immD: dynamic stack ID

                float* sourceStackPtr = tempStackMap[inst.fImmB];


                auto* ctx = alloc->make<SkRasterPipeline_CopyIndirectCtx>();

                ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);

                ctx->src = reinterpret_cast<const int32_t*>(sourceStackPtr) - (inst.fImmC * N);

                ctx->indirectOffset =

                        reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmD]) - (1 * N);

                ctx->indirectLimit = inst.fImmC - inst.fImmA;

                ctx->slots = inst.fImmA;

                pipeline->push_back({ProgramOp::copy_from_indirect_unmasked, ctx});

                break;

            }

            case BuilderOp::swizzle_copy_stack_to_slots_indirect: {

                // SlotA: fixed-range start

                // SlotB: limit-range end

                // immA: number of swizzle components

                // immB: swizzle components

                // immC: offset from stack top

                // immD: dynamic stack ID

                auto* ctx = alloc->make<SkRasterPipeline_SwizzleCopyIndirectCtx>();

                ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (inst.fImmC * N);

                ctx->dst = reinterpret_cast<int32_t*>(SlotA());

                ctx->indirectOffset =

                        reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmD]) - (1 * N);

                ctx->indirectLimit =

                        inst.fSlotB - inst.fSlotA - (max_packed_nybble(inst.fImmB, inst.fImmA) + 1);

                ctx->slots = inst.fImmA;

                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));

                pipeline->push_back({ProgramOp::swizzle_copy_to_indirect_masked, ctx});

                break;

            }

            case BuilderOp::case_op: {

                SkRasterPipeline_CaseOpCtx ctx;

                ctx.expectedValue = inst.fImmA;

                ctx.offset = OffsetFromBase(tempStackPtr - (2 * N));

                pipeline->push_back({ProgramOp::case_op, SkRPCtxUtils::Pack(ctx, alloc)});

                break;

            }

            case BuilderOp::continue_op:

                pipeline->push_back({ProgramOp::continue_op, tempStackMap[inst.fImmA] - (1 * N)});

                break;


            case BuilderOp::pad_stack:

            case BuilderOp::discard_stack:

                break;


            case BuilderOp::invoke_shader:

            case BuilderOp::invoke_color_filter:

            case BuilderOp::invoke_blender:

                pipeline->push_back({(ProgramOp)inst.fOp, context_bit_pun(inst.fImmA)});

                break;


            case BuilderOp::invoke_to_linear_srgb:

            case BuilderOp::invoke_from_linear_srgb:

                pipeline->push_back({(ProgramOp)inst.fOp, tempStackMap[inst.fImmA] - (4 * N)});

                break;


            case BuilderOp::trace_line: {

                auto* ctx = AllocTraceContext((SkRasterPipeline_TraceLineCtx*)nullptr);

                ctx->lineNumber = inst.fImmB;

                pipeline->push_back({ProgramOp::trace_line, ctx});

                break;

            }

            case BuilderOp::trace_scope: {

                auto* ctx = AllocTraceContext((SkRasterPipeline_TraceScopeCtx*)nullptr);

                ctx->delta = inst.fImmB;

                pipeline->push_back({ProgramOp::trace_scope, ctx});

                break;

            }

            case BuilderOp::trace_enter:

            case BuilderOp::trace_exit: {

                auto* ctx = AllocTraceContext((SkRasterPipeline_TraceFuncCtx*)nullptr);

                ctx->funcIdx = inst.fImmB;

                pipeline->push_back({(ProgramOp)inst.fOp, ctx});

                break;

            }

            case BuilderOp::trace_var:

            case BuilderOp::trace_var_indirect: {

                // SlotA: fixed-range start

                // SlotB: limit-range end

                // immA: trace-mask stack ID

                // immB: number of slots

                // immC: dynamic stack ID

                auto* ctx = AllocTraceContext((SkRasterPipeline_TraceVarCtx*)nullptr);

                ctx->slotIdx = inst.fSlotA;

                ctx->numSlots = inst.fImmB;

                ctx->data = reinterpret_cast<int*>(SlotA());

                if (inst.fOp == BuilderOp::trace_var_indirect) {

                    ctx->indirectOffset =

                            reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmC]) - (1 * N);

                    ctx->indirectLimit = inst.fSlotB - inst.fSlotA - inst.fImmB;

                } else {

                    ctx->indirectOffset = nullptr;

                    ctx->indirectLimit = 0;

                }

                pipeline->push_back({ProgramOp::trace_var, ctx});

                break;

            }

            default:

                SkDEBUGFAILF("Raster Pipeline: unsupported instruction %d", (int)inst.fOp);

                break;

        }


        int stackUsage = stack_usage(inst);

        if (stackUsage != 0) {

            tempStackPtr += stackUsage * N;

            SkASSERT(tempStackPtr >= slots.stack.begin());

            SkASSERT(tempStackPtr <= slots.stack.end());

        }


        // Periodically rewind the stack every 500 instructions. When SK_HAS_MUSTTAIL is set,

        // rewinds are not actually used; the appendStackRewind call becomes a no-op. On platforms

        // that don't support SK_HAS_MUSTTAIL, rewinding the stack periodically can prevent a

        // potential stack overflow when running a long program.

        int numPipelineStages = pipeline->size();

        if (numPipelineStages - mostRecentRewind > 500) {

            this->appendStackRewind(pipeline);

            mostRecentRewind = numPipelineStages;

        }

    }

}


class Program::Dumper {

public:

    Dumper(const Program& p) : fProgram(p) {}


    void dump(SkWStream* out, bool writeInstructionCount);


    // Finds the labels in the program, and keeps track of their offsets.

    void buildLabelToStageMap() {

        for (int index = 0; index < fStages.size(); ++index) {

            if (fStages[index].op == ProgramOp::label) {

                int labelID = sk_bit_cast<intptr_t>(fStages[index].ctx);

                SkASSERT(!fLabelToStageMap.find(labelID));

                fLabelToStageMap[labelID] = index;

            }

        }

    }


    // Assign unique names to each variable slot; our trace might have multiple variables with the

    // same name, which can make a dump hard to read. We disambiguate them with subscripts.

    void buildUniqueSlotNameList() {

        if (fProgram.fDebugTrace) {

            fSlotNameList.reserve_exact(fProgram.fDebugTrace->fSlotInfo.size());


            // The map consists of <variable name, <source position, unique name>>.

            THashMap<std::string_view, THashMap<int, std::string>> uniqueNameMap;


            for (const SlotDebugInfo& slotInfo : fProgram.fDebugTrace->fSlotInfo) {

                // Look up this variable by its name and source position.

                int pos = slotInfo.pos.valid() ? slotInfo.pos.startOffset() : 0;

                THashMap<int, std::string>& positionMap = uniqueNameMap[slotInfo.name];

                std::string& uniqueName = positionMap[pos];


                // Have we seen this variable name/position combination before?

                if (uniqueName.empty()) {

                    // This is a unique name/position pair.

                    uniqueName = slotInfo.name;


                    // But if it's not a unique _name_, it deserves a subscript to disambiguate it.

                    int subscript = positionMap.count() - 1;

                    if (subscript > 0) {

                        for (char digit : std::to_string(subscript)) {

                            // U+2080 through U+2089 (₀₁₂₃₄₅₆₇₈₉) in UTF8:

                            uniqueName.push_back((char)0xE2);

                            uniqueName.push_back((char)0x82);

                            uniqueName.push_back((char)(0x80 + digit - '0'));

                        }

                    }

                }


                fSlotNameList.push_back(uniqueName);

            }

        }

    }


    // Interprets the context value as a branch offset.

    std::string branchOffset(const SkRasterPipeline_BranchCtx* ctx, int index) const {

        // The context's offset field contains a label ID

        int labelID = ctx->offset;

        const int* targetIndex = fLabelToStageMap.find(labelID);

        SkASSERT(targetIndex);

        return SkSL::String::printf("%+d (label %d at #%d)", *targetIndex - index, labelID,

                                                             *targetIndex + 1);

    }


    // Prints a 32-bit immediate value of unknown type (int/float).

    std::string imm(float immFloat, bool showAsFloat = true) const {

        // Special case exact zero as "0" for readability (vs `0x00000000 (0.0)`).

        if (sk_bit_cast<int32_t>(immFloat) == 0) {

            return "0";

        }

        // Start with `0x3F800000` as a baseline.

        uint32_t immUnsigned;

        memcpy(&immUnsigned, &immFloat, sizeof(uint32_t));

        auto text = SkSL::String::printf("0x%08X", immUnsigned);


        // Extend it to `0x3F800000 (1.0)` for finite floating point values.

        if (showAsFloat && std::isfinite(immFloat)) {

            text += " (";

            text += skstd::to_string(immFloat);

            text += ')';

        }

        return text;

    }


    // Interprets the context pointer as a 32-bit immediate value of unknown type (int/float).

    std::string immCtx(const void* ctx, bool showAsFloat = true) const {

        float f;

        memcpy(&f, &ctx, sizeof(float));

        return this->imm(f, showAsFloat);

    }


    // Prints `1` for single slots and `1..3` for ranges of slots.

    std::string asRange(int first, int count) const {

        std::string text = std::to_string(first);

        if (count > 1) {

            text += ".." + std::to_string(first + count - 1);

        }

        return text;

    }


    // Generates a reasonable name for a range of slots or uniforms, e.g.:

    // `val`: slot range points at one variable, named val

    // `val(0..1)`: slot range points at the first and second slot of val (which has 3+ slots)

    // `foo, bar`: slot range fully covers two variables, named foo and bar

    // `foo(3), bar(0)`: slot range covers the fourth slot of foo and the first slot of bar

    std::string slotOrUniformName(SkSpan<const SlotDebugInfo> debugInfo,

                                  SkSpan<const std::string> names,

                                  SlotRange range) const {

        SkASSERT(range.index >= 0 && (range.index + range.count) <= (int)debugInfo.size());


        std::string text;

        auto separator = SkSL::String::Separator();

        while (range.count > 0) {

            const SlotDebugInfo& slotInfo = debugInfo[range.index];

            text += separator();

            text += names.empty() ? slotInfo.name : names[range.index];


            // Figure out how many slots we can chomp in this iteration.

            int entireVariable = slotInfo.columns * slotInfo.rows;

            int slotsToChomp = std::min(range.count, entireVariable - slotInfo.componentIndex);

            // If we aren't consuming an entire variable, from first slot to last...

            if (slotsToChomp != entireVariable) {

                // ... decorate it with a range suffix.

                text += '(' + this->asRange(slotInfo.componentIndex, slotsToChomp) + ')';

            }

            range.index += slotsToChomp;

            range.count -= slotsToChomp;

        }


        return text;

    }


    // Generates a reasonable name for a range of slots.

    std::string slotName(SlotRange range) const {

        return this->slotOrUniformName(fProgram.fDebugTrace->fSlotInfo, fSlotNameList, range);

    }


    // Generates a reasonable name for a range of uniforms.

    std::string uniformName(SlotRange range) const {

        return this->slotOrUniformName(fProgram.fDebugTrace->fUniformInfo, /*names=*/{}, range);

    }


    // Attempts to interpret the passed-in pointer as a uniform range.

    std::string uniformPtrCtx(const float* ptr, int numSlots) const {

        const float* end = ptr + numSlots;

        if (ptr >= fUniforms.begin() && end <= fUniforms.end()) {

            int uniformIdx = ptr - fUniforms.begin();

            if (fProgram.fDebugTrace) {

                // Handle pointers to named uniform slots.

                std::string name = this->uniformName({uniformIdx, numSlots});

                if (!name.empty()) {

                    return name;

                }

            }

            // Handle pointers to uniforms (when no debug info exists).

            return 'u' + this->asRange(uniformIdx, numSlots);

        }

        return {};

    }


    // Attempts to interpret the passed-in pointer as a value slot range.

    std::string valuePtrCtx(const float* ptr, int numSlots) const {

        const float* end = ptr + (N * numSlots);

        if (ptr >= fSlots.values.begin() && end <= fSlots.values.end()) {

            int valueIdx = ptr - fSlots.values.begin();

            SkASSERT((valueIdx % N) == 0);

            valueIdx /= N;

            if (fProgram.fDebugTrace) {

                // Handle pointers to named value slots.

                std::string name = this->slotName({valueIdx, numSlots});

                if (!name.empty()) {

                    return name;

                }

            }

            // Handle pointers to value slots (when no debug info exists).

            return 'v' + this->asRange(valueIdx, numSlots);

        }

        return {};

    }


    // Attempts to interpret the passed-in pointer as a immutable slot range.

    std::string immutablePtrCtx(const float* ptr, int numSlots) const {

        const float* end = ptr + numSlots;

        if (ptr >= fSlots.immutable.begin() && end <= fSlots.immutable.end()) {

            int index = ptr - fSlots.immutable.begin();

            return 'i' + this->asRange(index, numSlots) + ' ' +

                   this->multiImmCtx(ptr, numSlots);

        }

        return {};

    }


    // Interprets the context value as a pointer to `count` immediate values.

    std::string multiImmCtx(const float* ptr, int count) const {

        // If this is a uniform, print it by name.

        if (std::string text = this->uniformPtrCtx(ptr, count); !text.empty()) {

            return text;

        }

        // Emit a single bracketed immediate.

        if (count == 1) {

            return '[' + this->imm(*ptr) + ']';

        }

        // Emit a list like `[0x00000000 (0.0), 0x3F80000 (1.0)]`.

        std::string text = "[";

        auto separator = SkSL::String::Separator();

        while (count--) {

            text += separator();

            text += this->imm(*ptr++);

        }

        return text + ']';

    }


    // Interprets the context value as a generic pointer.

    std::string ptrCtx(const void* ctx, int numSlots) const {

        const float *ctxAsSlot = static_cast<const float*>(ctx);

        // Check for uniform, value, and immutable pointers.

        if (std::string uniform = this->uniformPtrCtx(ctxAsSlot, numSlots); !uniform.empty()) {

            return uniform;

        }

        if (std::string value = this->valuePtrCtx(ctxAsSlot, numSlots); !value.empty()) {

            return value;

        }

        if (std::string value = this->immutablePtrCtx(ctxAsSlot, numSlots); !value.empty()) {

            return value;

        }

        // Handle pointers to temporary stack slots.

        if (ctxAsSlot >= fSlots.stack.begin() && ctxAsSlot < fSlots.stack.end()) {

            int stackIdx = ctxAsSlot - fSlots.stack.begin();

            SkASSERT((stackIdx % N) == 0);

            return '$' + this->asRange(stackIdx / N, numSlots);

        }

        // This pointer is out of our expected bounds; this generally isn't expected to happen.

        return "ExternalPtr(" + this->asRange(0, numSlots) + ")";

    }


    // Converts an SkRPOffset to a pointer into the value-slot range.

    std::byte* offsetToPtr(SkRPOffset offset) const {

        return (std::byte*)fSlots.values.data() + offset;

    }


    // Interprets a slab offset as a slot range.

    std::string offsetCtx(SkRPOffset offset, int numSlots) const {

        return this->ptrCtx(this->offsetToPtr(offset), numSlots);

    }


    // Interprets the context value as a packed ConstantCtx structure.

    std::tuple<std::string, std::string> constantCtx(const void* v,

                                                     int slots,

                                                     bool showAsFloat = true) const {

        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_ConstantCtx*)v);

        return {this->offsetCtx(ctx.dst, slots),

                this->imm(sk_bit_cast<float>(ctx.value), showAsFloat)};

    }


    // Interprets the context value as a BinaryOp structure for copy_n_slots (numSlots is dictated

    // by the op itself).

    std::tuple<std::string, std::string> binaryOpCtx(const void* v, int numSlots) const {

        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_BinaryOpCtx*)v);

        return {this->offsetCtx(ctx.dst, numSlots),

                this->offsetCtx(ctx.src, numSlots)};

    }


    // Interprets the context value as a BinaryOp structure for copy_n_uniforms (numSlots is

    // dictated by the op itself).

    std::tuple<std::string, std::string> copyUniformCtx(const void* v, int numSlots) const {

        const auto *ctx = static_cast<const SkRasterPipeline_UniformCtx*>(v);

        return {this->ptrCtx(ctx->dst, numSlots),

                this->multiImmCtx(reinterpret_cast<const float*>(ctx->src), numSlots)};

    }


    // Interprets the context value as a pointer to two adjacent values.

    std::tuple<std::string, std::string> adjacentPtrCtx(const void* ctx, int numSlots) const {

        const float *ctxAsSlot = static_cast<const float*>(ctx);

        return std::make_tuple(this->ptrCtx(ctxAsSlot, numSlots),

                               this->ptrCtx(ctxAsSlot + (N * numSlots), numSlots));

    }


    // Interprets a slab offset as two adjacent slot ranges.

    std::tuple<std::string, std::string> adjacentOffsetCtx(SkRPOffset offset, int numSlots) const {

        return this->adjacentPtrCtx((std::byte*)fSlots.values.data() + offset, numSlots);

    }


    // Interprets the context value as a BinaryOp structure (numSlots is inferred from the distance

    // between pointers).

    std::tuple<std::string, std::string> adjacentBinaryOpCtx(const void* v) const {

        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_BinaryOpCtx*)v);

        int numSlots = (ctx.src - ctx.dst) / (N * sizeof(float));

        return this->adjacentOffsetCtx(ctx.dst, numSlots);

    }


    // Interprets the context value as a pointer to three adjacent values.

    std::tuple<std::string, std::string, std::string> adjacent3PtrCtx(const void* ctx,

                                                                      int numSlots) const {

        const float *ctxAsSlot = static_cast<const float*>(ctx);

        return {this->ptrCtx(ctxAsSlot, numSlots),

                this->ptrCtx(ctxAsSlot + (N * numSlots), numSlots),

                this->ptrCtx(ctxAsSlot + (2 * N * numSlots), numSlots)};

    }


    // Interprets a slab offset as three adjacent slot ranges.

    std::tuple<std::string, std::string, std::string> adjacent3OffsetCtx(SkRPOffset offset,

                                                                         int numSlots) const {

        return this->adjacent3PtrCtx((std::byte*)fSlots.values.data() + offset, numSlots);

    }


    // Interprets the context value as a TernaryOp structure (numSlots is inferred from `delta`).

    std::tuple<std::string, std::string, std::string> adjacentTernaryOpCtx(const void* v) const {

        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_TernaryOpCtx*)v);

        int numSlots = ctx.delta / (sizeof(float) * N);

        return this->adjacent3OffsetCtx(ctx.dst, numSlots);

    }


    // Stringizes a span of swizzle offsets to the textual equivalent (`xyzw`).

    template <typename T>

    std::string swizzleOffsetSpan(SkSpan<T> offsets) const {

        std::string src;

        for (uint16_t offset : offsets) {

            if (offset == (0 * N * sizeof(float))) {

                src.push_back('x');

            } else if (offset == (1 * N * sizeof(float))) {

                src.push_back('y');

            } else if (offset == (2 * N * sizeof(float))) {

                src.push_back('z');

            } else if (offset == (3 * N * sizeof(float))) {

                src.push_back('w');

            } else {

                src.push_back('?');

            }

        }

        return src;

    }


    // Determines the effective width of a swizzle op. When we decode a swizzle, we don't know the

    // slot width of the original value; that's not preserved in the instruction encoding. (e.g.,

    // myFloat4.y would be indistinguishable from myFloat2.y.) We do our best to make a readable

    // dump using the data we have.

    template <typename T>

    size_t swizzleWidth(SkSpan<T> offsets) const {

        size_t highestComponent = *std::max_element(offsets.begin(), offsets.end()) /

                                  (N * sizeof(float));

        size_t swizzleWidth = offsets.size();

        return std::max(swizzleWidth, highestComponent + 1);

    }


    // Stringizes a swizzled pointer.

    template <typename T>

    std::string swizzlePtr(const void* ptr, SkSpan<T> offsets) const {

        return "(" + this->ptrCtx(ptr, this->swizzleWidth(SkSpan(offsets))) + ")." +

               this->swizzleOffsetSpan(SkSpan(offsets));

    }


    // Interprets the context value as a SwizzleCtx structure.

    std::tuple<std::string, std::string> swizzleCtx(ProgramOp op, const void* v) const {

        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_SwizzleCtx*)v);

        int destSlots = (int)op - (int)BuilderOp::swizzle_1 + 1;

        return {this->offsetCtx(ctx.dst, destSlots),

                this->swizzlePtr(this->offsetToPtr(ctx.dst), SkSpan(ctx.offsets, destSlots))};

    }


    // Interprets the context value as a SwizzleCopyCtx structure.

    std::tuple<std::string, std::string> swizzleCopyCtx(ProgramOp op, const void* v) const {

        const auto* ctx = static_cast<const SkRasterPipeline_SwizzleCopyCtx*>(v);

        int destSlots = (int)op - (int)BuilderOp::swizzle_copy_slot_masked + 1;


        return {this->swizzlePtr(ctx->dst, SkSpan(ctx->offsets, destSlots)),

                this->ptrCtx(ctx->src, destSlots)};

    }


    // Interprets the context value as a ShuffleCtx structure.

    std::tuple<std::string, std::string> shuffleCtx(const void* v) const {

        const auto* ctx = static_cast<const SkRasterPipeline_ShuffleCtx*>(v);


        std::string dst = this->ptrCtx(ctx->ptr, ctx->count);

        std::string src = "(" + dst + ")[";

        for (int index = 0; index < ctx->count; ++index) {

            if (ctx->offsets[index] % (N * sizeof(float))) {

                src.push_back('?');

            } else {

                src += std::to_string(ctx->offsets[index] / (N * sizeof(float)));

            }

            src.push_back(' ');

        }

        src.back() = ']';

        return std::make_tuple(dst, src);

    }


    // Interprets the context value as a packed MatrixMultiplyCtx structure.

    std::tuple<std::string, std::string, std::string> matrixMultiply(const void* v) const {

        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_MatrixMultiplyCtx*)v);

        int leftMatrix = ctx.leftColumns * ctx.leftRows;

        int rightMatrix = ctx.rightColumns * ctx.rightRows;

        int resultMatrix = ctx.rightColumns * ctx.leftRows;

        SkRPOffset leftOffset = ctx.dst + (ctx.rightColumns * ctx.leftRows * sizeof(float) * N);

        SkRPOffset rightOffset = leftOffset + (ctx.leftColumns * ctx.leftRows * sizeof(float) * N);

        return {SkSL::String::printf("mat%dx%d(%s)",

                                     ctx.rightColumns,

                                     ctx.leftRows,

                                     this->offsetCtx(ctx.dst, resultMatrix).c_str()),

                SkSL::String::printf("mat%dx%d(%s)",

                                     ctx.leftColumns,

                                     ctx.leftRows,

                                     this->offsetCtx(leftOffset, leftMatrix).c_str()),

                SkSL::String::printf("mat%dx%d(%s)",

                                     ctx.rightColumns,

                                     ctx.rightRows,

                                     this->offsetCtx(rightOffset, rightMatrix).c_str())};

    }


private:

    const int N = SkOpts::raster_pipeline_highp_stride;

    const Program& fProgram;

    TArray<Stage> fStages;

    TArray<std::string> fSlotNameList;

    THashMap<int, int> fLabelToStageMap;  // <label ID, stage index>

    SlotData fSlots;

    SkSpan<float> fUniforms;

};


void Program::Dumper::dump(SkWStream* out, bool writeInstructionCount) {

    using POp = ProgramOp;


    // Allocate memory for the slot and uniform data, even though the program won't ever be

    // executed. The program requires pointer ranges for managing its data, and ASAN will report

    // errors if those pointers are pointing at unallocated memory.

    SkArenaAlloc alloc(/*firstHeapAllocation=*/1000);

    fSlots = fProgram.allocateSlotData(&alloc);

    float* uniformPtr = alloc.makeArray<float>(fProgram.fNumUniformSlots);

    fUniforms = SkSpan(uniformPtr, fProgram.fNumUniformSlots);


    // Turn this program into an array of Raster Pipeline stages.

    fProgram.makeStages(&fStages, &alloc, fUniforms, fSlots);


    // Assemble lookup tables for program labels and slot names.

    this->buildLabelToStageMap();

    this->buildUniqueSlotNameList();


    // Emit the program's instruction count.

    if (writeInstructionCount) {

        int invocationCount = 0, instructionCount = 0;

        for (const Stage& stage : fStages) {

            switch (stage.op) {

                case POp::label:

                    // consumes zero instructions

                    break;


                case POp::invoke_shader:

                case POp::invoke_color_filter:

                case POp::invoke_blender:

                case POp::invoke_to_linear_srgb:

                case POp::invoke_from_linear_srgb:

                    ++invocationCount;

                    break;


                default:

                    ++instructionCount;

                    break;

            }

        }


        out->writeText(std::to_string(instructionCount).c_str());

        out->writeText(" instructions");

        if (invocationCount > 0) {

            out->writeText(", ");

            out->writeText(std::to_string(invocationCount).c_str());

            out->writeText(" invocations");

        }

        out->writeText("\n\n");

    }


    // Emit all of the program's immutable data.

    const char* header = "[immutable slots]\n";

    const char* footer = "";

    for (const Instruction& inst : fProgram.fInstructions) {

        if (inst.fOp == BuilderOp::store_immutable_value) {

            out->writeText(header);

            out->writeText("i");

            out->writeText(std::to_string(inst.fSlotA).c_str());

            out->writeText(" = ");

            out->writeText(this->imm(sk_bit_cast<float>(inst.fImmA)).c_str());

            out->writeText("\n");


            header = "";

            footer = "\n";

        }

    }

    out->writeText(footer);


    // Emit the program's instruction list.

    for (int index = 0; index < fStages.size(); ++index) {

        const Stage& stage = fStages[index];


        std::string opArg1, opArg2, opArg3, opSwizzle;

        switch (stage.op) {

            case POp::label:

            case POp::invoke_shader:

            case POp::invoke_color_filter:

            case POp::invoke_blender:

                opArg1 = this->immCtx(stage.ctx, /*showAsFloat=*/false);

                break;


            case POp::case_op: {

                auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_CaseOpCtx*)stage.ctx);

                opArg1 = this->offsetCtx(ctx.offset, 1);

                opArg2 = this->offsetCtx(ctx.offset + sizeof(int32_t) * N, 1);

                opArg3 = this->imm(sk_bit_cast<float>(ctx.expectedValue), /*showAsFloat=*/false);

                break;

            }

            case POp::swizzle_1:

            case POp::swizzle_2:

            case POp::swizzle_3:

            case POp::swizzle_4:

                std::tie(opArg1, opArg2) = this->swizzleCtx(stage.op, stage.ctx);

                break;


            case POp::swizzle_copy_slot_masked:

            case POp::swizzle_copy_2_slots_masked:

            case POp::swizzle_copy_3_slots_masked:

            case POp::swizzle_copy_4_slots_masked:

                std::tie(opArg1, opArg2) = this->swizzleCopyCtx(stage.op, stage.ctx);

                break;


            case POp::refract_4_floats:

                std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 4);

                opArg3 = this->ptrCtx((const float*)(stage.ctx) + (8 * N), 1);

                break;


            case POp::dot_2_floats:

                opArg1 = this->ptrCtx(stage.ctx, 1);

                std::tie(opArg2, opArg3) = this->adjacentPtrCtx(stage.ctx, 2);

                break;


            case POp::dot_3_floats:

                opArg1 = this->ptrCtx(stage.ctx, 1);

                std::tie(opArg2, opArg3) = this->adjacentPtrCtx(stage.ctx, 3);

                break;


            case POp::dot_4_floats:

                opArg1 = this->ptrCtx(stage.ctx, 1);

                std::tie(opArg2, opArg3) = this->adjacentPtrCtx(stage.ctx, 4);

                break;


            case POp::shuffle:

                std::tie(opArg1, opArg2) = this->shuffleCtx(stage.ctx);

                break;


            case POp::matrix_multiply_2:

            case POp::matrix_multiply_3:

            case POp::matrix_multiply_4:

                std::tie(opArg1, opArg2, opArg3) = this->matrixMultiply(stage.ctx);

                break;


            case POp::load_condition_mask:

            case POp::store_condition_mask:

            case POp::load_loop_mask:

            case POp::store_loop_mask:

            case POp::merge_loop_mask:

            case POp::reenable_loop_mask:

            case POp::load_return_mask:

            case POp::store_return_mask:

            case POp::continue_op:

            case POp::cast_to_float_from_int: case POp::cast_to_float_from_uint:

            case POp::cast_to_int_from_float: case POp::cast_to_uint_from_float:

            case POp::abs_int:

            case POp::acos_float:

            case POp::asin_float:

            case POp::atan_float:

            case POp::ceil_float:

            case POp::cos_float:

            case POp::exp_float:

            case POp::exp2_float:

            case POp::log_float:

            case POp::log2_float:

            case POp::floor_float:

            case POp::invsqrt_float:

            case POp::sin_float:

            case POp::sqrt_float:

            case POp::tan_float:

                opArg1 = this->ptrCtx(stage.ctx, 1);

                break;


            case POp::store_src_rg:

            case POp::cast_to_float_from_2_ints: case POp::cast_to_float_from_2_uints:

            case POp::cast_to_int_from_2_floats: case POp::cast_to_uint_from_2_floats:

            case POp::abs_2_ints:

            case POp::ceil_2_floats:

            case POp::floor_2_floats:

            case POp::invsqrt_2_floats:

                opArg1 = this->ptrCtx(stage.ctx, 2);

                break;


            case POp::cast_to_float_from_3_ints: case POp::cast_to_float_from_3_uints:

            case POp::cast_to_int_from_3_floats: case POp::cast_to_uint_from_3_floats:

            case POp::abs_3_ints:

            case POp::ceil_3_floats:

            case POp::floor_3_floats:

            case POp::invsqrt_3_floats:

                opArg1 = this->ptrCtx(stage.ctx, 3);

                break;


            case POp::load_src:

            case POp::load_dst:

            case POp::exchange_src:

            case POp::store_src:

            case POp::store_dst:

            case POp::store_device_xy01:

            case POp::invoke_to_linear_srgb:

            case POp::invoke_from_linear_srgb:

            case POp::cast_to_float_from_4_ints: case POp::cast_to_float_from_4_uints:

            case POp::cast_to_int_from_4_floats: case POp::cast_to_uint_from_4_floats:

            case POp::abs_4_ints:

            case POp::ceil_4_floats:

            case POp::floor_4_floats:

            case POp::invsqrt_4_floats:

            case POp::inverse_mat2:

                opArg1 = this->ptrCtx(stage.ctx, 4);

                break;


            case POp::inverse_mat3:

                opArg1 = this->ptrCtx(stage.ctx, 9);

                break;


            case POp::inverse_mat4:

                opArg1 = this->ptrCtx(stage.ctx, 16);

                break;


            case POp::copy_constant:

            case POp::add_imm_float:

            case POp::mul_imm_float:

            case POp::cmple_imm_float:

            case POp::cmplt_imm_float:

            case POp::cmpeq_imm_float:

            case POp::cmpne_imm_float:

            case POp::min_imm_float:

            case POp::max_imm_float:

                std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 1);

                break;


            case POp::add_imm_int:

            case POp::mul_imm_int:

            case POp::bitwise_and_imm_int:

            case POp::bitwise_xor_imm_int:

            case POp::cmple_imm_int:

            case POp::cmple_imm_uint:

            case POp::cmplt_imm_int:

            case POp::cmplt_imm_uint:

            case POp::cmpeq_imm_int:

            case POp::cmpne_imm_int:

                std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 1, /*showAsFloat=*/false);

                break;


            case POp::splat_2_constants:

            case POp::bitwise_and_imm_2_ints:

                std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 2);

                break;


            case POp::splat_3_constants:

            case POp::bitwise_and_imm_3_ints:

                std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 3);

                break;


            case POp::splat_4_constants:

            case POp::bitwise_and_imm_4_ints:

                std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 4);

                break;


            case POp::copy_uniform:

                std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 1);

                break;


            case POp::copy_2_uniforms:

                std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 2);

                break;


            case POp::copy_3_uniforms:

                std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 3);

                break;


            case POp::copy_4_uniforms:

                std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 4);

                break;


            case POp::copy_slot_masked:

            case POp::copy_slot_unmasked:

            case POp::copy_immutable_unmasked:

                std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 1);

                break;


            case POp::copy_2_slots_masked:

            case POp::copy_2_slots_unmasked:

            case POp::copy_2_immutables_unmasked:

                std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 2);

                break;


            case POp::copy_3_slots_masked:

            case POp::copy_3_slots_unmasked:

            case POp::copy_3_immutables_unmasked:

                std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 3);

                break;


            case POp::copy_4_slots_masked:

            case POp::copy_4_slots_unmasked:

            case POp::copy_4_immutables_unmasked:

                std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 4);

                break;


            case POp::copy_from_indirect_uniform_unmasked:

            case POp::copy_from_indirect_unmasked:

            case POp::copy_to_indirect_masked: {

                const auto* ctx = static_cast<SkRasterPipeline_CopyIndirectCtx*>(stage.ctx);

                // We don't incorporate the indirect-limit in the output

                opArg1 = this->ptrCtx(ctx->dst, ctx->slots);

                opArg2 = this->ptrCtx(ctx->src, ctx->slots);

                opArg3 = this->ptrCtx(ctx->indirectOffset, 1);

                break;

            }

            case POp::swizzle_copy_to_indirect_masked: {

                const auto* ctx = static_cast<SkRasterPipeline_SwizzleCopyIndirectCtx*>(stage.ctx);

                opArg1 = this->ptrCtx(ctx->dst, this->swizzleWidth(SkSpan(ctx->offsets,

                                                                          ctx->slots)));

                opArg2 = this->ptrCtx(ctx->src, ctx->slots);

                opArg3 = this->ptrCtx(ctx->indirectOffset, 1);

                opSwizzle = this->swizzleOffsetSpan(SkSpan(ctx->offsets, ctx->slots));

                break;

            }

            case POp::merge_condition_mask:

            case POp::merge_inv_condition_mask:

            case POp::add_float:   case POp::add_int:

            case POp::sub_float:   case POp::sub_int:

            case POp::mul_float:   case POp::mul_int:

            case POp::div_float:   case POp::div_int:   case POp::div_uint:

                                   case POp::bitwise_and_int:

                                   case POp::bitwise_or_int:

                                   case POp::bitwise_xor_int:

            case POp::mod_float:

            case POp::min_float:   case POp::min_int:   case POp::min_uint:

            case POp::max_float:   case POp::max_int:   case POp::max_uint:

            case POp::cmplt_float: case POp::cmplt_int: case POp::cmplt_uint:

            case POp::cmple_float: case POp::cmple_int: case POp::cmple_uint:

            case POp::cmpeq_float: case POp::cmpeq_int:

            case POp::cmpne_float: case POp::cmpne_int:

                std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 1);

                break;


            case POp::mix_float:   case POp::mix_int:

                std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 1);

                break;


            case POp::add_2_floats:   case POp::add_2_ints:

            case POp::sub_2_floats:   case POp::sub_2_ints:

            case POp::mul_2_floats:   case POp::mul_2_ints:

            case POp::div_2_floats:   case POp::div_2_ints:   case POp::div_2_uints:

                                      case POp::bitwise_and_2_ints:

                                      case POp::bitwise_or_2_ints:

                                      case POp::bitwise_xor_2_ints:

            case POp::mod_2_floats:

            case POp::min_2_floats:   case POp::min_2_ints:   case POp::min_2_uints:

            case POp::max_2_floats:   case POp::max_2_ints:   case POp::max_2_uints:

            case POp::cmplt_2_floats: case POp::cmplt_2_ints: case POp::cmplt_2_uints:

            case POp::cmple_2_floats: case POp::cmple_2_ints: case POp::cmple_2_uints:

            case POp::cmpeq_2_floats: case POp::cmpeq_2_ints:

            case POp::cmpne_2_floats: case POp::cmpne_2_ints:

                std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 2);

                break;


            case POp::mix_2_floats:   case POp::mix_2_ints:

                std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 2);

                break;


            case POp::add_3_floats:   case POp::add_3_ints:

            case POp::sub_3_floats:   case POp::sub_3_ints:

            case POp::mul_3_floats:   case POp::mul_3_ints:

            case POp::div_3_floats:   case POp::div_3_ints:   case POp::div_3_uints:

                                      case POp::bitwise_and_3_ints:

                                      case POp::bitwise_or_3_ints:

                                      case POp::bitwise_xor_3_ints:

            case POp::mod_3_floats:

            case POp::min_3_floats:   case POp::min_3_ints:   case POp::min_3_uints:

            case POp::max_3_floats:   case POp::max_3_ints:   case POp::max_3_uints:

            case POp::cmplt_3_floats: case POp::cmplt_3_ints: case POp::cmplt_3_uints:

            case POp::cmple_3_floats: case POp::cmple_3_ints: case POp::cmple_3_uints:

            case POp::cmpeq_3_floats: case POp::cmpeq_3_ints:

            case POp::cmpne_3_floats: case POp::cmpne_3_ints:

                std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 3);

                break;


            case POp::mix_3_floats:   case POp::mix_3_ints:

                std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 3);

                break;


            case POp::add_4_floats:   case POp::add_4_ints:

            case POp::sub_4_floats:   case POp::sub_4_ints:

            case POp::mul_4_floats:   case POp::mul_4_ints:

            case POp::div_4_floats:   case POp::div_4_ints:   case POp::div_4_uints:

                                      case POp::bitwise_and_4_ints:

                                      case POp::bitwise_or_4_ints:

                                      case POp::bitwise_xor_4_ints:

            case POp::mod_4_floats:

            case POp::min_4_floats:   case POp::min_4_ints:   case POp::min_4_uints:

            case POp::max_4_floats:   case POp::max_4_ints:   case POp::max_4_uints:

            case POp::cmplt_4_floats: case POp::cmplt_4_ints: case POp::cmplt_4_uints:

            case POp::cmple_4_floats: case POp::cmple_4_ints: case POp::cmple_4_uints:

            case POp::cmpeq_4_floats: case POp::cmpeq_4_ints:

            case POp::cmpne_4_floats: case POp::cmpne_4_ints:

                std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 4);

                break;


            case POp::mix_4_floats:   case POp::mix_4_ints:

                std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 4);

                break;


            case POp::add_n_floats:   case POp::add_n_ints:

            case POp::sub_n_floats:   case POp::sub_n_ints:

            case POp::mul_n_floats:   case POp::mul_n_ints:

            case POp::div_n_floats:   case POp::div_n_ints:   case POp::div_n_uints:

                                      case POp::bitwise_and_n_ints:

                                      case POp::bitwise_or_n_ints:

                                      case POp::bitwise_xor_n_ints:

            case POp::mod_n_floats:

            case POp::min_n_floats:   case POp::min_n_ints:   case POp::min_n_uints:

            case POp::max_n_floats:   case POp::max_n_ints:   case POp::max_n_uints:

            case POp::cmplt_n_floats: case POp::cmplt_n_ints: case POp::cmplt_n_uints:

            case POp::cmple_n_floats: case POp::cmple_n_ints: case POp::cmple_n_uints:

            case POp::cmpeq_n_floats: case POp::cmpeq_n_ints:

            case POp::cmpne_n_floats: case POp::cmpne_n_ints:

            case POp::atan2_n_floats:

            case POp::pow_n_floats:

                std::tie(opArg1, opArg2) = this->adjacentBinaryOpCtx(stage.ctx);

                break;


            case POp::mix_n_floats:        case POp::mix_n_ints:

            case POp::smoothstep_n_floats:

                std::tie(opArg1, opArg2, opArg3) = this->adjacentTernaryOpCtx(stage.ctx);

                break;


            case POp::jump:

            case POp::branch_if_all_lanes_active:

            case POp::branch_if_any_lanes_active:

            case POp::branch_if_no_lanes_active:

                opArg1 = this->branchOffset(static_cast<SkRasterPipeline_BranchCtx*>(stage.ctx),

                                            index);

                break;


            case POp::branch_if_no_active_lanes_eq: {

                const auto* ctx = static_cast<SkRasterPipeline_BranchIfEqualCtx*>(stage.ctx);

                opArg1 = this->branchOffset(ctx, index);

                opArg2 = this->ptrCtx(ctx->ptr, 1);

                opArg3 = this->imm(sk_bit_cast<float>(ctx->value));

                break;

            }

            case POp::trace_var: {

                const auto* ctx = static_cast<SkRasterPipeline_TraceVarCtx*>(stage.ctx);

                opArg1 = this->ptrCtx(ctx->traceMask, 1);

                opArg2 = this->ptrCtx(ctx->data, ctx->numSlots);

                if (ctx->indirectOffset != nullptr) {

                    opArg3 = " + " + this->ptrCtx(ctx->indirectOffset, 1);

                }

                break;

            }

            case POp::trace_line: {

                const auto* ctx = static_cast<SkRasterPipeline_TraceLineCtx*>(stage.ctx);

                opArg1 = this->ptrCtx(ctx->traceMask, 1);

                opArg2 = std::to_string(ctx->lineNumber);

                break;

            }

            case POp::trace_enter:

            case POp::trace_exit: {

                const auto* ctx = static_cast<SkRasterPipeline_TraceFuncCtx*>(stage.ctx);

                opArg1 = this->ptrCtx(ctx->traceMask, 1);

                opArg2 = (fProgram.fDebugTrace &&

                          ctx->funcIdx >= 0 &&

                          ctx->funcIdx < (int)fProgram.fDebugTrace->fFuncInfo.size())

                                 ? fProgram.fDebugTrace->fFuncInfo[ctx->funcIdx].name

                                 : "???";

                break;

            }

            case POp::trace_scope: {

                const auto* ctx = static_cast<SkRasterPipeline_TraceScopeCtx*>(stage.ctx);

                opArg1 = this->ptrCtx(ctx->traceMask, 1);

                opArg2 = SkSL::String::printf("%+d", ctx->delta);

                break;

            }

            default:

                break;

        }


        std::string_view opName;

        switch (stage.op) {

        #define M(x) case POp::x: opName = #x; break;

            SK_RASTER_PIPELINE_OPS_ALL(M)

            SKRP_EXTENDED_OPS(M)

        #undef M

        }


        std::string opText;

        switch (stage.op) {

            case POp::trace_var:

                opText = "TraceVar(" + opArg2 + opArg3 + ") when " + opArg1 + " is true";

                break;


            case POp::trace_line:

                opText = "TraceLine(" + opArg2 + ") when " + opArg1 + " is true";

                break;


            case POp::trace_enter:

                opText = "TraceEnter(" + opArg2 + ") when " + opArg1 + " is true";

                break;


            case POp::trace_exit:

                opText = "TraceExit(" + opArg2 + ") when " + opArg1 + " is true";

                break;


            case POp::trace_scope:

                opText = "TraceScope(" + opArg2 + ") when " + opArg1 + " is true";

                break;


            case POp::init_lane_masks:

                opText = "CondMask = LoopMask = RetMask = true";

                break;


            case POp::load_condition_mask:

                opText = "CondMask = " + opArg1;

                break;


            case POp::store_condition_mask:

                opText = opArg1 + " = CondMask";

                break;


            case POp::merge_condition_mask:

                opText = "CondMask = " + opArg1 + " & " + opArg2;

                break;


            case POp::merge_inv_condition_mask:

                opText = "CondMask = " + opArg1 + " & ~" + opArg2;

                break;


            case POp::load_loop_mask:

                opText = "LoopMask = " + opArg1;

                break;


            case POp::store_loop_mask:

                opText = opArg1 + " = LoopMask";

                break;


            case POp::mask_off_loop_mask:

                opText = "LoopMask &= ~(CondMask & LoopMask & RetMask)";

                break;


            case POp::reenable_loop_mask:

                opText = "LoopMask |= " + opArg1;

                break;


            case POp::merge_loop_mask:

                opText = "LoopMask &= " + opArg1;

                break;


            case POp::load_return_mask:

                opText = "RetMask = " + opArg1;

                break;


            case POp::store_return_mask:

                opText = opArg1 + " = RetMask";

                break;


            case POp::mask_off_return_mask:

                opText = "RetMask &= ~(CondMask & LoopMask & RetMask)";

                break;


            case POp::store_src_rg:

                opText = opArg1 + " = src.rg";

                break;


            case POp::exchange_src:

                opText = "swap(src.rgba, " + opArg1 + ")";

                break;


            case POp::store_src:

                opText = opArg1 + " = src.rgba";

                break;


            case POp::store_dst:

                opText = opArg1 + " = dst.rgba";

                break;


            case POp::store_device_xy01:

                opText = opArg1 + " = DeviceCoords.xy01";

                break;


            case POp::load_src:

                opText = "src.rgba = " + opArg1;

                break;


            case POp::load_dst:

                opText = "dst.rgba = " + opArg1;

                break;


            case POp::bitwise_and_int:

            case POp::bitwise_and_2_ints:

            case POp::bitwise_and_3_ints:

            case POp::bitwise_and_4_ints:

            case POp::bitwise_and_n_ints:

            case POp::bitwise_and_imm_int:

            case POp::bitwise_and_imm_2_ints:

            case POp::bitwise_and_imm_3_ints:

            case POp::bitwise_and_imm_4_ints:

                opText = opArg1 + " &= " + opArg2;

                break;


            case POp::bitwise_or_int:

            case POp::bitwise_or_2_ints:

            case POp::bitwise_or_3_ints:

            case POp::bitwise_or_4_ints:

            case POp::bitwise_or_n_ints:

                opText = opArg1 + " |= " + opArg2;

                break;


            case POp::bitwise_xor_int:

            case POp::bitwise_xor_2_ints:

            case POp::bitwise_xor_3_ints:

            case POp::bitwise_xor_4_ints:

            case POp::bitwise_xor_n_ints:

            case POp::bitwise_xor_imm_int:

                opText = opArg1 + " ^= " + opArg2;

                break;


            case POp::cast_to_float_from_int:

            case POp::cast_to_float_from_2_ints:

            case POp::cast_to_float_from_3_ints:

            case POp::cast_to_float_from_4_ints:

                opText = opArg1 + " = IntToFloat(" + opArg1 + ")";

                break;


            case POp::cast_to_float_from_uint:

            case POp::cast_to_float_from_2_uints:

            case POp::cast_to_float_from_3_uints:

            case POp::cast_to_float_from_4_uints:

                opText = opArg1 + " = UintToFloat(" + opArg1 + ")";

                break;


            case POp::cast_to_int_from_float:

            case POp::cast_to_int_from_2_floats:

            case POp::cast_to_int_from_3_floats:

            case POp::cast_to_int_from_4_floats:

                opText = opArg1 + " = FloatToInt(" + opArg1 + ")";

                break;


            case POp::cast_to_uint_from_float:

            case POp::cast_to_uint_from_2_floats:

            case POp::cast_to_uint_from_3_floats:

            case POp::cast_to_uint_from_4_floats:

                opText = opArg1 + " = FloatToUint(" + opArg1 + ")";

                break;


            case POp::copy_slot_masked:            case POp::copy_2_slots_masked:

            case POp::copy_3_slots_masked:         case POp::copy_4_slots_masked:

            case POp::swizzle_copy_slot_masked:    case POp::swizzle_copy_2_slots_masked:

            case POp::swizzle_copy_3_slots_masked: case POp::swizzle_copy_4_slots_masked:

                opText = opArg1 + " = Mask(" + opArg2 + ")";

                break;


            case POp::copy_uniform:                case POp::copy_2_uniforms:

            case POp::copy_3_uniforms:             case POp::copy_4_uniforms:

            case POp::copy_slot_unmasked:          case POp::copy_2_slots_unmasked:

            case POp::copy_3_slots_unmasked:       case POp::copy_4_slots_unmasked:

            case POp::copy_immutable_unmasked:     case POp::copy_2_immutables_unmasked:

            case POp::copy_3_immutables_unmasked:  case POp::copy_4_immutables_unmasked:

            case POp::copy_constant:               case POp::splat_2_constants:

            case POp::splat_3_constants:           case POp::splat_4_constants:

            case POp::swizzle_1:                   case POp::swizzle_2:

            case POp::swizzle_3:                   case POp::swizzle_4:

            case POp::shuffle:

                opText = opArg1 + " = " + opArg2;

                break;


            case POp::copy_from_indirect_unmasked:

            case POp::copy_from_indirect_uniform_unmasked:

                opText = opArg1 + " = Indirect(" + opArg2 + " + " + opArg3 + ")";

                break;


            case POp::copy_to_indirect_masked:

                opText = "Indirect(" + opArg1 + " + " + opArg3 + ") = Mask(" + opArg2 + ")";

                break;


            case POp::swizzle_copy_to_indirect_masked:

                opText = "Indirect(" + opArg1 + " + " + opArg3 + ")." + opSwizzle + " = Mask(" +

                         opArg2 + ")";

                break;


            case POp::abs_int:

            case POp::abs_2_ints:

            case POp::abs_3_ints:

            case POp::abs_4_ints:

                opText = opArg1 + " = abs(" + opArg1 + ")";

                break;


            case POp::acos_float:

                opText = opArg1 + " = acos(" + opArg1 + ")";

                break;


            case POp::asin_float:

                opText = opArg1 + " = asin(" + opArg1 + ")";

                break;


            case POp::atan_float:

                opText = opArg1 + " = atan(" + opArg1 + ")";

                break;


            case POp::atan2_n_floats:

                opText = opArg1 + " = atan2(" + opArg1 + ", " + opArg2 + ")";

                break;


            case POp::ceil_float:

            case POp::ceil_2_floats:

            case POp::ceil_3_floats:

            case POp::ceil_4_floats:

                opText = opArg1 + " = ceil(" + opArg1 + ")";

                break;


            case POp::cos_float:

                opText = opArg1 + " = cos(" + opArg1 + ")";

                break;


            case POp::refract_4_floats:

                opText = opArg1 + " = refract(" + opArg1 + ", " + opArg2 + ", " + opArg3 + ")";

                break;


            case POp::dot_2_floats:

            case POp::dot_3_floats:

            case POp::dot_4_floats:

                opText = opArg1 + " = dot(" + opArg2 + ", " + opArg3 + ")";

                break;


            case POp::exp_float:

                opText = opArg1 + " = exp(" + opArg1 + ")";

                break;


            case POp::exp2_float:

                opText = opArg1 + " = exp2(" + opArg1 + ")";

                break;


            case POp::log_float:

                opText = opArg1 + " = log(" + opArg1 + ")";

                break;


            case POp::log2_float:

                opText = opArg1 + " = log2(" + opArg1 + ")";

                break;


            case POp::pow_n_floats:

                opText = opArg1 + " = pow(" + opArg1 + ", " + opArg2 + ")";

                break;


            case POp::sin_float:

                opText = opArg1 + " = sin(" + opArg1 + ")";

                break;


            case POp::sqrt_float:

                opText = opArg1 + " = sqrt(" + opArg1 + ")";

                break;


            case POp::tan_float:

                opText = opArg1 + " = tan(" + opArg1 + ")";

                break;


            case POp::floor_float:

            case POp::floor_2_floats:

            case POp::floor_3_floats:

            case POp::floor_4_floats:

                opText = opArg1 + " = floor(" + opArg1 + ")";

                break;


            case POp::invsqrt_float:

            case POp::invsqrt_2_floats:

            case POp::invsqrt_3_floats:

            case POp::invsqrt_4_floats:

                opText = opArg1 + " = inversesqrt(" + opArg1 + ")";

                break;


            case POp::inverse_mat2:

            case POp::inverse_mat3:

            case POp::inverse_mat4:

                opText = opArg1 + " = inverse(" + opArg1 + ")";

                break;


            case POp::add_float:     case POp::add_int:

            case POp::add_2_floats:  case POp::add_2_ints:

            case POp::add_3_floats:  case POp::add_3_ints:

            case POp::add_4_floats:  case POp::add_4_ints:

            case POp::add_n_floats:  case POp::add_n_ints:

            case POp::add_imm_float: case POp::add_imm_int:

                opText = opArg1 + " += " + opArg2;

                break;


            case POp::sub_float:    case POp::sub_int:

            case POp::sub_2_floats: case POp::sub_2_ints:

            case POp::sub_3_floats: case POp::sub_3_ints:

            case POp::sub_4_floats: case POp::sub_4_ints:

            case POp::sub_n_floats: case POp::sub_n_ints:

                opText = opArg1 + " -= " + opArg2;

                break;


            case POp::mul_float:     case POp::mul_int:

            case POp::mul_2_floats:  case POp::mul_2_ints:

            case POp::mul_3_floats:  case POp::mul_3_ints:

            case POp::mul_4_floats:  case POp::mul_4_ints:

            case POp::mul_n_floats:  case POp::mul_n_ints:

            case POp::mul_imm_float: case POp::mul_imm_int:

                opText = opArg1 + " *= " + opArg2;

                break;


            case POp::div_float:    case POp::div_int:    case POp::div_uint:

            case POp::div_2_floats: case POp::div_2_ints: case POp::div_2_uints:

            case POp::div_3_floats: case POp::div_3_ints: case POp::div_3_uints:

            case POp::div_4_floats: case POp::div_4_ints: case POp::div_4_uints:

            case POp::div_n_floats: case POp::div_n_ints: case POp::div_n_uints:

                opText = opArg1 + " /= " + opArg2;

                break;


            case POp::matrix_multiply_2:

            case POp::matrix_multiply_3:

            case POp::matrix_multiply_4:

                opText = opArg1 + " = " + opArg2 + " * " + opArg3;

                break;


            case POp::mod_float:

            case POp::mod_2_floats:

            case POp::mod_3_floats:

            case POp::mod_4_floats:

            case POp::mod_n_floats:

                opText = opArg1 + " = mod(" + opArg1 + ", " + opArg2 + ")";

                break;


            case POp::min_float:        case POp::min_int:          case POp::min_uint:

            case POp::min_2_floats:     case POp::min_2_ints:       case POp::min_2_uints:

            case POp::min_3_floats:     case POp::min_3_ints:       case POp::min_3_uints:

            case POp::min_4_floats:     case POp::min_4_ints:       case POp::min_4_uints:

            case POp::min_n_floats:     case POp::min_n_ints:       case POp::min_n_uints:

            case POp::min_imm_float:

                opText = opArg1 + " = min(" + opArg1 + ", " + opArg2 + ")";

                break;


            case POp::max_float:        case POp::max_int:          case POp::max_uint:

            case POp::max_2_floats:     case POp::max_2_ints:       case POp::max_2_uints:

            case POp::max_3_floats:     case POp::max_3_ints:       case POp::max_3_uints:

            case POp::max_4_floats:     case POp::max_4_ints:       case POp::max_4_uints:

            case POp::max_n_floats:     case POp::max_n_ints:       case POp::max_n_uints:

            case POp::max_imm_float:

                opText = opArg1 + " = max(" + opArg1 + ", " + opArg2 + ")";

                break;


            case POp::cmplt_float:     case POp::cmplt_int:     case POp::cmplt_uint:

            case POp::cmplt_2_floats:  case POp::cmplt_2_ints:  case POp::cmplt_2_uints:

            case POp::cmplt_3_floats:  case POp::cmplt_3_ints:  case POp::cmplt_3_uints:

            case POp::cmplt_4_floats:  case POp::cmplt_4_ints:  case POp::cmplt_4_uints:

            case POp::cmplt_n_floats:  case POp::cmplt_n_ints:  case POp::cmplt_n_uints:

            case POp::cmplt_imm_float: case POp::cmplt_imm_int: case POp::cmplt_imm_uint:

                opText = opArg1 + " = lessThan(" + opArg1 + ", " + opArg2 + ")";

                break;


            case POp::cmple_float:     case POp::cmple_int:     case POp::cmple_uint:

            case POp::cmple_2_floats:  case POp::cmple_2_ints:  case POp::cmple_2_uints:

            case POp::cmple_3_floats:  case POp::cmple_3_ints:  case POp::cmple_3_uints:

            case POp::cmple_4_floats:  case POp::cmple_4_ints:  case POp::cmple_4_uints:

            case POp::cmple_n_floats:  case POp::cmple_n_ints:  case POp::cmple_n_uints:

            case POp::cmple_imm_float: case POp::cmple_imm_int: case POp::cmple_imm_uint:

                opText = opArg1 + " = lessThanEqual(" + opArg1 + ", " + opArg2 + ")";

                break;


            case POp::cmpeq_float:     case POp::cmpeq_int:

            case POp::cmpeq_2_floats:  case POp::cmpeq_2_ints:

            case POp::cmpeq_3_floats:  case POp::cmpeq_3_ints:

            case POp::cmpeq_4_floats:  case POp::cmpeq_4_ints:

            case POp::cmpeq_n_floats:  case POp::cmpeq_n_ints:

            case POp::cmpeq_imm_float: case POp::cmpeq_imm_int:

                opText = opArg1 + " = equal(" + opArg1 + ", " + opArg2 + ")";

                break;


            case POp::cmpne_float:     case POp::cmpne_int:

            case POp::cmpne_2_floats:  case POp::cmpne_2_ints:

            case POp::cmpne_3_floats:  case POp::cmpne_3_ints:

            case POp::cmpne_4_floats:  case POp::cmpne_4_ints:

            case POp::cmpne_n_floats:  case POp::cmpne_n_ints:

            case POp::cmpne_imm_float: case POp::cmpne_imm_int:

                opText = opArg1 + " = notEqual(" + opArg1 + ", " + opArg2 + ")";

                break;


            case POp::mix_float:      case POp::mix_int:

            case POp::mix_2_floats:   case POp::mix_2_ints:

            case POp::mix_3_floats:   case POp::mix_3_ints:

            case POp::mix_4_floats:   case POp::mix_4_ints:

            case POp::mix_n_floats:   case POp::mix_n_ints:

                opText = opArg1 + " = mix(" + opArg2 + ", " + opArg3 + ", " + opArg1 + ")";

                break;


            case POp::smoothstep_n_floats:

                opText = opArg1 + " = smoothstep(" + opArg1 + ", " + opArg2 + ", " + opArg3 + ")";

                break;


            case POp::jump:

            case POp::branch_if_all_lanes_active:

            case POp::branch_if_any_lanes_active:

            case POp::branch_if_no_lanes_active:

            case POp::invoke_shader:

            case POp::invoke_color_filter:

            case POp::invoke_blender:

                opText = std::string(opName) + " " + opArg1;

                break;


            case POp::invoke_to_linear_srgb:

                opText = opArg1 + " = toLinearSrgb(" + opArg1 + ")";

                break;


            case POp::invoke_from_linear_srgb:

                opText = opArg1 + " = fromLinearSrgb(" + opArg1 + ")";

                break;


            case POp::branch_if_no_active_lanes_eq:

                opText = "branch " + opArg1 + " if no lanes of " + opArg2 + " == " + opArg3;

                break;


            case POp::label:

                opText = "label " + opArg1;

                break;


            case POp::case_op:

                opText = "if (" + opArg1 + " == " + opArg3 +

                         ") { LoopMask = true; " + opArg2 + " = false; }";

                break;


            case POp::continue_op:

                opText = opArg1 +

                         " |= Mask(0xFFFFFFFF); LoopMask &= ~(CondMask & LoopMask & RetMask)";

                break;


            default:

                break;

        }


        opName = opName.substr(0, 30);

        if (!opText.empty()) {

            out->writeText(SkSL::String::printf("%-30.*s %s\n",

                                                (int)opName.size(), opName.data(),

                                                opText.c_str()).c_str());

        } else {

            out->writeText(SkSL::String::printf("%.*s\n",

                                                (int)opName.size(), opName.data()).c_str());

        }

    }

}


void Program::dump(SkWStream* out, bool writeInstructionCount) const {

    Dumper(*this).dump(out, writeInstructionCount);

}


}  // namespace SkSL::RP

sk_bzero
sk_bzero(glyphs, sizeof(glyphs))

count
int count
Definition: FontMgrTest.cpp:50

pos
SkPoint pos
Definition: ImageShaderTest.cpp:27

SkArenaAlloc.h

SkUNREACHABLE
#define SkUNREACHABLE
Definition: SkAssert.h:135

SkDEBUGFAIL
#define SkDEBUGFAIL(message)
Definition: SkAssert.h:118

SkDEBUGFAILF
#define SkDEBUGFAILF(fmt,...)
Definition: SkAssert.h:119

SkASSERT
#define SkASSERT(cond)
Definition: SkAssert.h:116

SkASSERTF
#define SkASSERTF(cond, fmt,...)
Definition: SkAssert.h:117

SkBitSet.h

SkMalloc.h

SkOpts.h

SkRasterPipelineContextUtils.h

SkRasterPipelineOpContexts.h

SkRPOffset
uint32_t SkRPOffset
Definition: SkRasterPipelineOpContexts.h:205

SkRasterPipelineOpList.h

SkRasterPipelineOp
SkRasterPipelineOp
Definition: SkRasterPipelineOpList.h:213

SK_RASTER_PIPELINE_OPS_ALL
#define SK_RASTER_PIPELINE_OPS_ALL(M)
Definition: SkRasterPipelineOpList.h:208

kNumRasterPipelineHighpOps
static constexpr int kNumRasterPipelineHighpOps
Definition: SkRasterPipelineOpList.h:222

SkRasterPipeline.h

SkSLDebugTracePriv.h

SkSLPosition.h

ALL_N_WAY_TERNARY_OP_CASES
#define ALL_N_WAY_TERNARY_OP_CASES
Definition: SkSLRasterPipelineBuilder.cpp:125

ALL_IMMEDIATE_BINARY_OP_CASES
#define ALL_IMMEDIATE_BINARY_OP_CASES
Definition: SkSLRasterPipelineBuilder.cpp:102

ALL_MULTI_SLOT_TERNARY_OP_CASES
#define ALL_MULTI_SLOT_TERNARY_OP_CASES
Definition: SkSLRasterPipelineBuilder.cpp:128

ALL_N_WAY_BINARY_OP_CASES
#define ALL_N_WAY_BINARY_OP_CASES
Definition: SkSLRasterPipelineBuilder.cpp:67

ALL_MULTI_SLOT_UNARY_OP_CASES
#define ALL_MULTI_SLOT_UNARY_OP_CASES
Definition: SkSLRasterPipelineBuilder.cpp:57

ALL_MULTI_SLOT_BINARY_OP_CASES
#define ALL_MULTI_SLOT_BINARY_OP_CASES
Definition: SkSLRasterPipelineBuilder.cpp:71

ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES
#define ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES
Definition: SkSLRasterPipelineBuilder.cpp:122

ALL_SINGLE_SLOT_UNARY_OP_CASES
#define ALL_SINGLE_SLOT_UNARY_OP_CASES
Definition: SkSLRasterPipelineBuilder.cpp:44

SkSLRasterPipelineBuilder.h

SKRP_EXTENDED_OPS
#define SKRP_EXTENDED_OPS(M)
Definition: SkSLRasterPipelineBuilder.h:45

SkSLString.h

SkSLTraceHook.h

SkSpan
SkSpan(Container &&) -> SkSpan< std::remove_pointer_t< decltype(std::data(std::declval< Container >()))> >

SkStream.h

copy
static void copy(void *dst, const uint8_t *src, int width, int bpp, int deltaSrc, int offset, const SkPMColor ctable[])
Definition: SkSwizzler.cpp:31

SkTHash.h

SkTo.h

SkToInt
constexpr int SkToInt(S x)
Definition: SkTo.h:29

N
#define N
Definition: beziers.cpp:19

type
GLenum type
Definition: blit_command_gles.cc:126

SkArenaAlloc
Definition: SkArenaAlloc.h:105

SkArenaAlloc::makeBytesAlignedTo
void * makeBytesAlignedTo(size_t size, size_t align)
Definition: SkArenaAlloc.h:200

SkArenaAlloc::makeArray
T * makeArray(size_t count)
Definition: SkArenaAlloc.h:181

SkArenaAlloc::make
auto make(Ctor &&ctor) -> decltype(ctor(nullptr))
Definition: SkArenaAlloc.h:120

SkBitSet
Definition: SkBitSet.h:21

SkRasterPipeline
Definition: SkRasterPipeline.h:68

SkRasterPipeline::getNumStages
int getNumStages() const
Definition: SkRasterPipeline.h:102

SkRasterPipeline::append
void append(SkRasterPipelineOp, void *=nullptr)
Definition: SkRasterPipeline.cpp:44

SkRasterPipeline::appendStackRewind
void appendStackRewind()
Definition: SkRasterPipeline.cpp:540

SkSL::DebugTracePriv
Definition: SkSLDebugTracePriv.h:62

SkSL::DebugTracePriv::fUniformInfo
std::vector< SlotDebugInfo > fUniformInfo
Definition: SkSLDebugTracePriv.h:95

SkSL::DebugTracePriv::fFuncInfo
std::vector< FunctionDebugInfo > fFuncInfo
Definition: SkSLDebugTracePriv.h:99

SkSL::DebugTracePriv::fTraceInfo
std::vector< TraceInfo > fTraceInfo
Definition: SkSLDebugTracePriv.h:102

SkSL::DebugTracePriv::fSlotInfo
std::vector< SlotDebugInfo > fSlotInfo
Definition: SkSLDebugTracePriv.h:98

SkSL::Position::startOffset
int startOffset() const
Definition: SkSLPosition.h:40

SkSL::Position::valid
bool valid() const
Definition: SkSLPosition.h:34

SkSL::RP::Builder::dot_floats
void dot_floats(int32_t slots)
Definition: SkSLRasterPipelineBuilder.cpp:264

SkSL::RP::Builder::copy_stack_to_slots_indirect
void copy_stack_to_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange)
Definition: SkSLRasterPipelineBuilder.cpp:886

SkSL::RP::Builder::push_duplicates
void push_duplicates(int count)
Definition: SkSLRasterPipelineBuilder.cpp:652

SkSL::RP::Builder::copy_slots_unmasked
void copy_slots_unmasked(SlotRange dst, SlotRange src)
Definition: SkSLRasterPipelineBuilder.cpp:922

SkSL::RP::Builder::pad_stack
void pad_stack(int32_t count)
Definition: SkSLRasterPipelineBuilder.cpp:290

SkSL::RP::Builder::binary_op
void binary_op(BuilderOp op, int32_t slots)
Definition: SkSLRasterPipelineBuilder.cpp:222

SkSL::RP::Builder::exchange_src
void exchange_src()
Definition: SkSLRasterPipelineBuilder.cpp:834

SkSL::RP::Builder::swizzle_copy_stack_to_slots
void swizzle_copy_stack_to_slots(SlotRange dst, SkSpan< const int8_t > components, int offsetFromStackTop)
Definition: SkSLRasterPipelineBuilder.cpp:1098

SkSL::RP::Builder::copy_uniform_to_slots_unmasked
void copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src)
Definition: SkSLRasterPipelineBuilder.cpp:963

SkSL::RP::Builder::copy_constant
void copy_constant(Slot slot, int constantValue)
Definition: SkSLRasterPipelineBuilder.cpp:904

SkSL::RP::Builder::pop_src_rgba
void pop_src_rgba()
Definition: SkSLRasterPipelineBuilder.cpp:847

SkSL::RP::Builder::branch_if_no_lanes_active
void branch_if_no_lanes_active(int labelID)
Definition: SkSLRasterPipelineBuilder.cpp:502

SkSL::RP::Builder::merge_condition_mask
void merge_condition_mask()
Definition: SkSLRasterPipelineBuilder.cpp:1030

SkSL::RP::Builder::push_clone_indirect_from_stack
void push_clone_indirect_from_stack(SlotRange fixedOffset, int dynamicStackID, int otherStackID, int offsetFromStackTop)
Definition: SkSLRasterPipelineBuilder.cpp:717

SkSL::RP::Builder::jump
void jump(int labelID)
Definition: SkSLRasterPipelineBuilder.cpp:455

SkSL::RP::Builder::label
void label(int labelID)
Definition: SkSLRasterPipelineBuilder.cpp:429

SkSL::RP::Builder::push_clone
void push_clone(int numSlots, int offsetFromStackTop=0)
Definition: SkSLRasterPipelineBuilder.cpp:679

SkSL::RP::Builder::swizzle_copy_stack_to_slots_indirect
void swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange, SkSpan< const int8_t > components, int offsetFromStackTop)
Definition: SkSLRasterPipelineBuilder.cpp:1114

SkSL::RP::Builder::zero_slots_unmasked
void zero_slots_unmasked(SlotRange dst)
Definition: SkSLRasterPipelineBuilder.cpp:1046

SkSL::RP::Builder::swizzle
void swizzle(int consumedSlots, SkSpan< const int8_t > components)
Definition: SkSLRasterPipelineBuilder.cpp:1136

SkSL::RP::Builder::matrix_resize
void matrix_resize(int origColumns, int origRows, int newColumns, int newRows)
Definition: SkSLRasterPipelineBuilder.cpp:1215

SkSL::RP::Builder::branch_if_no_active_lanes_on_stack_top_equal
void branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID)
Definition: SkSLRasterPipelineBuilder.cpp:519

SkSL::RP::Builder::inverse_matrix
void inverse_matrix(int32_t n)
Definition: SkSLRasterPipelineBuilder.cpp:281

SkSL::RP::Builder::push_uniform
void push_uniform(SlotRange src)
Definition: SkSLRasterPipelineBuilder.cpp:592

SkSL::RP::Builder::pop_slots
void pop_slots(SlotRange dst)
Definition: SkSLRasterPipelineBuilder.cpp:731

SkSL::RP::Builder::pop_return_mask
void pop_return_mask()
Definition: SkSLRasterPipelineBuilder.cpp:1002

SkSL::RP::Builder::transpose
void transpose(int columns, int rows)
Definition: SkSLRasterPipelineBuilder.cpp:1191

SkSL::RP::Builder::discard_stack
void discard_stack(int32_t count, int stackID)
Definition: SkSLRasterPipelineBuilder.cpp:340

SkSL::RP::Builder::trace_var_indirect
void trace_var_indirect(int traceMaskStackID, SlotRange fixedRange, int dynamicStackID, SlotRange limitRange)
Definition: SkSLRasterPipelineBuilder.cpp:622

SkSL::RP::Builder::copy_stack_to_slots
void copy_stack_to_slots(SlotRange dst)
Definition: SkSLRasterPipelineBuilder.h:450

SkSL::RP::Builder::copy_immutable_unmasked
void copy_immutable_unmasked(SlotRange dst, SlotRange src)
Definition: SkSLRasterPipelineBuilder.cpp:944

SkSL::RP::Builder::finish
std::unique_ptr< Program > finish(int numValueSlots, int numUniformSlots, int numImmutableSlots, DebugTracePriv *debugTrace=nullptr)
Definition: SkSLRasterPipelineBuilder.cpp:1263

SkSL::RP::Builder::push_slots_or_immutable_indirect
void push_slots_or_immutable_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange, BuilderOp op)
Definition: SkSLRasterPipelineBuilder.cpp:578

SkSL::RP::Builder::push_uniform_indirect
void push_uniform_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange)
Definition: SkSLRasterPipelineBuilder.cpp:609

SkSL::RP::Builder::ternary_op
void ternary_op(BuilderOp op, int32_t slots)
Definition: SkSLRasterPipelineBuilder.cpp:251

SkSL::RP::Builder::push_constant_f
void push_constant_f(float val)
Definition: SkSLRasterPipelineBuilder.h:392

SkSL::RP::Builder::diagonal_matrix
void diagonal_matrix(int columns, int rows)
Definition: SkSLRasterPipelineBuilder.cpp:1203

SkSL::RP::Builder::refract_floats
void refract_floats()
Definition: SkSLRasterPipelineBuilder.cpp:277

SkSL::RP::Builder::push_clone_from_stack
void push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop)
Definition: SkSLRasterPipelineBuilder.cpp:694

SkSL::RP::Builder::unary_op
void unary_op(BuilderOp op, int32_t slots)
Definition: SkSLRasterPipelineBuilder.cpp:209

SkSL::RP::Builder::push_constant_i
void push_constant_i(int32_t val, int count=1)
Definition: SkSLRasterPipelineBuilder.cpp:638

SkSL::RP::Builder::push_slots_or_immutable
void push_slots_or_immutable(SlotRange src, BuilderOp op)
Definition: SkSLRasterPipelineBuilder.cpp:534

SkSL::RP::Builder::push_condition_mask
void push_condition_mask()
Definition: SkSLRasterPipelineBuilder.cpp:1016

SkSL::RP::Builder::executionMaskWritesAreEnabled
bool executionMaskWritesAreEnabled()
Definition: SkSLRasterPipelineBuilder.h:324

SkSL::RP::Builder::matrix_multiply
void matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows)
Definition: SkSLRasterPipelineBuilder.cpp:1251

SkSL::RP::Builder::branch_if_all_lanes_active
void branch_if_all_lanes_active(int labelID)
Definition: SkSLRasterPipelineBuilder.cpp:484

SkSL::RP::Builder::pop_slots_unmasked
void pop_slots_unmasked(SlotRange dst)
Definition: SkSLRasterPipelineBuilder.cpp:828

SkSL::RP::Builder::branch_if_any_lanes_active
void branch_if_any_lanes_active(int labelID)
Definition: SkSLRasterPipelineBuilder.cpp:466

SkSL::RP::Builder::copy_stack_to_slots_unmasked
void copy_stack_to_slots_unmasked(SlotRange dst)
Definition: SkSLRasterPipelineBuilder.h:472

SkSL::RP::Callbacks
Definition: SkSLRasterPipelineBuilder.h:140

SkSL::RP::Callbacks::toLinearSrgb
virtual void toLinearSrgb(const void *color)=0

SkSL::RP::Callbacks::appendShader
virtual bool appendShader(int index)=0

SkSL::RP::Callbacks::appendBlender
virtual bool appendBlender(int index)=0

SkSL::RP::Callbacks::appendColorFilter
virtual bool appendColorFilter(int index)=0

SkSL::RP::Callbacks::fromLinearSrgb
virtual void fromLinearSrgb(const void *color)=0

SkSL::RP::Program::Dumper
Definition: SkSLRasterPipelineBuilder.cpp:2423

SkSL::RP::Program::Dumper::swizzleWidth
size_t swizzleWidth(SkSpan< T > offsets) const
Definition: SkSLRasterPipelineBuilder.cpp:2759

SkSL::RP::Program::Dumper::buildUniqueSlotNameList
void buildUniqueSlotNameList()
Definition: SkSLRasterPipelineBuilder.cpp:2442

SkSL::RP::Program::Dumper::uniformName
std::string uniformName(SlotRange range) const
Definition: SkSLRasterPipelineBuilder.cpp:2561

SkSL::RP::Program::Dumper::immCtx
std::string immCtx(const void *ctx, bool showAsFloat=true) const
Definition: SkSLRasterPipelineBuilder.cpp:2508

SkSL::RP::Program::Dumper::adjacentPtrCtx
std::tuple< std::string, std::string > adjacentPtrCtx(const void *ctx, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2693

SkSL::RP::Program::Dumper::offsetToPtr
std::byte * offsetToPtr(SkRPOffset offset) const
Definition: SkSLRasterPipelineBuilder.cpp:2658

SkSL::RP::Program::Dumper::swizzleCopyCtx
std::tuple< std::string, std::string > swizzleCopyCtx(ProgramOp op, const void *v) const
Definition: SkSLRasterPipelineBuilder.cpp:2782

SkSL::RP::Program::Dumper::uniformPtrCtx
std::string uniformPtrCtx(const float *ptr, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2566

SkSL::RP::Program::Dumper::slotName
std::string slotName(SlotRange range) const
Definition: SkSLRasterPipelineBuilder.cpp:2556

SkSL::RP::Program::Dumper::dump
void dump(SkWStream *out, bool writeInstructionCount)
Definition: SkSLRasterPipelineBuilder.cpp:2840

SkSL::RP::Program::Dumper::buildLabelToStageMap
void buildLabelToStageMap()
Definition: SkSLRasterPipelineBuilder.cpp:2430

SkSL::RP::Program::Dumper::copyUniformCtx
std::tuple< std::string, std::string > copyUniformCtx(const void *v, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2686

SkSL::RP::Program::Dumper::imm
std::string imm(float immFloat, bool showAsFloat=true) const
Definition: SkSLRasterPipelineBuilder.cpp:2488

SkSL::RP::Program::Dumper::valuePtrCtx
std::string valuePtrCtx(const float *ptr, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2584

SkSL::RP::Program::Dumper::adjacentOffsetCtx
std::tuple< std::string, std::string > adjacentOffsetCtx(SkRPOffset offset, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2700

SkSL::RP::Program::Dumper::constantCtx
std::tuple< std::string, std::string > constantCtx(const void *v, int slots, bool showAsFloat=true) const
Definition: SkSLRasterPipelineBuilder.cpp:2668

SkSL::RP::Program::Dumper::swizzlePtr
std::string swizzlePtr(const void *ptr, SkSpan< T > offsets) const
Definition: SkSLRasterPipelineBuilder.cpp:2768

SkSL::RP::Program::Dumper::slotOrUniformName
std::string slotOrUniformName(SkSpan< const SlotDebugInfo > debugInfo, SkSpan< const std::string > names, SlotRange range) const
Definition: SkSLRasterPipelineBuilder.cpp:2528

SkSL::RP::Program::Dumper::swizzleOffsetSpan
std::string swizzleOffsetSpan(SkSpan< T > offsets) const
Definition: SkSLRasterPipelineBuilder.cpp:2736

SkSL::RP::Program::Dumper::adjacentBinaryOpCtx
std::tuple< std::string, std::string > adjacentBinaryOpCtx(const void *v) const
Definition: SkSLRasterPipelineBuilder.cpp:2706

SkSL::RP::Program::Dumper::adjacentTernaryOpCtx
std::tuple< std::string, std::string, std::string > adjacentTernaryOpCtx(const void *v) const
Definition: SkSLRasterPipelineBuilder.cpp:2728

SkSL::RP::Program::Dumper::matrixMultiply
std::tuple< std::string, std::string, std::string > matrixMultiply(const void *v) const
Definition: SkSLRasterPipelineBuilder.cpp:2809

SkSL::RP::Program::Dumper::ptrCtx
std::string ptrCtx(const void *ctx, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2635

SkSL::RP::Program::Dumper::adjacent3OffsetCtx
std::tuple< std::string, std::string, std::string > adjacent3OffsetCtx(SkRPOffset offset, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2722

SkSL::RP::Program::Dumper::multiImmCtx
std::string multiImmCtx(const float *ptr, int count) const
Definition: SkSLRasterPipelineBuilder.cpp:2615

SkSL::RP::Program::Dumper::branchOffset
std::string branchOffset(const SkRasterPipeline_BranchCtx *ctx, int index) const
Definition: SkSLRasterPipelineBuilder.cpp:2478

SkSL::RP::Program::Dumper::adjacent3PtrCtx
std::tuple< std::string, std::string, std::string > adjacent3PtrCtx(const void *ctx, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2713

SkSL::RP::Program::Dumper::asRange
std::string asRange(int first, int count) const
Definition: SkSLRasterPipelineBuilder.cpp:2515

SkSL::RP::Program::Dumper::offsetCtx
std::string offsetCtx(SkRPOffset offset, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2663

SkSL::RP::Program::Dumper::shuffleCtx
std::tuple< std::string, std::string > shuffleCtx(const void *v) const
Definition: SkSLRasterPipelineBuilder.cpp:2791

SkSL::RP::Program::Dumper::swizzleCtx
std::tuple< std::string, std::string > swizzleCtx(ProgramOp op, const void *v) const
Definition: SkSLRasterPipelineBuilder.cpp:2774

SkSL::RP::Program::Dumper::Dumper
Dumper(const Program &p)
Definition: SkSLRasterPipelineBuilder.cpp:2425

SkSL::RP::Program::Dumper::binaryOpCtx
std::tuple< std::string, std::string > binaryOpCtx(const void *v, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2678

SkSL::RP::Program::Dumper::immutablePtrCtx
std::string immutablePtrCtx(const float *ptr, int numSlots) const
Definition: SkSLRasterPipelineBuilder.cpp:2604

SkSL::RP::Program
Definition: SkSLRasterPipelineBuilder.h:152

SkSL::RP::Program::appendStages
bool appendStages(SkRasterPipeline *pipeline, SkArenaAlloc *alloc, Callbacks *callbacks, SkSpan< const float > uniforms) const
Definition: SkSLRasterPipelineBuilder.cpp:1652

SkSL::RP::Program::Dumper
friend class Dumper
Definition: SkSLRasterPipelineBuilder.h:281

SkSL::RP::Program::dump
void dump(SkWStream *out, bool writeInstructionCount=false) const
Definition: SkSLRasterPipelineBuilder.cpp:3771

SkSL::RP::Program::Program
Program(skia_private::TArray< Instruction > instrs, int numValueSlots, int numUniformSlots, int numImmutableSlots, int numLabels, DebugTracePriv *debugTrace)
Definition: SkSLRasterPipelineBuilder.cpp:1390

SkSL::RP::Program::~Program
~Program()

SkSL::Tracer::Make
static std::unique_ptr< Tracer > Make(std::vector< TraceInfo > *traceInfo)
Definition: SkSLTraceHook.cpp:13

SkSpan
Definition: SkSpan_impl.h:65

SkSpan::begin
constexpr T * begin() const
Definition: SkSpan_impl.h:90

SkSpan::end
constexpr T * end() const
Definition: SkSpan_impl.h:91

SkSpan::rbegin
constexpr auto rbegin() const
Definition: SkSpan_impl.h:92

SkSpan::rend
constexpr auto rend() const
Definition: SkSpan_impl.h:93

SkSpan::size
constexpr size_t size() const
Definition: SkSpan_impl.h:95

SkString::c_str
const char * c_str() const
Definition: SkString.h:133

SkWStream
Definition: SkStream.h:218

Stage
Definition: MotionMarkSlide.cpp:33

skia_private::TArray
Definition: SkTArray.h:40

skia_private::TArray::push_back_n
T * push_back_n(int n)
Definition: SkTArray.h:267

skia_private::TArray::resize
void resize(size_t count)
Definition: SkTArray.h:423

skia_private::TArray::size
int size() const
Definition: SkTArray.h:421

skia_private::TArray::begin
T * begin()
Definition: SkTArray.h:398

skia_private::TArray::reserve_exact
void reserve_exact(int n)
Definition: SkTArray.h:181

skia_private::TArray::push_back
T & push_back()
Definition: SkTArray.h:205

skia_private::THashMap
Definition: SkTHash.h:442

skia_private::THashMap::count
int count() const
Definition: SkTHash.h:471

skia_private::THashMap::find
V * find(const K &key) const
Definition: SkTHash.h:494

begin
static const char * begin(const StringSlice &s)
Definition: editor.cpp:252

s
struct MyStruct s

end
glong glong end
Definition: fl_accessible_text_field.cc:40

value
uint8_t value
Definition: fl_standard_message_codec.cc:36

max
static float max(float r, float g, float b)
Definition: hsl.cpp:49

min
static float min(float r, float g, float b)
Definition: hsl.cpp:48

text
std::u16string text
Definition: keyboard_unittests.cc:332

y
double y
Definition: mouse-input-test.cc:83

x
double x
Definition: mouse-input-test.cc:82

SK_OPTS_NS::stack_rewind
static void ABI stack_rewind(Params *params, SkRasterPipelineStage *program, F r, F g, F b, F a)
Definition: SkRasterPipeline_opts.h:1681

SkOpts::raster_pipeline_highp_stride
size_t raster_pipeline_highp_stride
Definition: SkOpts.cpp:26

SkRPCtxUtils::Pack
static void * Pack(const T &ctx, SkArenaAlloc *alloc)
Definition: SkRasterPipelineContextUtils.h:28

SkRPCtxUtils::Unpack
static UnpackedType< T > Unpack(const T *ctx)
Definition: SkRasterPipelineContextUtils.h:42

SkSL::RP
Definition: SkRuntimeEffect.h:49

SkSL::RP::Slot
int Slot
Definition: SkSLRasterPipelineBuilder.h:36

SkSL::RP::max_packed_nybble
static int max_packed_nybble(uint32_t components, size_t numComponents)
Definition: SkSLRasterPipelineBuilder.cpp:1089

SkSL::RP::NA
constexpr Slot NA
Definition: SkSLRasterPipelineBuilder.h:37

SkSL::RP::immutable_data_is_splattable
static bool immutable_data_is_splattable(int32_t *immutablePtr, int numSlots)
Definition: SkSLRasterPipelineBuilder.cpp:1418

SkSL::RP::is_immediate_op
static bool is_immediate_op(BuilderOp op)
Definition: SkSLRasterPipelineBuilder.cpp:132

SkSL::RP::unpack_nybbles_to_offsets
static void unpack_nybbles_to_offsets(uint32_t components, SkSpan< T > offsets)
Definition: SkSLRasterPipelineBuilder.cpp:1081

SkSL::RP::ProgramOp
ProgramOp
Definition: SkSLRasterPipelineBuilder.h:61

SkSL::RP::is_multi_slot_immediate_op
static bool is_multi_slot_immediate_op(BuilderOp op)
Definition: SkSLRasterPipelineBuilder.cpp:139

SkSL::RP::pack_nybbles
static int pack_nybbles(SkSpan< const int8_t > components)
Definition: SkSLRasterPipelineBuilder.cpp:1069

SkSL::RP::context_bit_pun
static void * context_bit_pun(intptr_t val)
Definition: SkSLRasterPipelineBuilder.cpp:1630

SkSL::RP::BuilderOp
BuilderOp
Definition: SkSLRasterPipelineBuilder.h:77

SkSL::RP::BuilderOp::pop_dst_rgba
@ pop_dst_rgba

SkSL::RP::BuilderOp::pop_return_mask
@ pop_return_mask

SkSL::RP::BuilderOp::copy_uniform_to_slots_unmasked
@ copy_uniform_to_slots_unmasked

SkSL::RP::BuilderOp::push_uniform_indirect
@ push_uniform_indirect

SkSL::RP::BuilderOp::store_immutable_value
@ store_immutable_value

SkSL::RP::BuilderOp::push_return_mask
@ push_return_mask

SkSL::RP::BuilderOp::push_slots_indirect
@ push_slots_indirect

SkSL::RP::BuilderOp::push_immutable_indirect
@ push_immutable_indirect

SkSL::RP::BuilderOp::copy_stack_to_slots_indirect
@ copy_stack_to_slots_indirect

SkSL::RP::BuilderOp::trace_var_indirect
@ trace_var_indirect

SkSL::RP::BuilderOp::copy_stack_to_slots_unmasked
@ copy_stack_to_slots_unmasked

SkSL::RP::BuilderOp::discard_stack
@ discard_stack

SkSL::RP::BuilderOp::swizzle_copy_stack_to_slots_indirect
@ swizzle_copy_stack_to_slots_indirect

SkSL::RP::BuilderOp::push_clone
@ push_clone

SkSL::RP::BuilderOp::push_clone_indirect_from_stack
@ push_clone_indirect_from_stack

SkSL::RP::BuilderOp::select
@ select

SkSL::RP::BuilderOp::swizzle_copy_stack_to_slots
@ swizzle_copy_stack_to_slots

SkSL::RP::BuilderOp::push_device_xy01
@ push_device_xy01

SkSL::RP::BuilderOp::push_src_rgba
@ push_src_rgba

SkSL::RP::BuilderOp::copy_stack_to_slots
@ copy_stack_to_slots

SkSL::RP::BuilderOp::pop_src_rgba
@ pop_src_rgba

SkSL::RP::BuilderOp::pop_condition_mask
@ pop_condition_mask

SkSL::RP::BuilderOp::push_constant
@ push_constant

SkSL::RP::BuilderOp::push_immutable
@ push_immutable

SkSL::RP::BuilderOp::push_clone_from_stack
@ push_clone_from_stack

SkSL::RP::BuilderOp::pad_stack
@ pad_stack

SkSL::RP::BuilderOp::push_slots
@ push_slots

SkSL::RP::BuilderOp::push_dst_rgba
@ push_dst_rgba

SkSL::RP::BuilderOp::branch_if_no_active_lanes_on_stack_top_equal
@ branch_if_no_active_lanes_on_stack_top_equal

SkSL::RP::BuilderOp::push_condition_mask
@ push_condition_mask

SkSL::RP::BuilderOp::pop_and_reenable_loop_mask
@ pop_and_reenable_loop_mask

SkSL::RP::BuilderOp::push_loop_mask
@ push_loop_mask

SkSL::RP::BuilderOp::pop_loop_mask
@ pop_loop_mask

SkSL::RP::BuilderOp::push_uniform
@ push_uniform

SkSL::RP::slot_ranges_overlap
static bool slot_ranges_overlap(SlotRange x, SlotRange y)
Definition: SkSLRasterPipelineBuilder.cpp:899

SkSL::RP::stack_usage
static int stack_usage(const Instruction &inst)
Definition: SkSLRasterPipelineBuilder.cpp:1278

SkSL::RP::convert_n_way_op_to_immediate
static BuilderOp convert_n_way_op_to_immediate(BuilderOp op, int slots, int32_t *constantValue)
Definition: SkSLRasterPipelineBuilder.cpp:146

SkSL::String::printf
std::string printf(const char *fmt,...) SK_PRINTF_LIKE(1
Definition: SkSLString.cpp:83

SkSL::String::Separator
std::string void void auto Separator()
Definition: SkSLString.h:30

dart_profiler_symbols.p
p
Definition: dart_profiler_symbols.py:55

dart::names
static const char *const names[]
Definition: symbols.cc:24

dump_adb_log.out
out
Definition: dump_adb_log.py:10

flutter::name
DEF_SWITCHES_START aot vmservice shared library name
Definition: switches.h:32

gn.cp.dst
dst
Definition: cp.py:12

malisc.inst
inst
Definition: malisc.py:37

mskp_parser.src
src
Definition: mskp_parser.py:22

mskp_parser.offsets
list offsets
Definition: mskp_parser.py:37

protoc_wrapper.e
e
Definition: protoc_wrapper.py:226

skcms_private::f
float f
Definition: skcms_Transform.h:121

skgpu::ContextType
ContextType
Definition: ContextType.h:19

skia_private
Definition: SkTArray.h:32

skstd::to_string
std::string to_string(float value)
Definition: SkSLString.cpp:50

skvx::isfinite
SINT bool isfinite(const Vec< N, T > &v)
Definition: SkVx.h:1003

skvx::shuffle
SI Vec< sizeof...(Ix), T > shuffle(const Vec< N, T > &)
Definition: SkVx.h:667

std
Definition: ref_ptr.h:256

tools.skpbench.skpbench.int
int
Definition: skpbench.py:49

tools.skpbench.skpbench.float
float
Definition: skpbench.py:42

to_string
static SkString to_string(int n)
Definition: nanobench.cpp:119

M
#define M(PROC, DITHER)
Definition: shallowgradient.cpp:77

header
static const char header[]
Definition: skpbench.cpp:88

offset
SeparatedVector2 offset
Definition: stroke_path_geometry.cc:311

SkRasterPipeline_BinaryOpCtx
Definition: SkRasterPipelineOpContexts.h:221

SkRasterPipeline_BinaryOpCtx::dst
SkRPOffset dst
Definition: SkRasterPipelineOpContexts.h:222

SkRasterPipeline_BinaryOpCtx::src
SkRPOffset src
Definition: SkRasterPipelineOpContexts.h:223

SkRasterPipeline_BranchCtx
Definition: SkRasterPipelineOpContexts.h:269

SkRasterPipeline_BranchCtx::offset
int offset
Definition: SkRasterPipelineOpContexts.h:270

SkRasterPipeline_BranchIfAllLanesActiveCtx
Definition: SkRasterPipelineOpContexts.h:273

SkRasterPipeline_BranchIfEqualCtx
Definition: SkRasterPipelineOpContexts.h:277

SkRasterPipeline_CaseOpCtx
Definition: SkRasterPipelineOpContexts.h:282

SkRasterPipeline_CaseOpCtx::expectedValue
int expectedValue
Definition: SkRasterPipelineOpContexts.h:283

SkRasterPipeline_CaseOpCtx::offset
SkRPOffset offset
Definition: SkRasterPipelineOpContexts.h:284

SkRasterPipeline_ConstantCtx
Definition: SkRasterPipelineOpContexts.h:211

SkRasterPipeline_ConstantCtx::dst
SkRPOffset dst
Definition: SkRasterPipelineOpContexts.h:213

SkRasterPipeline_ConstantCtx::value
int32_t value
Definition: SkRasterPipelineOpContexts.h:212

SkRasterPipeline_CopyIndirectCtx
Definition: SkRasterPipelineOpContexts.h:257

SkRasterPipeline_InitLaneMasksCtx
Definition: SkRasterPipelineOpContexts.h:207

SkRasterPipeline_MatrixMultiplyCtx
Definition: SkRasterPipelineOpContexts.h:231

SkRasterPipeline_MatrixMultiplyCtx::leftColumns
uint8_t leftColumns
Definition: SkRasterPipelineOpContexts.h:233

SkRasterPipeline_MatrixMultiplyCtx::rightColumns
uint8_t rightColumns
Definition: SkRasterPipelineOpContexts.h:233

SkRasterPipeline_MatrixMultiplyCtx::dst
SkRPOffset dst
Definition: SkRasterPipelineOpContexts.h:232

SkRasterPipeline_MatrixMultiplyCtx::leftRows
uint8_t leftRows
Definition: SkRasterPipelineOpContexts.h:233

SkRasterPipeline_MatrixMultiplyCtx::rightRows
uint8_t rightRows
Definition: SkRasterPipelineOpContexts.h:233

SkRasterPipeline_ShuffleCtx
Definition: SkRasterPipelineOpContexts.h:245

SkRasterPipeline_SwizzleCopyCtx
Definition: SkRasterPipelineOpContexts.h:251

SkRasterPipeline_SwizzleCopyIndirectCtx
Definition: SkRasterPipelineOpContexts.h:265

SkRasterPipeline_SwizzleCtx
Definition: SkRasterPipelineOpContexts.h:236

SkRasterPipeline_SwizzleCtx::dst
SkRPOffset dst
Definition: SkRasterPipelineOpContexts.h:241

SkRasterPipeline_SwizzleCtx::offsets
uint8_t offsets[4]
Definition: SkRasterPipelineOpContexts.h:242

SkRasterPipeline_TernaryOpCtx
Definition: SkRasterPipelineOpContexts.h:226

SkRasterPipeline_TernaryOpCtx::delta
SkRPOffset delta
Definition: SkRasterPipelineOpContexts.h:228

SkRasterPipeline_TernaryOpCtx::dst
SkRPOffset dst
Definition: SkRasterPipelineOpContexts.h:227

SkRasterPipeline_TraceFuncCtx
Definition: SkRasterPipelineOpContexts.h:287

SkRasterPipeline_TraceLineCtx
Definition: SkRasterPipelineOpContexts.h:299

SkRasterPipeline_TraceScopeCtx
Definition: SkRasterPipelineOpContexts.h:293

SkRasterPipeline_TraceVarCtx
Definition: SkRasterPipelineOpContexts.h:305

SkRasterPipeline_UniformCtx
Definition: SkRasterPipelineOpContexts.h:216

SkSL::RP::Instruction
Definition: SkSLRasterPipelineBuilder.h:129

SkSL::RP::Instruction::fStackID
int fStackID
Definition: SkSLRasterPipelineBuilder.h:137

SkSL::RP::Instruction::fSlotA
Slot fSlotA
Definition: SkSLRasterPipelineBuilder.h:131

SkSL::RP::Instruction::fImmB
int fImmB
Definition: SkSLRasterPipelineBuilder.h:134

SkSL::RP::Instruction::fOp
BuilderOp fOp
Definition: SkSLRasterPipelineBuilder.h:130

SkSL::RP::Instruction::fImmC
int fImmC
Definition: SkSLRasterPipelineBuilder.h:135

SkSL::RP::Instruction::fImmA
int fImmA
Definition: SkSLRasterPipelineBuilder.h:133

SkSL::RP::SlotRange
Definition: SkSLRasterPipelineBuilder.h:40

SkSL::RP::SlotRange::count
int count
Definition: SkSLRasterPipelineBuilder.h:42

SkSL::RP::SlotRange::index
Slot index
Definition: SkSLRasterPipelineBuilder.h:41

SkSL::SlotDebugInfo
Definition: SkSLDebugTracePriv.h:39

SkSL::SlotDebugInfo::columns
uint8_t columns
Definition: SkSLDebugTracePriv.h:43

SkSL::SlotDebugInfo::rows
uint8_t rows
Definition: SkSLDebugTracePriv.h:43

SkSL::SlotDebugInfo::pos
Position pos
Definition: SkSLDebugTracePriv.h:52

SkSL::SlotDebugInfo::componentIndex
uint8_t componentIndex
Definition: SkSLDebugTracePriv.h:45

SkSL::SlotDebugInfo::name
std::string name
Definition: SkSLDebugTracePriv.h:41