25#if !defined(SKSL_STANDALONE)
// Case-label list naming the float BuilderOps acos, asin, atan, cos, exp, exp2, log, log2, sin,
// sqrt and tan. The first entry has no leading `case` and the last has no trailing `:`, so the
// macro is shaped to be spliced into a switch as `case ALL_SINGLE_SLOT_UNARY_OP_CASES:`.
// NOTE(review): "single slot" presumably means each op is applied to one slot at a time; confirm
// against the switch statements that use this macro.
44#define ALL_SINGLE_SLOT_UNARY_OP_CASES \
45 BuilderOp::acos_float: \
46 case BuilderOp::asin_float: \
47 case BuilderOp::atan_float: \
48 case BuilderOp::cos_float: \
49 case BuilderOp::exp_float: \
50 case BuilderOp::exp2_float: \
51 case BuilderOp::log_float: \
52 case BuilderOp::log2_float: \
53 case BuilderOp::sin_float: \
54 case BuilderOp::sqrt_float: \
55 case BuilderOp::tan_float
57#define ALL_MULTI_SLOT_UNARY_OP_CASES \
59 case BuilderOp::cast_to_float_from_int: \
60 case BuilderOp::cast_to_float_from_uint: \
61 case BuilderOp::cast_to_int_from_float: \
62 case BuilderOp::cast_to_uint_from_float: \
63 case BuilderOp::ceil_float: \
64 case BuilderOp::floor_float: \
65 case BuilderOp::invsqrt_float
// Case-label list naming the two-operand BuilderOps atan2_n_floats and pow_n_floats. The first
// entry has no leading `case` and the last has no trailing `:`, so the macro is shaped to be
// spliced into a switch as `case ALL_N_WAY_BINARY_OP_CASES:`.
// NOTE(review): how "n-way" ops differ from "multi-slot" ops is not visible from this macro
// alone; check the switch statements that use it.
67#define ALL_N_WAY_BINARY_OP_CASES \
68 BuilderOp::atan2_n_floats: \
69 case BuilderOp::pow_n_floats
// Case-label list naming the `_n_` binary BuilderOps: add/sub/mul/div, bitwise and/or/xor, mod,
// min/max, and the cmple/cmplt/cmpeq/cmpne comparisons, in their float/int/uint variants as
// listed below. The first entry has no leading `case` and the last has no trailing `:`, so the
// macro is shaped to be spliced into a switch as `case ALL_MULTI_SLOT_BINARY_OP_CASES:`.
// Note that cmpeq and cmpne are listed only for floats and ints; there is no uint entry.
71#define ALL_MULTI_SLOT_BINARY_OP_CASES \
72 BuilderOp::add_n_floats: \
73 case BuilderOp::add_n_ints: \
74 case BuilderOp::sub_n_floats: \
75 case BuilderOp::sub_n_ints: \
76 case BuilderOp::mul_n_floats: \
77 case BuilderOp::mul_n_ints: \
78 case BuilderOp::div_n_floats: \
79 case BuilderOp::div_n_ints: \
80 case BuilderOp::div_n_uints: \
81 case BuilderOp::bitwise_and_n_ints: \
82 case BuilderOp::bitwise_or_n_ints: \
83 case BuilderOp::bitwise_xor_n_ints: \
84 case BuilderOp::mod_n_floats: \
85 case BuilderOp::min_n_floats: \
86 case BuilderOp::min_n_ints: \
87 case BuilderOp::min_n_uints: \
88 case BuilderOp::max_n_floats: \
89 case BuilderOp::max_n_ints: \
90 case BuilderOp::max_n_uints: \
91 case BuilderOp::cmple_n_floats: \
92 case BuilderOp::cmple_n_ints: \
93 case BuilderOp::cmple_n_uints: \
94 case BuilderOp::cmplt_n_floats: \
95 case BuilderOp::cmplt_n_ints: \
96 case BuilderOp::cmplt_n_uints: \
97 case BuilderOp::cmpeq_n_floats: \
98 case BuilderOp::cmpeq_n_ints: \
99 case BuilderOp::cmpne_n_floats: \
100 case BuilderOp::cmpne_n_ints
// Case-label list naming the `_imm_` binary BuilderOps: add, mul, bitwise and/xor, min/max, and
// the cmple/cmplt/cmpeq/cmpne comparisons, in the float/int/uint variants listed below. The first
// entry has no leading `case` and the last has no trailing `:`, so the macro is shaped to be
// spliced into a switch as `case ALL_IMMEDIATE_BINARY_OP_CASES:`.
// This list includes bitwise_and_imm_int, which is also the sole entry of
// ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES; the two macros therefore cannot both label cases of
// the same switch statement (duplicate case labels are ill-formed).
102#define ALL_IMMEDIATE_BINARY_OP_CASES \
103 BuilderOp::add_imm_float: \
104 case BuilderOp::add_imm_int: \
105 case BuilderOp::mul_imm_float: \
106 case BuilderOp::mul_imm_int: \
107 case BuilderOp::bitwise_and_imm_int: \
108 case BuilderOp::bitwise_xor_imm_int: \
109 case BuilderOp::min_imm_float: \
110 case BuilderOp::max_imm_float: \
111 case BuilderOp::cmple_imm_float: \
112 case BuilderOp::cmple_imm_int: \
113 case BuilderOp::cmple_imm_uint: \
114 case BuilderOp::cmplt_imm_float: \
115 case BuilderOp::cmplt_imm_int: \
116 case BuilderOp::cmplt_imm_uint: \
117 case BuilderOp::cmpeq_imm_float: \
118 case BuilderOp::cmpeq_imm_int: \
119 case BuilderOp::cmpne_imm_float: \
120 case BuilderOp::cmpne_imm_int
// Case-label list with a single entry, bitwise_and_imm_int. It has no leading `case` and no
// trailing `:`, so it is shaped to be spliced into a switch as
// `case ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES:`.
// bitwise_and_imm_int is also a member of ALL_IMMEDIATE_BINARY_OP_CASES, so this macro and that
// one cannot both label cases of the same switch statement.
// NOTE(review): presumably this marks the immediate ops that can be applied across several slots
// at once; confirm against the code that switches on this macro.
122#define ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES \
123 BuilderOp::bitwise_and_imm_int
// Case-label list with a single entry, smoothstep_n_floats. It has no leading `case` and no
// trailing `:`, so it is shaped to be spliced into a switch as `case ALL_N_WAY_TERNARY_OP_CASES:`.
// NOTE(review): how "n-way" ops differ from "multi-slot" ops is not visible from this macro
// alone; check the switch statements that use it.
125#define ALL_N_WAY_TERNARY_OP_CASES \
126 BuilderOp::smoothstep_n_floats
// Case-label list naming the BuilderOps mix_n_floats and mix_n_ints. The first entry has no
// leading `case` and the last has no trailing `:`, so the macro is shaped to be spliced into a
// switch as `case ALL_MULTI_SLOT_TERNARY_OP_CASES:`.
128#define ALL_MULTI_SLOT_TERNARY_OP_CASES \
129 BuilderOp::mix_n_floats: \
130 case BuilderOp::mix_n_ints
135 default:
return false;
142 default:
return false;
167 case BuilderOp::sub_n_ints:
168 *constantValue *= -1;
169 return BuilderOp::add_imm_int;
171 case BuilderOp::sub_n_floats: {
173 *constantValue ^= 0x80000000;
174 return BuilderOp::add_imm_float;
185void Builder::appendInstruction(
BuilderOp op, SlotList slots,
186 int immA,
int immB,
int immC,
int immD) {
187 fInstructions.push_back({op, slots.fSlotA, slots.fSlotB,
188 immA, immB, immC, immD, fCurrentStackID});
191Instruction* Builder::lastInstruction(
int fromBack) {
192 if (fInstructions.size() <= fromBack) {
195 Instruction*
inst = &fInstructions.fromBack(fromBack);
196 if (
inst->fStackID != fCurrentStackID) {
202Instruction* Builder::lastInstructionOnAnyStack(
int fromBack) {
203 if (fInstructions.size() <= fromBack) {
206 return &fInstructions.fromBack(fromBack);
213 this->appendInstruction(op, {}, slots);
223 if (
Instruction* lastInstruction = this->lastInstruction()) {
226 lastInstruction->
fImmA >= slots) {
228 int32_t constantValue = lastInstruction->
fImmB;
233 this->appendInstruction(immOp, {}, slots, constantValue);
242 this->appendInstruction(op, {}, slots);
255 this->appendInstruction(op, {}, slots);
266 case 1: this->appendInstruction(BuilderOp::mul_n_floats, {}, slots);
break;
267 case 2: this->appendInstruction(BuilderOp::dot_2_floats, {}, slots);
break;
268 case 3: this->appendInstruction(BuilderOp::dot_3_floats, {}, slots);
break;
269 case 4: this->appendInstruction(BuilderOp::dot_4_floats, {}, slots);
break;
278 this->appendInstruction(BuilderOp::refract_4_floats, {});
283 case 2: this->appendInstruction(BuilderOp::inverse_mat2, {}, 4);
break;
284 case 3: this->appendInstruction(BuilderOp::inverse_mat3, {}, 9);
break;
285 case 4: this->appendInstruction(BuilderOp::inverse_mat4, {}, 16);
break;
296bool Builder::simplifyImmediateUnmaskedOp() {
297 if (fInstructions.size() < 3) {
304 Instruction* popInstruction = this->lastInstruction(0);
305 Instruction* immInstruction = this->lastInstruction(1);
306 Instruction* pushInstruction = this->lastInstruction(2);
309 if (popInstruction && immInstruction && pushInstruction &&
313 immInstruction->fImmA == popInstruction->fImmA) {
320 pushInstruction->fImmA >= popInstruction->fImmA) {
322 Slot immSlot = popInstruction->fSlotA + popInstruction->fImmA;
323 Slot pushSlot = pushInstruction->fSlotA + pushInstruction->fImmA;
324 if (immSlot == pushSlot) {
327 pushInstruction->fImmA -= immInstruction->fImmA;
328 immInstruction->fSlotA = immSlot - immInstruction->fImmA;
329 fInstructions.pop_back();
344 Instruction* lastInstruction = this->lastInstructionOnAnyStack();
345 if (!lastInstruction || lastInstruction->
fStackID != stackID) {
349 switch (lastInstruction->
fOp) {
370 lastInstruction->
fImmA -= cancelOut;
371 if (lastInstruction->
fImmA == 0) {
372 fInstructions.pop_back();
381 fInstructions.pop_back();
388 if (this->simplifyImmediateUnmaskedOp()) {
399 fInstructions.pop_back();
402 this->simplifyPopSlotsUnmasked(&
dst);
405 if (
dst.count == 0) {
430 SkASSERT(labelID >= 0 && labelID < fNumLabels);
434 while (
const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
435 switch (lastInstruction->
fOp) {
436 case BuilderOp::jump:
437 case BuilderOp::branch_if_all_lanes_active:
438 case BuilderOp::branch_if_any_lanes_active:
439 case BuilderOp::branch_if_no_lanes_active:
441 if (lastInstruction->
fImmA == labelID) {
442 fInstructions.pop_back();
452 this->appendInstruction(BuilderOp::label, {}, labelID);
456 SkASSERT(labelID >= 0 && labelID < fNumLabels);
457 if (
const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
458 if (lastInstruction->
fOp == BuilderOp::jump) {
463 this->appendInstruction(BuilderOp::jump, {}, labelID);
472 SkASSERT(labelID >= 0 && labelID < fNumLabels);
473 if (
const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
474 if (lastInstruction->
fOp == BuilderOp::branch_if_any_lanes_active ||
475 lastInstruction->
fOp == BuilderOp::jump) {
481 this->appendInstruction(BuilderOp::branch_if_any_lanes_active, {}, labelID);
490 SkASSERT(labelID >= 0 && labelID < fNumLabels);
491 if (
const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
492 if (lastInstruction->
fOp == BuilderOp::branch_if_all_lanes_active ||
493 lastInstruction->
fOp == BuilderOp::jump) {
499 this->appendInstruction(BuilderOp::branch_if_all_lanes_active, {}, labelID);
507 SkASSERT(labelID >= 0 && labelID < fNumLabels);
508 if (
const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
509 if (lastInstruction->
fOp == BuilderOp::branch_if_no_lanes_active ||
510 lastInstruction->
fOp == BuilderOp::jump) {
516 this->appendInstruction(BuilderOp::branch_if_no_lanes_active, {}, labelID);
520 SkASSERT(labelID >= 0 && labelID < fNumLabels);
521 if (
const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
522 if (lastInstruction->
fOp == BuilderOp::jump ||
536 if (
Instruction* lastInstruction = this->lastInstruction()) {
539 if (lastInstruction->
fOp == op &&
541 lastInstruction->
fImmA +=
src.count;
547 this->appendInstruction(op, {
src.index},
src.count);
553 if (fInstructions.size() >= 3) {
554 const Instruction* pushInst = this->lastInstruction(0);
555 const Instruction* discardInst = this->lastInstruction(1);
556 const Instruction* copyToSlotsInst = this->lastInstruction(2);
559 int pushIndex = pushInst->
fSlotA;
560 int pushCount = pushInst->
fImmA;
567 copyToSlotsInst->
fSlotA == pushIndex && copyToSlotsInst->
fImmA == pushCount) {
569 fInstructions.pop_back();
570 fInstructions.pop_back();
586 this->appendInstruction(op,
594 if (
Instruction* lastInstruction = this->lastInstruction()) {
599 lastInstruction->
fImmA +=
src.count;
641 if (
Instruction* lastInstruction = this->lastInstruction()) {
653 if (
Instruction* lastInstruction = this->lastInstruction()) {
663 this->
swizzle(1, {0, 0, 0, 0});
672 case 3: this->
swizzle(1, {0, 0, 0, 0});
break;
673 case 2: this->
swizzle(1, {0, 0, 0});
break;
681 if (numSlots == 1 && offsetFromStackTop == 0) {
683 if (
Instruction* lastInstruction = this->lastInstruction()) {
686 lastInstruction->
fImmA += 1;
698 offsetFromStackTop -= range.
index;
700 if (
Instruction* lastInstruction = this->lastInstruction()) {
704 lastInstruction->
fImmB == otherStackID &&
706 lastInstruction->
fImmC - lastInstruction->
fImmA == offsetFromStackTop) {
714 range.
count, otherStackID, offsetFromStackTop);
720 int offsetFromStackTop) {
725 offsetFromStackTop -= fixedOffset.
index;
728 fixedOffset.
count, otherStackID, offsetFromStackTop, dynamicStackID);
746 Instruction* lastInstruction = this->lastInstruction();
747 if (!lastInstruction) {
757 int32_t
value = lastInstruction->fImmB;
758 lastInstruction->fImmA--;
759 if (lastInstruction->fImmA == 0) {
760 fInstructions.pop_back();
765 Slot destinationSlot =
dst->index +
dst->count;
768 this->simplifyPopSlotsUnmasked(
dst);
779 Slot sourceSlot = lastInstruction->fSlotA + lastInstruction->fImmA - 1;
780 lastInstruction->fImmA--;
781 if (lastInstruction->fImmA == 0) {
782 fInstructions.pop_back();
787 Slot destinationSlot =
dst->index +
dst->count;
790 this->simplifyPopSlotsUnmasked(
dst);
800 Slot sourceSlot = lastInstruction->fSlotA + lastInstruction->fImmA - 1;
801 lastInstruction->fImmA--;
802 if (lastInstruction->fImmA == 0) {
803 fInstructions.pop_back();
808 Slot destinationSlot =
dst->index +
dst->count;
811 this->simplifyPopSlotsUnmasked(
dst);
815 if (destinationSlot != sourceSlot) {
835 if (
Instruction* lastInstruction = this->lastInstruction()) {
837 if (lastInstruction->fOp == BuilderOp::exchange_src) {
839 fInstructions.pop_back();
844 this->appendInstruction(BuilderOp::exchange_src, {});
848 if (
Instruction* lastInstruction = this->lastInstruction()) {
850 if (lastInstruction->fOp == BuilderOp::exchange_src) {
852 fInstructions.pop_back();
869 if (
Instruction* lastInstruction = this->lastInstruction()) {
873 lastInstruction->fSlotA + lastInstruction->fImmA ==
dst.index &&
875 lastInstruction->fImmB - lastInstruction->fImmA == offsetFromStackTop) {
877 lastInstruction->fImmA +=
dst.count;
883 dst.count, offsetFromStackTop);
900 return x.index <
y.index +
y.count &&
901 y.index <
x.index +
x.count;
906 if (
Instruction* lastInstr = this->lastInstruction()) {
908 if (lastInstr->fOp == BuilderOp::copy_constant &&
910 lastInstr->fImmB == constantValue &&
912 lastInstr->fSlotA + lastInstr->fImmA == slot) {
914 lastInstr->fImmA += 1;
919 this->appendInstruction(BuilderOp::copy_constant, {slot}, 1, constantValue);
924 if (
Instruction* lastInstr = this->lastInstruction()) {
926 if (lastInstr->fOp == BuilderOp::copy_slot_unmasked &&
928 lastInstr->fSlotA + lastInstr->fImmA ==
dst.index &&
930 lastInstr->fSlotB + lastInstr->fImmA ==
src.index &&
933 {lastInstr->fSlotA, lastInstr->fImmA +
dst.count})) {
935 lastInstr->fImmA +=
dst.count;
941 this->appendInstruction(BuilderOp::copy_slot_unmasked, {
dst.index,
src.index},
dst.count);
946 if (
Instruction* lastInstr = this->lastInstruction()) {
948 if (lastInstr->fOp == BuilderOp::copy_immutable_unmasked &&
950 lastInstr->fSlotA + lastInstr->fImmA ==
dst.index &&
952 lastInstr->fSlotB + lastInstr->fImmA ==
src.index) {
954 lastInstr->fImmA +=
dst.count;
960 this->appendInstruction(BuilderOp::copy_immutable_unmasked, {
dst.index,
src.index},
dst.count);
965 if (
Instruction* lastInstr = this->lastInstruction()) {
969 lastInstr->fSlotB + lastInstr->fImmA ==
dst.index &&
971 lastInstr->fSlotA + lastInstr->fImmA ==
src.index) {
973 lastInstr->fImmA +=
dst.count;
985 if (
Instruction* lastInstr = this->lastInstruction()) {
989 lastInstr->fSlotA + lastInstr->fImmA ==
dst.index &&
991 lastInstr->fImmB - lastInstr->fImmA == offsetFromStackTop) {
993 lastInstr->fImmA +=
dst.count;
999 dst.count, offsetFromStackTop);
1007 if (
Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
1008 if (lastInstruction->fOp == BuilderOp::mask_off_return_mask) {
1009 fInstructions.pop_back();
1021 if (
Instruction* lastInstruction = this->lastInstruction()) {
1035 if (
Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
1037 int stackID = lastInstruction->fStackID;
1038 fInstructions.pop_back();
1043 this->appendInstruction(BuilderOp::merge_condition_mask, {});
1047 if (
Instruction* lastInstruction = this->lastInstruction()) {
1048 if (lastInstruction->fOp == BuilderOp::copy_constant && lastInstruction->fImmB == 0) {
1049 if (lastInstruction->fSlotA + lastInstruction->fImmA ==
dst.index) {
1052 lastInstruction->fImmA +=
dst.count;
1056 if (lastInstruction->fSlotA ==
dst.index +
dst.count) {
1059 lastInstruction->fSlotA =
dst.index;
1060 lastInstruction->fImmA +=
dst.count;
1066 this->appendInstruction(BuilderOp::copy_constant, {
dst.index},
dst.count, 0);
1072 for (
auto iter = components.
rbegin(); iter != components.
rend(); ++iter) {
1073 SkASSERT(*iter >= 0 && *iter <= 0xF);
1080template <
typename T>
1083 for (
size_t index = 0; index <
offsets.size(); ++index) {
1091 for (
size_t index = 0; index < numComponents; ++index) {
1092 largest = std::max<int>(largest, components & 0xF);
1100 int offsetFromStackTop) {
1111 offsetFromStackTop);
1118 int offsetFromStackTop) {
1141 int numElements = components.
size();
1143 SkASSERT(std::all_of(components.
begin(), components.
end(), [](int8_t
e){ return e >= 0; }));
1144 SkASSERT(std::all_of(components.
begin(), components.
end(), [](int8_t
e){ return e <= 0xF; }));
1147 int8_t elements[16] = {};
1150 while (numElements > 0) {
1152 if (elements[0] != 0) {
1156 if (std::any_of(&elements[1], &elements[numElements], [](int8_t
e) {
return e == 0; })) {
1161 for (
int index = 1; index < numElements; ++index) {
1162 elements[index - 1] = elements[index] - 1;
1164 elements[numElements - 1] = 0;
1170 if (numElements == 0) {
1175 if (consumedSlots <= 4 && numElements <= 4) {
1177 int op = (
int)BuilderOp::swizzle_1 + numElements - 1;
1178 this->appendInstruction((
BuilderOp)op, {}, consumedSlots,
1186 consumedSlots, numElements,
1193 int8_t elements[16] = {};
1195 for (
int r = 0; r < rows; ++r) {
1196 for (
int c = 0; c < columns; ++c) {
1197 elements[index++] = (c * rows) + r;
1205 int8_t elements[16] = {};
1207 for (
int c = 0; c < columns; ++c) {
1208 for (
int r = 0; r < rows; ++r) {
1209 elements[index++] = (c == r) ? 1 : 0;
1217 int8_t elements[16] = {};
1220 size_t consumedSlots = origColumns * origRows;
1221 size_t zeroOffset = 0, oneOffset = 0;
1223 for (
int c = 0; c < newColumns; ++c) {
1224 for (
int r = 0; r < newRows; ++r) {
1225 if (c < origColumns && r < origRows) {
1227 elements[index++] = (c * origRows) + r;
1232 if (oneOffset == 0) {
1234 oneOffset = consumedSlots++;
1236 elements[index++] = oneOffset;
1239 if (zeroOffset == 0) {
1241 zeroOffset = consumedSlots++;
1243 elements[index++] = zeroOffset;
1253 switch (leftColumns) {
1254 case 2: op = BuilderOp::matrix_multiply_2;
break;
1255 case 3: op = BuilderOp::matrix_multiply_3;
break;
1256 case 4: op = BuilderOp::matrix_multiply_4;
break;
1257 default:
SkDEBUGFAIL(
"unsupported matrix dimensions");
return;
1260 this->appendInstruction(op, {}, leftColumns, leftRows, rightColumns, rightRows);
1264 int numUniformSlots,
1265 int numImmutableSlots,
1268 SkASSERT(fExecutionMaskWritesEnabled == 0);
1270 return std::make_unique<Program>(std::move(fInstructions), numValueSlots, numUniformSlots,
1271 numImmutableSlots, fNumLabels, debugTrace);
1274void Program::optimize() {
1321 return 2 * -
inst.fImmA;
1323 case BuilderOp::swizzle_1:
1324 return 1 -
inst.fImmA;
1325 case BuilderOp::swizzle_2:
1326 return 2 -
inst.fImmA;
1327 case BuilderOp::swizzle_3:
1328 return 3 -
inst.fImmA;
1329 case BuilderOp::swizzle_4:
1330 return 4 -
inst.fImmA;
1332 case BuilderOp::dot_2_floats:
1334 case BuilderOp::dot_3_floats:
1336 case BuilderOp::dot_4_floats:
1339 case BuilderOp::refract_4_floats:
1342 case BuilderOp::matrix_multiply_2:
1343 case BuilderOp::matrix_multiply_3:
1344 case BuilderOp::matrix_multiply_4:
1349 int consumed =
inst.fImmA;
1350 int generated =
inst.fImmB;
1351 return generated - consumed;
1361Program::StackDepths Program::tempStackMaxDepths()
const {
1364 for (
const Instruction&
inst : fInstructions) {
1369 StackDepths largest, current;
1371 current.push_back_n(numStacks, 0);
1373 for (
const Instruction&
inst : fInstructions) {
1374 int stackID =
inst.fStackID;
1376 largest[stackID] =
std::max(current[stackID], largest[stackID]);
1378 SkASSERTF(current[stackID] >= 0,
"unbalanced temp stack push/pop on stack %d", stackID);
1382 for (
int stackID = 0; stackID < numStacks; ++stackID) {
1384 SkASSERTF(current[stackID] == 0,
"unbalanced temp stack push/pop on stack %d", stackID);
1392 int numUniformSlots,
1393 int numImmutableSlots,
1396 : fInstructions(
std::move(instrs))
1397 , fNumValueSlots(numValueSlots)
1398 , fNumUniformSlots(numUniformSlots)
1399 , fNumImmutableSlots(numImmutableSlots)
1400 , fNumLabels(numLabels)
1401 , fDebugTrace(debugTrace) {
1404 fTempStackMaxDepths = this->tempStackMaxDepths();
1406 fNumTempStackSlots = 0;
1407 for (
const int depth : fTempStackMaxDepths) {
1408 fNumTempStackSlots += depth;
1421 for (
int index = 1; index < numSlots; ++index) {
1422 if (immutablePtr[0] != immutablePtr[index]) {
1435 int numSlots)
const {
1437 while (numSlots > 4) {
1439 this->appendCopy(pipeline, alloc, basePtr,
1444 dst += 4 * dstStride *
sizeof(
float);
1445 src += 4 * srcStride *
sizeof(
float);
1456 int32_t* immutablePtr =
reinterpret_cast<int32_t*
>(basePtr +
src);
1458 auto stage = (
ProgramOp)((
int)ProgramOp::copy_constant + numSlots - 1);
1461 ctx.
value = *immutablePtr;
1468 auto stage = (
ProgramOp)((
int)baseStage + numSlots - 1);
1476void Program::appendCopySlotsUnmasked(
TArray<Stage>* pipeline,
1480 int numSlots)
const {
1481 this->appendCopy(pipeline, alloc,
nullptr,
1482 ProgramOp::copy_slot_unmasked,
1488void Program::appendCopyImmutableUnmasked(
TArray<Stage>* pipeline,
1493 int numSlots)
const {
1494 this->appendCopy(pipeline, alloc, basePtr,
1495 ProgramOp::copy_immutable_unmasked,
1505 int numSlots)
const {
1506 this->appendCopy(pipeline, alloc,
nullptr,
1507 ProgramOp::copy_slot_masked,
1514 float*
dst,
int numSlots)
const {
1516 while (numSlots--) {
1523 float*
dst,
int numSlots)
const {
1525 while (numSlots > 0) {
1526 int currentSlots =
std::min(numSlots, 4);
1527 auto stage = (
ProgramOp)((
int)baseStage + currentSlots - 1);
1546 while (numSlots > 0) {
1547 int currentSlots =
std::min(numSlots, slotsPerStage);
1548 auto stage = (
ProgramOp)((
int)baseStage - (currentSlots - 1));
1552 numSlots -= slotsPerStage;
1572 ProgramOp baseStage, std::byte* basePtr,
1579 this->appendAdjacentNWayBinaryOp(pipeline, alloc, baseStage,
dst,
src, numSlots);
1583 auto specializedStage = (
ProgramOp)((
int)baseStage + numSlots);
1584 pipeline->
push_back({specializedStage, basePtr +
dst});
1605 ProgramOp baseStage, std::byte* basePtr,
1607 int numSlots)
const {
1614 this->appendAdjacentNWayTernaryOp(pipeline, alloc, baseStage, basePtr,
1615 dst, src0, src1, numSlots);
1619 auto specializedStage = (
ProgramOp)((
int)baseStage + numSlots);
1620 pipeline->
push_back({specializedStage, basePtr +
dst});
1624void Program::appendStackRewind(
TArray<Stage>* pipeline)
const {
1625#if defined(SKSL_STANDALONE) || !SK_HAS_MUSTTAIL
1631 return sk_bit_cast<void*>(val);
1634Program::SlotData Program::allocateSlotData(
SkArenaAlloc* alloc)
const {
1637 const int scalarWidth = 1 *
sizeof(
float);
1638 const int vectorWidth =
N *
sizeof(
float);
1639 const int allocSize = vectorWidth * (fNumValueSlots + fNumTempStackSlots) +
1640 scalarWidth * fNumImmutableSlots;
1641 float* slotPtr =
static_cast<float*
>(alloc->
makeBytesAlignedTo(allocSize, vectorWidth));
1646 s.values =
SkSpan{slotPtr,
N * fNumValueSlots};
1647 s.stack =
SkSpan{
s.values.end(),
N * fNumTempStackSlots};
1648 s.immutable =
SkSpan{
s.stack.end(), 1 * fNumImmutableSlots};
1656#if defined(SKSL_STANDALONE)
1661 SlotData slotData = this->allocateSlotData(alloc);
1662 this->makeStages(&stages, alloc, uniforms, slotData);
1673 auto resetBasePointer = [&]() {
1676 pipeline->
append(SkRasterPipelineOp::set_base_pointer, slotData.values.data());
1681 for (
const Stage& stage : stages) {
1687 case ProgramOp::invoke_shader:
1688 if (!callbacks || !callbacks->
appendShader(sk_bit_cast<intptr_t>(stage.ctx))) {
1694 case ProgramOp::invoke_color_filter:
1695 if (!callbacks || !callbacks->
appendColorFilter(sk_bit_cast<intptr_t>(stage.ctx))) {
1701 case ProgramOp::invoke_blender:
1702 if (!callbacks || !callbacks->
appendBlender(sk_bit_cast<intptr_t>(stage.ctx))) {
1708 case ProgramOp::invoke_to_linear_srgb:
1717 case ProgramOp::invoke_from_linear_srgb:
1726 case ProgramOp::label: {
1728 int labelID = sk_bit_cast<intptr_t>(stage.ctx);
1729 SkASSERT(labelID >= 0 && labelID < fNumLabels);
1733 case ProgramOp::jump:
1734 case ProgramOp::branch_if_all_lanes_active:
1735 case ProgramOp::branch_if_any_lanes_active:
1736 case ProgramOp::branch_if_no_lanes_active:
1737 case ProgramOp::branch_if_no_active_lanes_eq: {
1740 int labelID = branchCtx->
offset;
1741 SkASSERT(labelID >= 0 && labelID < fNumLabels);
1763 for (
int index = 0; index < branchContexts.
size(); ++index) {
1764 int branchFromIdx = branchContexts[index]->offset;
1765 int branchToIdx = labelOffsets[branchGoesToLabel[index]];
1766 branchContexts[index]->offset = branchToIdx - branchFromIdx;
1776 const SlotData& slots)
const {
1780 int mostRecentRewind = 0;
1786 tempStackMap.
resize(fTempStackMaxDepths.
size());
1787 for (
int idx = 0; idx < fTempStackMaxDepths.
size(); ++idx) {
1788 tempStackMap[idx] = slots.stack.
begin() + (
pos *
N);
1789 pos += fTempStackMaxDepths[idx];
1793 SkBitSet labelsEncountered(fNumLabels);
1795 auto EmitStackRewindForBackwardsBranch = [&](
int labelID) {
1799 if (labelsEncountered.test(labelID)) {
1800 this->appendStackRewind(pipeline);
1801 mostRecentRewind = pipeline->
size();
1805 auto*
const basePtr = (std::byte*)slots.values.data();
1806 auto OffsetFromBase = [&](
const void* ptr) ->
SkRPOffset {
1807 return (
SkRPOffset)((
const std::byte*)ptr - basePtr);
1811 for (
const Instruction&
inst : fInstructions) {
1813 slots.immutable[
inst.fSlotA] = sk_bit_cast<float>(
inst.fImmA);
1819 for (
const Instruction&
inst : fInstructions) {
1820 auto ImmutableA = [&]() {
return &slots.immutable[1 *
inst.fSlotA]; };
1821 auto ImmutableB = [&]() {
return &slots.immutable[1 *
inst.fSlotB]; };
1822 auto SlotA = [&]() {
return &slots.values[
N *
inst.fSlotA]; };
1823 auto SlotB = [&]() {
return &slots.values[
N *
inst.fSlotB]; };
1824 auto UniformA = [&]() {
return &uniforms[
inst.fSlotA]; };
1825 auto AllocTraceContext = [&](
auto* ctx) {
1829 ctx->traceMask =
reinterpret_cast<int*
>(tempStackMap[
inst.fImmA] -
N);
1830 ctx->traceHook = fTraceHook.get();
1833 float*& tempStackPtr = tempStackMap[
inst.fStackID];
1836 case BuilderOp::label:
1838 labelsEncountered.set(
inst.fImmA);
1842 case BuilderOp::jump:
1843 case BuilderOp::branch_if_any_lanes_active:
1844 case BuilderOp::branch_if_no_lanes_active: {
1846 EmitStackRewindForBackwardsBranch(
inst.fImmA);
1849 ctx->offset =
inst.fImmA;
1853 case BuilderOp::branch_if_all_lanes_active: {
1855 EmitStackRewindForBackwardsBranch(
inst.fImmA);
1858 ctx->offset =
inst.fImmA;
1859 pipeline->
push_back({ProgramOp::branch_if_all_lanes_active, ctx});
1864 EmitStackRewindForBackwardsBranch(
inst.fImmA);
1867 ctx->offset =
inst.fImmA;
1869 ctx->ptr =
reinterpret_cast<int*
>(tempStackPtr -
N);
1870 pipeline->
push_back({ProgramOp::branch_if_no_active_lanes_eq, ctx});
1873 case BuilderOp::init_lane_masks: {
1875 pipeline->
push_back({ProgramOp::init_lane_masks, ctx});
1878 case BuilderOp::store_src_rg:
1879 pipeline->
push_back({ProgramOp::store_src_rg, SlotA()});
1882 case BuilderOp::store_src:
1883 pipeline->
push_back({ProgramOp::store_src, SlotA()});
1886 case BuilderOp::store_dst:
1887 pipeline->
push_back({ProgramOp::store_dst, SlotA()});
1890 case BuilderOp::store_device_xy01:
1891 pipeline->
push_back({ProgramOp::store_device_xy01, SlotA()});
1898 case BuilderOp::load_src:
1899 pipeline->
push_back({ProgramOp::load_src, SlotA()});
1902 case BuilderOp::load_dst:
1903 pipeline->
push_back({ProgramOp::load_dst, SlotA()});
1907 float*
dst = tempStackPtr - (
inst.fImmA *
N);
1912 float*
dst = tempStackPtr - (
inst.fImmA *
N);
1917 float*
dst = (
inst.fSlotA ==
NA) ? tempStackPtr - (
inst.fImmA *
N)
1920 this->appendImmediateBinaryOp(pipeline, alloc, (
ProgramOp)
inst.fOp,
1925 float*
src = tempStackPtr - (
inst.fImmA *
N);
1926 float*
dst = tempStackPtr - (
inst.fImmA * 2 *
N);
1927 this->appendAdjacentNWayBinaryOp(pipeline, alloc, (
ProgramOp)
inst.fOp,
1928 OffsetFromBase(
dst), OffsetFromBase(
src),
1933 float*
src = tempStackPtr - (
inst.fImmA *
N);
1934 float*
dst = tempStackPtr - (
inst.fImmA * 2 *
N);
1935 this->appendAdjacentMultiSlotBinaryOp(pipeline, alloc, (
ProgramOp)
inst.fOp,
1937 OffsetFromBase(
dst),
1938 OffsetFromBase(
src),
1943 float* src1 = tempStackPtr - (
inst.fImmA *
N);
1944 float* src0 = tempStackPtr - (
inst.fImmA * 2 *
N);
1945 float*
dst = tempStackPtr - (
inst.fImmA * 3 *
N);
1946 this->appendAdjacentNWayTernaryOp(pipeline, alloc, (
ProgramOp)
inst.fOp, basePtr,
1947 OffsetFromBase(
dst),
1948 OffsetFromBase(src0),
1949 OffsetFromBase(src1),
1954 float* src1 = tempStackPtr - (
inst.fImmA *
N);
1955 float* src0 = tempStackPtr - (
inst.fImmA * 2 *
N);
1956 float*
dst = tempStackPtr - (
inst.fImmA * 3 *
N);
1957 this->appendAdjacentMultiSlotTernaryOp(pipeline, alloc,(
ProgramOp)
inst.fOp, basePtr,
1958 OffsetFromBase(
dst),
1959 OffsetFromBase(src0),
1960 OffsetFromBase(src1),
1965 float*
src = tempStackPtr - (
inst.fImmA *
N);
1966 float*
dst = tempStackPtr - (
inst.fImmA * 2 *
N);
1967 this->appendCopySlotsMasked(pipeline, alloc,
1968 OffsetFromBase(
dst),
1969 OffsetFromBase(
src),
1973 case BuilderOp::copy_slot_masked:
1974 this->appendCopySlotsMasked(pipeline, alloc,
1975 OffsetFromBase(SlotA()),
1976 OffsetFromBase(SlotB()),
1980 case BuilderOp::copy_slot_unmasked:
1981 this->appendCopySlotsUnmasked(pipeline, alloc,
1982 OffsetFromBase(SlotA()),
1983 OffsetFromBase(SlotB()),
1987 case BuilderOp::copy_immutable_unmasked:
1988 this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,
1989 OffsetFromBase(SlotA()),
1990 OffsetFromBase(ImmutableB()),
1994 case BuilderOp::refract_4_floats: {
1995 float*
dst = tempStackPtr - (9 *
N);
1996 pipeline->
push_back({ProgramOp::refract_4_floats,
dst});
1999 case BuilderOp::inverse_mat2:
2000 case BuilderOp::inverse_mat3:
2001 case BuilderOp::inverse_mat4: {
2002 float*
dst = tempStackPtr - (
inst.fImmA *
N);
2006 case BuilderOp::dot_2_floats:
2007 case BuilderOp::dot_3_floats:
2008 case BuilderOp::dot_4_floats: {
2009 float*
dst = tempStackPtr - (
inst.fImmA * 2 *
N);
2013 case BuilderOp::swizzle_1: {
2018 float*
dst = tempStackPtr - (
inst.fImmA *
N);
2021 this->appendCopySlotsUnmasked(pipeline, alloc,
2022 OffsetFromBase(
dst),
2023 OffsetFromBase(
src),
2028 case BuilderOp::swizzle_2:
2029 case BuilderOp::swizzle_3:
2030 case BuilderOp::swizzle_4: {
2032 ctx.
dst = OffsetFromBase(tempStackPtr - (
N *
inst.fImmA));
2039 int consumed =
inst.fImmA;
2040 int generated =
inst.fImmB;
2043 ctx->ptr =
reinterpret_cast<int32_t*
>(tempStackPtr) - (
N * consumed);
2044 ctx->count = generated;
2051 case BuilderOp::matrix_multiply_2:
2052 case BuilderOp::matrix_multiply_3:
2053 case BuilderOp::matrix_multiply_4: {
2054 int consumed = (
inst.fImmB *
inst.fImmC) +
2059 ctx.
dst = OffsetFromBase(tempStackPtr - (
N * consumed));
2067 case BuilderOp::exchange_src: {
2068 float*
dst = tempStackPtr - (4 *
N);
2073 float*
dst = tempStackPtr;
2078 float*
dst = tempStackPtr;
2083 float*
dst = tempStackPtr;
2084 pipeline->
push_back({ProgramOp::store_device_xy01,
dst});
2088 float*
src = tempStackPtr - (4 *
N);
2093 float*
src = tempStackPtr - (4 *
N);
2098 float*
dst = tempStackPtr;
2099 this->appendCopySlotsUnmasked(pipeline, alloc,
2100 OffsetFromBase(
dst),
2101 OffsetFromBase(SlotA()),
2106 float*
dst = tempStackPtr;
2107 this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,
2108 OffsetFromBase(
dst),
2109 OffsetFromBase(ImmutableA()),
2123 ctx->indirectOffset =
2124 reinterpret_cast<const uint32_t*
>(tempStackMap[
inst.fImmB]) - (1 *
N);
2125 ctx->indirectLimit =
inst.fSlotB -
inst.fSlotA -
inst.fImmA;
2126 ctx->slots =
inst.fImmA;
2128 op = ProgramOp::copy_from_indirect_unmasked;
2129 ctx->src =
reinterpret_cast<const int32_t*
>(SlotA());
2130 ctx->
dst =
reinterpret_cast<int32_t*
>(tempStackPtr);
2133 op = ProgramOp::copy_from_indirect_uniform_unmasked;
2134 ctx->src =
reinterpret_cast<const int32_t*
>(ImmutableA());
2135 ctx->
dst =
reinterpret_cast<int32_t*
>(tempStackPtr);
2137 op = ProgramOp::copy_from_indirect_uniform_unmasked;
2138 ctx->src =
reinterpret_cast<const int32_t*
>(UniformA());
2139 ctx->
dst =
reinterpret_cast<int32_t*
>(tempStackPtr);
2141 op = ProgramOp::copy_to_indirect_masked;
2142 ctx->src =
reinterpret_cast<const int32_t*
>(tempStackPtr) - (ctx->slots *
N);
2143 ctx->
dst =
reinterpret_cast<int32_t*
>(SlotA());
2150 const float*
src = UniformA();
2153 for (
int remaining =
inst.fImmA; remaining > 0; remaining -= 4) {
2155 ctx->
dst =
reinterpret_cast<int32_t*
>(
dst);
2156 ctx->src =
reinterpret_cast<const int32_t*
>(
src);
2157 switch (remaining) {
2158 case 1: pipeline->
push_back({ProgramOp::copy_uniform, ctx});
break;
2159 case 2: pipeline->
push_back({ProgramOp::copy_2_uniforms, ctx});
break;
2160 case 3: pipeline->
push_back({ProgramOp::copy_3_uniforms, ctx});
break;
2161 default: pipeline->
push_back({ProgramOp::copy_4_uniforms, ctx});
break;
2169 float*
dst = tempStackPtr;
2170 pipeline->
push_back({ProgramOp::store_condition_mask,
dst});
2174 float*
src = tempStackPtr - (1 *
N);
2175 pipeline->
push_back({ProgramOp::load_condition_mask,
src});
2178 case BuilderOp::merge_condition_mask:
2179 case BuilderOp::merge_inv_condition_mask: {
2180 float* ptr = tempStackPtr - (2 *
N);
2185 float*
dst = tempStackPtr;
2186 pipeline->
push_back({ProgramOp::store_loop_mask,
dst});
2190 float*
src = tempStackPtr - (1 *
N);
2195 float*
src = tempStackPtr - (1 *
N);
2196 pipeline->
push_back({ProgramOp::reenable_loop_mask,
src});
2199 case BuilderOp::reenable_loop_mask:
2200 pipeline->
push_back({ProgramOp::reenable_loop_mask, SlotA()});
2203 case BuilderOp::mask_off_loop_mask:
2204 pipeline->
push_back({ProgramOp::mask_off_loop_mask,
nullptr});
2207 case BuilderOp::merge_loop_mask: {
2208 float*
src = tempStackPtr - (1 *
N);
2209 pipeline->
push_back({ProgramOp::merge_loop_mask,
src});
2213 float*
dst = tempStackPtr;
2214 pipeline->
push_back({ProgramOp::store_return_mask,
dst});
2218 float*
src = tempStackPtr - (1 *
N);
2219 pipeline->
push_back({ProgramOp::load_return_mask,
src});
2222 case BuilderOp::mask_off_return_mask:
2223 pipeline->
push_back({ProgramOp::mask_off_return_mask,
nullptr});
2226 case BuilderOp::copy_constant:
2228 float*
dst = (
inst.fOp == BuilderOp::copy_constant) ? SlotA() : tempStackPtr;
2230 for (
int remaining =
inst.fImmA; remaining > 0; remaining -= 4) {
2232 ctx.
dst = OffsetFromBase(
dst);
2235 switch (remaining) {
2236 case 1: pipeline->
push_back({ProgramOp::copy_constant, ptr});
break;
2237 case 2: pipeline->
push_back({ProgramOp::splat_2_constants, ptr});
break;
2238 case 3: pipeline->
push_back({ProgramOp::splat_3_constants, ptr});
break;
2239 default: pipeline->
push_back({ProgramOp::splat_4_constants, ptr});
break;
2246 float*
src = tempStackPtr - (
inst.fImmB *
N);
2247 this->appendCopySlotsMasked(pipeline, alloc,
2248 OffsetFromBase(SlotA()),
2249 OffsetFromBase(
src),
2254 float*
src = tempStackPtr - (
inst.fImmB *
N);
2255 this->appendCopySlotsUnmasked(pipeline, alloc,
2256 OffsetFromBase(SlotA()),
2257 OffsetFromBase(
src),
2266 auto stage = (
ProgramOp)((
int)ProgramOp::swizzle_copy_slot_masked +
inst.fImmA - 1);
2268 ctx->src =
reinterpret_cast<const int32_t*
>(tempStackPtr) - (
inst.fImmC *
N);
2269 ctx->
dst =
reinterpret_cast<int32_t*
>(SlotA());
2275 float*
src = tempStackPtr - (
inst.fImmB *
N);
2276 float*
dst = tempStackPtr;
2277 this->appendCopySlotsUnmasked(pipeline, alloc,
2278 OffsetFromBase(
dst),
2279 OffsetFromBase(
src),
2287 float* sourceStackPtr = tempStackMap[
inst.fImmB];
2288 float*
src = sourceStackPtr - (
inst.fImmC *
N);
2289 float*
dst = tempStackPtr;
2290 this->appendCopySlotsUnmasked(pipeline, alloc,
2291 OffsetFromBase(
dst),
2292 OffsetFromBase(
src),
2301 float* sourceStackPtr = tempStackMap[
inst.fImmB];
2304 ctx->
dst =
reinterpret_cast<int32_t*
>(tempStackPtr);
2305 ctx->src =
reinterpret_cast<const int32_t*
>(sourceStackPtr) - (
inst.fImmC *
N);
2306 ctx->indirectOffset =
2307 reinterpret_cast<const uint32_t*
>(tempStackMap[
inst.fImmD]) - (1 *
N);
2308 ctx->indirectLimit =
inst.fImmC -
inst.fImmA;
2309 ctx->slots =
inst.fImmA;
2310 pipeline->
push_back({ProgramOp::copy_from_indirect_unmasked, ctx});
2321 ctx->src =
reinterpret_cast<const int32_t*
>(tempStackPtr) - (
inst.fImmC *
N);
2322 ctx->
dst =
reinterpret_cast<int32_t*
>(SlotA());
2323 ctx->indirectOffset =
2324 reinterpret_cast<const uint32_t*
>(tempStackMap[
inst.fImmD]) - (1 *
N);
2325 ctx->indirectLimit =
2327 ctx->slots =
inst.fImmA;
2329 pipeline->
push_back({ProgramOp::swizzle_copy_to_indirect_masked, ctx});
2332 case BuilderOp::case_op: {
2335 ctx.
offset = OffsetFromBase(tempStackPtr - (2 *
N));
2339 case BuilderOp::continue_op:
2340 pipeline->
push_back({ProgramOp::continue_op, tempStackMap[
inst.fImmA] - (1 *
N)});
2347 case BuilderOp::invoke_shader:
2348 case BuilderOp::invoke_color_filter:
2349 case BuilderOp::invoke_blender:
2353 case BuilderOp::invoke_to_linear_srgb:
2354 case BuilderOp::invoke_from_linear_srgb:
2358 case BuilderOp::trace_line: {
2360 ctx->lineNumber =
inst.fImmB;
2361 pipeline->
push_back({ProgramOp::trace_line, ctx});
2364 case BuilderOp::trace_scope: {
2366 ctx->delta =
inst.fImmB;
2367 pipeline->
push_back({ProgramOp::trace_scope, ctx});
2370 case BuilderOp::trace_enter:
2371 case BuilderOp::trace_exit: {
2373 ctx->funcIdx =
inst.fImmB;
2377 case BuilderOp::trace_var:
2385 ctx->slotIdx =
inst.fSlotA;
2386 ctx->numSlots =
inst.fImmB;
2387 ctx->data =
reinterpret_cast<int*
>(SlotA());
2389 ctx->indirectOffset =
2390 reinterpret_cast<const uint32_t*
>(tempStackMap[
inst.fImmC]) - (1 *
N);
2391 ctx->indirectLimit =
inst.fSlotB -
inst.fSlotA -
inst.fImmB;
2393 ctx->indirectOffset =
nullptr;
2394 ctx->indirectLimit = 0;
2396 pipeline->
push_back({ProgramOp::trace_var, ctx});
2400 SkDEBUGFAILF(
"Raster Pipeline: unsupported instruction %d", (
int)
inst.fOp);
2405 if (stackUsage != 0) {
2406 tempStackPtr += stackUsage *
N;
2407 SkASSERT(tempStackPtr >= slots.stack.begin());
2408 SkASSERT(tempStackPtr <= slots.stack.end());
2415 int numPipelineStages = pipeline->
size();
2416 if (numPipelineStages - mostRecentRewind > 500) {
2417 this->appendStackRewind(pipeline);
2418 mostRecentRewind = numPipelineStages;
2431 for (
int index = 0; index < fStages.
size(); ++index) {
2432 if (fStages[index].op == ProgramOp::label) {
2433 int labelID = sk_bit_cast<intptr_t>(fStages[index].ctx);
2435 fLabelToStageMap[labelID] = index;
2443 if (fProgram.fDebugTrace) {
2453 std::string& uniqueName = positionMap[
pos];
2456 if (uniqueName.empty()) {
2458 uniqueName = slotInfo.
name;
2461 int subscript = positionMap.
count() - 1;
2462 if (subscript > 0) {
2465 uniqueName.push_back((
char)0xE2);
2466 uniqueName.push_back((
char)0x82);
2467 uniqueName.push_back((
char)(0x80 + digit -
'0'));
2480 int labelID = ctx->
offset;
2481 const int* targetIndex = fLabelToStageMap.
find(labelID);
2488 std::string
imm(
float immFloat,
bool showAsFloat =
true)
const {
2490 if (sk_bit_cast<int32_t>(immFloat) == 0) {
2494 uint32_t immUnsigned;
2495 memcpy(&immUnsigned, &immFloat,
sizeof(uint32_t));
2508 std::string
immCtx(
const void* ctx,
bool showAsFloat =
true)
const {
2510 memcpy(&
f, &ctx,
sizeof(
float));
2511 return this->
imm(f, showAsFloat);
2535 while (range.
count > 0) {
2537 text += separator();
2541 int entireVariable = slotInfo.
columns * slotInfo.
rows;
2544 if (slotsToChomp != entireVariable) {
2548 range.
index += slotsToChomp;
2549 range.
count -= slotsToChomp;
2567 const float*
end = ptr + numSlots;
2568 if (ptr >= fUniforms.
begin() &&
end <= fUniforms.
end()) {
2569 int uniformIdx = ptr - fUniforms.
begin();
2570 if (fProgram.fDebugTrace) {
2573 if (!
name.empty()) {
2578 return 'u' + this->
asRange(uniformIdx, numSlots);
2585 const float*
end = ptr + (
N * numSlots);
2586 if (ptr >= fSlots.values.begin() &&
end <= fSlots.values.end()) {
2587 int valueIdx = ptr - fSlots.values.begin();
2590 if (fProgram.fDebugTrace) {
2592 std::string
name = this->
slotName({valueIdx, numSlots});
2593 if (!
name.empty()) {
2598 return 'v' + this->
asRange(valueIdx, numSlots);
2605 const float*
end = ptr + numSlots;
2606 if (ptr >= fSlots.immutable.begin() &&
end <= fSlots.immutable.end()) {
2607 int index = ptr - fSlots.immutable.begin();
2608 return 'i' + this->
asRange(index, numSlots) +
' ' +
2622 return '[' + this->
imm(*ptr) +
']';
2625 std::string
text =
"[";
2628 text += separator();
2635 std::string
ptrCtx(
const void* ctx,
int numSlots)
const {
2636 const float *ctxAsSlot =
static_cast<const float*
>(ctx);
2638 if (std::string uniform = this->
uniformPtrCtx(ctxAsSlot, numSlots); !uniform.empty()) {
2648 if (ctxAsSlot >= fSlots.stack.begin() && ctxAsSlot < fSlots.stack.end()) {
2649 int stackIdx = ctxAsSlot - fSlots.stack.begin();
2651 return '$' + this->
asRange(stackIdx /
N, numSlots);
2654 return "ExternalPtr(" + this->
asRange(0, numSlots) +
")";
2659 return (std::byte*)fSlots.values.data() +
offset;
2670 bool showAsFloat =
true)
const {
2672 return {this->
offsetCtx(ctx.dst, slots),
2673 this->
imm(sk_bit_cast<float>(ctx.value), showAsFloat)};
2678 std::tuple<std::string, std::string>
binaryOpCtx(
const void* v,
int numSlots)
const {
2680 return {this->
offsetCtx(ctx.dst, numSlots),
2686 std::tuple<std::string, std::string>
copyUniformCtx(
const void* v,
int numSlots)
const {
2688 return {this->
ptrCtx(ctx->dst, numSlots),
2689 this->
multiImmCtx(
reinterpret_cast<const float*
>(ctx->src), numSlots)};
2693 std::tuple<std::string, std::string>
adjacentPtrCtx(
const void* ctx,
int numSlots)
const {
2694 const float *ctxAsSlot =
static_cast<const float*
>(ctx);
2695 return std::make_tuple(this->
ptrCtx(ctxAsSlot, numSlots),
2696 this->
ptrCtx(ctxAsSlot + (
N * numSlots), numSlots));
2708 int numSlots = (ctx.src - ctx.dst) / (
N *
sizeof(
float));
2714 int numSlots)
const {
2715 const float *ctxAsSlot =
static_cast<const float*
>(ctx);
2716 return {this->
ptrCtx(ctxAsSlot, numSlots),
2717 this->
ptrCtx(ctxAsSlot + (
N * numSlots), numSlots),
2718 this->
ptrCtx(ctxAsSlot + (2 *
N * numSlots), numSlots)};
2723 int numSlots)
const {
2730 int numSlots = ctx.delta / (
sizeof(
float) *
N);
2735 template <
typename T>
2739 if (
offset == (0 *
N *
sizeof(
float))) {
2741 }
else if (
offset == (1 *
N *
sizeof(
float))) {
2743 }
else if (
offset == (2 *
N *
sizeof(
float))) {
2745 }
else if (
offset == (3 *
N *
sizeof(
float))) {
2758 template <
typename T>
2760 size_t highestComponent = *std::max_element(
offsets.begin(),
offsets.end()) /
2767 template <
typename T>
2776 int destSlots = (
int)op - (
int)BuilderOp::swizzle_1 + 1;
2777 return {this->
offsetCtx(ctx.dst, destSlots),
2784 int destSlots = (
int)op - (
int)BuilderOp::swizzle_copy_slot_masked + 1;
2787 this->
ptrCtx(ctx->src, destSlots)};
2791 std::tuple<std::string, std::string>
shuffleCtx(
const void* v)
const {
2794 std::string
dst = this->
ptrCtx(ctx->ptr, ctx->count);
2795 std::string
src =
"(" +
dst +
")[";
2796 for (
int index = 0; index < ctx->count; ++index) {
2797 if (ctx->offsets[index] % (
N *
sizeof(
float))) {
2805 return std::make_tuple(
dst,
src);
2809 std::tuple<std::string, std::string, std::string>
matrixMultiply(
const void* v)
const {
2811 int leftMatrix = ctx.leftColumns * ctx.leftRows;
2812 int rightMatrix = ctx.rightColumns * ctx.rightRows;
2813 int resultMatrix = ctx.rightColumns * ctx.leftRows;
2814 SkRPOffset leftOffset = ctx.dst + (ctx.rightColumns * ctx.leftRows *
sizeof(
float) *
N);
2815 SkRPOffset rightOffset = leftOffset + (ctx.leftColumns * ctx.leftRows *
sizeof(
float) *
N);
2819 this->offsetCtx(ctx.dst, resultMatrix).c_str()),
2823 this->offsetCtx(leftOffset, leftMatrix).c_str()),
2827 this->offsetCtx(rightOffset, rightMatrix).c_str())};
2847 fSlots = fProgram.allocateSlotData(&alloc);
2848 float* uniformPtr = alloc.
makeArray<
float>(fProgram.fNumUniformSlots);
2849 fUniforms =
SkSpan(uniformPtr, fProgram.fNumUniformSlots);
2852 fProgram.makeStages(&fStages, &alloc, fUniforms, fSlots);
2859 if (writeInstructionCount) {
2860 int invocationCount = 0, instructionCount = 0;
2861 for (
const Stage& stage : fStages) {
2867 case POp::invoke_shader:
2868 case POp::invoke_color_filter:
2869 case POp::invoke_blender:
2870 case POp::invoke_to_linear_srgb:
2871 case POp::invoke_from_linear_srgb:
2882 out->writeText(
" instructions");
2883 if (invocationCount > 0) {
2884 out->writeText(
", ");
2886 out->writeText(
" invocations");
2888 out->writeText(
"\n\n");
2892 const char*
header =
"[immutable slots]\n";
2893 const char* footer =
"";
2897 out->writeText(
"i");
2899 out->writeText(
" = ");
2900 out->writeText(this->
imm(sk_bit_cast<float>(
inst.fImmA)).c_str());
2901 out->writeText(
"\n");
2907 out->writeText(footer);
2910 for (
int index = 0; index < fStages.
size(); ++index) {
2911 const Stage& stage = fStages[index];
2913 std::string opArg1, opArg2, opArg3, opSwizzle;
2916 case POp::invoke_shader:
2917 case POp::invoke_color_filter:
2918 case POp::invoke_blender:
2919 opArg1 = this->
immCtx(stage.ctx,
false);
2922 case POp::case_op: {
2929 case POp::swizzle_1:
2930 case POp::swizzle_2:
2931 case POp::swizzle_3:
2932 case POp::swizzle_4:
2933 std::tie(opArg1, opArg2) = this->
swizzleCtx(stage.op, stage.ctx);
2936 case POp::swizzle_copy_slot_masked:
2937 case POp::swizzle_copy_2_slots_masked:
2938 case POp::swizzle_copy_3_slots_masked:
2939 case POp::swizzle_copy_4_slots_masked:
2940 std::tie(opArg1, opArg2) = this->
swizzleCopyCtx(stage.op, stage.ctx);
2943 case POp::refract_4_floats:
2945 opArg3 = this->
ptrCtx((
const float*)(stage.ctx) + (8 *
N), 1);
2948 case POp::dot_2_floats:
2949 opArg1 = this->
ptrCtx(stage.ctx, 1);
2953 case POp::dot_3_floats:
2954 opArg1 = this->
ptrCtx(stage.ctx, 1);
2958 case POp::dot_4_floats:
2959 opArg1 = this->
ptrCtx(stage.ctx, 1);
2964 std::tie(opArg1, opArg2) = this->
shuffleCtx(stage.ctx);
2967 case POp::matrix_multiply_2:
2968 case POp::matrix_multiply_3:
2969 case POp::matrix_multiply_4:
2970 std::tie(opArg1, opArg2, opArg3) = this->
matrixMultiply(stage.ctx);
2973 case POp::load_condition_mask:
2974 case POp::store_condition_mask:
2975 case POp::load_loop_mask:
2976 case POp::store_loop_mask:
2977 case POp::merge_loop_mask:
2978 case POp::reenable_loop_mask:
2979 case POp::load_return_mask:
2980 case POp::store_return_mask:
2981 case POp::continue_op:
2982 case POp::cast_to_float_from_int:
case POp::cast_to_float_from_uint:
2983 case POp::cast_to_int_from_float:
case POp::cast_to_uint_from_float:
2985 case POp::acos_float:
2986 case POp::asin_float:
2987 case POp::atan_float:
2988 case POp::ceil_float:
2989 case POp::cos_float:
2990 case POp::exp_float:
2991 case POp::exp2_float:
2992 case POp::log_float:
2993 case POp::log2_float:
2994 case POp::floor_float:
2995 case POp::invsqrt_float:
2996 case POp::sin_float:
2997 case POp::sqrt_float:
2998 case POp::tan_float:
2999 opArg1 = this->
ptrCtx(stage.ctx, 1);
3002 case POp::store_src_rg:
3003 case POp::cast_to_float_from_2_ints:
case POp::cast_to_float_from_2_uints:
3004 case POp::cast_to_int_from_2_floats:
case POp::cast_to_uint_from_2_floats:
3005 case POp::abs_2_ints:
3006 case POp::ceil_2_floats:
3007 case POp::floor_2_floats:
3008 case POp::invsqrt_2_floats:
3009 opArg1 = this->
ptrCtx(stage.ctx, 2);
3012 case POp::cast_to_float_from_3_ints:
case POp::cast_to_float_from_3_uints:
3013 case POp::cast_to_int_from_3_floats:
case POp::cast_to_uint_from_3_floats:
3014 case POp::abs_3_ints:
3015 case POp::ceil_3_floats:
3016 case POp::floor_3_floats:
3017 case POp::invsqrt_3_floats:
3018 opArg1 = this->
ptrCtx(stage.ctx, 3);
3023 case POp::exchange_src:
3024 case POp::store_src:
3025 case POp::store_dst:
3026 case POp::store_device_xy01:
3027 case POp::invoke_to_linear_srgb:
3028 case POp::invoke_from_linear_srgb:
3029 case POp::cast_to_float_from_4_ints:
case POp::cast_to_float_from_4_uints:
3030 case POp::cast_to_int_from_4_floats:
case POp::cast_to_uint_from_4_floats:
3031 case POp::abs_4_ints:
3032 case POp::ceil_4_floats:
3033 case POp::floor_4_floats:
3034 case POp::invsqrt_4_floats:
3035 case POp::inverse_mat2:
3036 opArg1 = this->
ptrCtx(stage.ctx, 4);
3039 case POp::inverse_mat3:
3040 opArg1 = this->
ptrCtx(stage.ctx, 9);
3043 case POp::inverse_mat4:
3044 opArg1 = this->
ptrCtx(stage.ctx, 16);
3047 case POp::copy_constant:
3048 case POp::add_imm_float:
3049 case POp::mul_imm_float:
3050 case POp::cmple_imm_float:
3051 case POp::cmplt_imm_float:
3052 case POp::cmpeq_imm_float:
3053 case POp::cmpne_imm_float:
3054 case POp::min_imm_float:
3055 case POp::max_imm_float:
3056 std::tie(opArg1, opArg2) = this->
constantCtx(stage.ctx, 1);
3059 case POp::add_imm_int:
3060 case POp::mul_imm_int:
3061 case POp::bitwise_and_imm_int:
3062 case POp::bitwise_xor_imm_int:
3063 case POp::cmple_imm_int:
3064 case POp::cmple_imm_uint:
3065 case POp::cmplt_imm_int:
3066 case POp::cmplt_imm_uint:
3067 case POp::cmpeq_imm_int:
3068 case POp::cmpne_imm_int:
3069 std::tie(opArg1, opArg2) = this->
constantCtx(stage.ctx, 1,
false);
3072 case POp::splat_2_constants:
3073 case POp::bitwise_and_imm_2_ints:
3074 std::tie(opArg1, opArg2) = this->
constantCtx(stage.ctx, 2);
3077 case POp::splat_3_constants:
3078 case POp::bitwise_and_imm_3_ints:
3079 std::tie(opArg1, opArg2) = this->
constantCtx(stage.ctx, 3);
3082 case POp::splat_4_constants:
3083 case POp::bitwise_and_imm_4_ints:
3084 std::tie(opArg1, opArg2) = this->
constantCtx(stage.ctx, 4);
3087 case POp::copy_uniform:
3091 case POp::copy_2_uniforms:
3095 case POp::copy_3_uniforms:
3099 case POp::copy_4_uniforms:
3103 case POp::copy_slot_masked:
3104 case POp::copy_slot_unmasked:
3105 case POp::copy_immutable_unmasked:
3106 std::tie(opArg1, opArg2) = this->
binaryOpCtx(stage.ctx, 1);
3109 case POp::copy_2_slots_masked:
3110 case POp::copy_2_slots_unmasked:
3111 case POp::copy_2_immutables_unmasked:
3112 std::tie(opArg1, opArg2) = this->
binaryOpCtx(stage.ctx, 2);
3115 case POp::copy_3_slots_masked:
3116 case POp::copy_3_slots_unmasked:
3117 case POp::copy_3_immutables_unmasked:
3118 std::tie(opArg1, opArg2) = this->
binaryOpCtx(stage.ctx, 3);
3121 case POp::copy_4_slots_masked:
3122 case POp::copy_4_slots_unmasked:
3123 case POp::copy_4_immutables_unmasked:
3124 std::tie(opArg1, opArg2) = this->
binaryOpCtx(stage.ctx, 4);
3127 case POp::copy_from_indirect_uniform_unmasked:
3128 case POp::copy_from_indirect_unmasked:
3129 case POp::copy_to_indirect_masked: {
3132 opArg1 = this->
ptrCtx(ctx->dst, ctx->slots);
3133 opArg2 = this->
ptrCtx(ctx->src, ctx->slots);
3134 opArg3 = this->
ptrCtx(ctx->indirectOffset, 1);
3137 case POp::swizzle_copy_to_indirect_masked: {
3139 opArg1 = this->
ptrCtx(ctx->dst, this->swizzleWidth(
SkSpan(ctx->offsets,
3141 opArg2 = this->
ptrCtx(ctx->src, ctx->slots);
3142 opArg3 = this->
ptrCtx(ctx->indirectOffset, 1);
3146 case POp::merge_condition_mask:
3147 case POp::merge_inv_condition_mask:
3148 case POp::add_float:
case POp::add_int:
3149 case POp::sub_float:
case POp::sub_int:
3150 case POp::mul_float:
case POp::mul_int:
3151 case POp::div_float:
case POp::div_int:
case POp::div_uint:
3152 case POp::bitwise_and_int:
3153 case POp::bitwise_or_int:
3154 case POp::bitwise_xor_int:
3155 case POp::mod_float:
3156 case POp::min_float:
case POp::min_int:
case POp::min_uint:
3157 case POp::max_float:
case POp::max_int:
case POp::max_uint:
3158 case POp::cmplt_float:
case POp::cmplt_int:
case POp::cmplt_uint:
3159 case POp::cmple_float:
case POp::cmple_int:
case POp::cmple_uint:
3160 case POp::cmpeq_float:
case POp::cmpeq_int:
3161 case POp::cmpne_float:
case POp::cmpne_int:
3165 case POp::mix_float:
case POp::mix_int:
3166 std::tie(opArg1, opArg2, opArg3) = this->
adjacent3PtrCtx(stage.ctx, 1);
3169 case POp::add_2_floats:
case POp::add_2_ints:
3170 case POp::sub_2_floats:
case POp::sub_2_ints:
3171 case POp::mul_2_floats:
case POp::mul_2_ints:
3172 case POp::div_2_floats:
case POp::div_2_ints:
case POp::div_2_uints:
3173 case POp::bitwise_and_2_ints:
3174 case POp::bitwise_or_2_ints:
3175 case POp::bitwise_xor_2_ints:
3176 case POp::mod_2_floats:
3177 case POp::min_2_floats:
case POp::min_2_ints:
case POp::min_2_uints:
3178 case POp::max_2_floats:
case POp::max_2_ints:
case POp::max_2_uints:
3179 case POp::cmplt_2_floats:
case POp::cmplt_2_ints:
case POp::cmplt_2_uints:
3180 case POp::cmple_2_floats:
case POp::cmple_2_ints:
case POp::cmple_2_uints:
3181 case POp::cmpeq_2_floats:
case POp::cmpeq_2_ints:
3182 case POp::cmpne_2_floats:
case POp::cmpne_2_ints:
3186 case POp::mix_2_floats:
case POp::mix_2_ints:
3187 std::tie(opArg1, opArg2, opArg3) = this->
adjacent3PtrCtx(stage.ctx, 2);
3190 case POp::add_3_floats:
case POp::add_3_ints:
3191 case POp::sub_3_floats:
case POp::sub_3_ints:
3192 case POp::mul_3_floats:
case POp::mul_3_ints:
3193 case POp::div_3_floats:
case POp::div_3_ints:
case POp::div_3_uints:
3194 case POp::bitwise_and_3_ints:
3195 case POp::bitwise_or_3_ints:
3196 case POp::bitwise_xor_3_ints:
3197 case POp::mod_3_floats:
3198 case POp::min_3_floats:
case POp::min_3_ints:
case POp::min_3_uints:
3199 case POp::max_3_floats:
case POp::max_3_ints:
case POp::max_3_uints:
3200 case POp::cmplt_3_floats:
case POp::cmplt_3_ints:
case POp::cmplt_3_uints:
3201 case POp::cmple_3_floats:
case POp::cmple_3_ints:
case POp::cmple_3_uints:
3202 case POp::cmpeq_3_floats:
case POp::cmpeq_3_ints:
3203 case POp::cmpne_3_floats:
case POp::cmpne_3_ints:
3207 case POp::mix_3_floats:
case POp::mix_3_ints:
3208 std::tie(opArg1, opArg2, opArg3) = this->
adjacent3PtrCtx(stage.ctx, 3);
3211 case POp::add_4_floats:
case POp::add_4_ints:
3212 case POp::sub_4_floats:
case POp::sub_4_ints:
3213 case POp::mul_4_floats:
case POp::mul_4_ints:
3214 case POp::div_4_floats:
case POp::div_4_ints:
case POp::div_4_uints:
3215 case POp::bitwise_and_4_ints:
3216 case POp::bitwise_or_4_ints:
3217 case POp::bitwise_xor_4_ints:
3218 case POp::mod_4_floats:
3219 case POp::min_4_floats:
case POp::min_4_ints:
case POp::min_4_uints:
3220 case POp::max_4_floats:
case POp::max_4_ints:
case POp::max_4_uints:
3221 case POp::cmplt_4_floats:
case POp::cmplt_4_ints:
case POp::cmplt_4_uints:
3222 case POp::cmple_4_floats:
case POp::cmple_4_ints:
case POp::cmple_4_uints:
3223 case POp::cmpeq_4_floats:
case POp::cmpeq_4_ints:
3224 case POp::cmpne_4_floats:
case POp::cmpne_4_ints:
3228 case POp::mix_4_floats:
case POp::mix_4_ints:
3229 std::tie(opArg1, opArg2, opArg3) = this->
adjacent3PtrCtx(stage.ctx, 4);
3232 case POp::add_n_floats:
case POp::add_n_ints:
3233 case POp::sub_n_floats:
case POp::sub_n_ints:
3234 case POp::mul_n_floats:
case POp::mul_n_ints:
3235 case POp::div_n_floats:
case POp::div_n_ints:
case POp::div_n_uints:
3236 case POp::bitwise_and_n_ints:
3237 case POp::bitwise_or_n_ints:
3238 case POp::bitwise_xor_n_ints:
3239 case POp::mod_n_floats:
3240 case POp::min_n_floats:
case POp::min_n_ints:
case POp::min_n_uints:
3241 case POp::max_n_floats:
case POp::max_n_ints:
case POp::max_n_uints:
3242 case POp::cmplt_n_floats:
case POp::cmplt_n_ints:
case POp::cmplt_n_uints:
3243 case POp::cmple_n_floats:
case POp::cmple_n_ints:
case POp::cmple_n_uints:
3244 case POp::cmpeq_n_floats:
case POp::cmpeq_n_ints:
3245 case POp::cmpne_n_floats:
case POp::cmpne_n_ints:
3246 case POp::atan2_n_floats:
3247 case POp::pow_n_floats:
3251 case POp::mix_n_floats:
case POp::mix_n_ints:
3252 case POp::smoothstep_n_floats:
3257 case POp::branch_if_all_lanes_active:
3258 case POp::branch_if_any_lanes_active:
3259 case POp::branch_if_no_lanes_active:
3264 case POp::branch_if_no_active_lanes_eq: {
3267 opArg2 = this->
ptrCtx(ctx->ptr, 1);
3268 opArg3 = this->
imm(sk_bit_cast<float>(ctx->value));
3271 case POp::trace_var: {
3273 opArg1 = this->
ptrCtx(ctx->traceMask, 1);
3274 opArg2 = this->
ptrCtx(ctx->data, ctx->numSlots);
3275 if (ctx->indirectOffset !=
nullptr) {
3276 opArg3 =
" + " + this->
ptrCtx(ctx->indirectOffset, 1);
3280 case POp::trace_line: {
3282 opArg1 = this->
ptrCtx(ctx->traceMask, 1);
3286 case POp::trace_enter:
3287 case POp::trace_exit: {
3289 opArg1 = this->
ptrCtx(ctx->traceMask, 1);
3290 opArg2 = (fProgram.fDebugTrace &&
3291 ctx->funcIdx >= 0 &&
3292 ctx->funcIdx < (
int)fProgram.fDebugTrace->
fFuncInfo.size())
3293 ? fProgram.fDebugTrace->
fFuncInfo[ctx->funcIdx].name
3297 case POp::trace_scope: {
3299 opArg1 = this->
ptrCtx(ctx->traceMask, 1);
3307 std::string_view opName;
3309 #define M(x) case POp::x: opName = #x; break;
3317 case POp::trace_var:
3318 opText =
"TraceVar(" + opArg2 + opArg3 +
") when " + opArg1 +
" is true";
3321 case POp::trace_line:
3322 opText =
"TraceLine(" + opArg2 +
") when " + opArg1 +
" is true";
3325 case POp::trace_enter:
3326 opText =
"TraceEnter(" + opArg2 +
") when " + opArg1 +
" is true";
3329 case POp::trace_exit:
3330 opText =
"TraceExit(" + opArg2 +
") when " + opArg1 +
" is true";
3333 case POp::trace_scope:
3334 opText =
"TraceScope(" + opArg2 +
") when " + opArg1 +
" is true";
3337 case POp::init_lane_masks:
3338 opText =
"CondMask = LoopMask = RetMask = true";
3341 case POp::load_condition_mask:
3342 opText =
"CondMask = " + opArg1;
3345 case POp::store_condition_mask:
3346 opText = opArg1 +
" = CondMask";
3349 case POp::merge_condition_mask:
3350 opText =
"CondMask = " + opArg1 +
" & " + opArg2;
3353 case POp::merge_inv_condition_mask:
3354 opText =
"CondMask = " + opArg1 +
" & ~" + opArg2;
3357 case POp::load_loop_mask:
3358 opText =
"LoopMask = " + opArg1;
3361 case POp::store_loop_mask:
3362 opText = opArg1 +
" = LoopMask";
3365 case POp::mask_off_loop_mask:
3366 opText =
"LoopMask &= ~(CondMask & LoopMask & RetMask)";
3369 case POp::reenable_loop_mask:
3370 opText =
"LoopMask |= " + opArg1;
3373 case POp::merge_loop_mask:
3374 opText =
"LoopMask &= " + opArg1;
3377 case POp::load_return_mask:
3378 opText =
"RetMask = " + opArg1;
3381 case POp::store_return_mask:
3382 opText = opArg1 +
" = RetMask";
3385 case POp::mask_off_return_mask:
3386 opText =
"RetMask &= ~(CondMask & LoopMask & RetMask)";
3389 case POp::store_src_rg:
3390 opText = opArg1 +
" = src.rg";
3393 case POp::exchange_src:
3394 opText =
"swap(src.rgba, " + opArg1 +
")";
3397 case POp::store_src:
3398 opText = opArg1 +
" = src.rgba";
3401 case POp::store_dst:
3402 opText = opArg1 +
" = dst.rgba";
3405 case POp::store_device_xy01:
3406 opText = opArg1 +
" = DeviceCoords.xy01";
3410 opText =
"src.rgba = " + opArg1;
3414 opText =
"dst.rgba = " + opArg1;
3417 case POp::bitwise_and_int:
3418 case POp::bitwise_and_2_ints:
3419 case POp::bitwise_and_3_ints:
3420 case POp::bitwise_and_4_ints:
3421 case POp::bitwise_and_n_ints:
3422 case POp::bitwise_and_imm_int:
3423 case POp::bitwise_and_imm_2_ints:
3424 case POp::bitwise_and_imm_3_ints:
3425 case POp::bitwise_and_imm_4_ints:
3426 opText = opArg1 +
" &= " + opArg2;
3429 case POp::bitwise_or_int:
3430 case POp::bitwise_or_2_ints:
3431 case POp::bitwise_or_3_ints:
3432 case POp::bitwise_or_4_ints:
3433 case POp::bitwise_or_n_ints:
3434 opText = opArg1 +
" |= " + opArg2;
3437 case POp::bitwise_xor_int:
3438 case POp::bitwise_xor_2_ints:
3439 case POp::bitwise_xor_3_ints:
3440 case POp::bitwise_xor_4_ints:
3441 case POp::bitwise_xor_n_ints:
3442 case POp::bitwise_xor_imm_int:
3443 opText = opArg1 +
" ^= " + opArg2;
3446 case POp::cast_to_float_from_int:
3447 case POp::cast_to_float_from_2_ints:
3448 case POp::cast_to_float_from_3_ints:
3449 case POp::cast_to_float_from_4_ints:
3450 opText = opArg1 +
" = IntToFloat(" + opArg1 +
")";
3453 case POp::cast_to_float_from_uint:
3454 case POp::cast_to_float_from_2_uints:
3455 case POp::cast_to_float_from_3_uints:
3456 case POp::cast_to_float_from_4_uints:
3457 opText = opArg1 +
" = UintToFloat(" + opArg1 +
")";
3460 case POp::cast_to_int_from_float:
3461 case POp::cast_to_int_from_2_floats:
3462 case POp::cast_to_int_from_3_floats:
3463 case POp::cast_to_int_from_4_floats:
3464 opText = opArg1 +
" = FloatToInt(" + opArg1 +
")";
3467 case POp::cast_to_uint_from_float:
3468 case POp::cast_to_uint_from_2_floats:
3469 case POp::cast_to_uint_from_3_floats:
3470 case POp::cast_to_uint_from_4_floats:
3471 opText = opArg1 +
" = FloatToUint(" + opArg1 +
")";
3474 case POp::copy_slot_masked:
case POp::copy_2_slots_masked:
3475 case POp::copy_3_slots_masked:
case POp::copy_4_slots_masked:
3476 case POp::swizzle_copy_slot_masked:
case POp::swizzle_copy_2_slots_masked:
3477 case POp::swizzle_copy_3_slots_masked:
case POp::swizzle_copy_4_slots_masked:
3478 opText = opArg1 +
" = Mask(" + opArg2 +
")";
3481 case POp::copy_uniform:
case POp::copy_2_uniforms:
3482 case POp::copy_3_uniforms:
case POp::copy_4_uniforms:
3483 case POp::copy_slot_unmasked:
case POp::copy_2_slots_unmasked:
3484 case POp::copy_3_slots_unmasked:
case POp::copy_4_slots_unmasked:
3485 case POp::copy_immutable_unmasked:
case POp::copy_2_immutables_unmasked:
3486 case POp::copy_3_immutables_unmasked:
case POp::copy_4_immutables_unmasked:
3487 case POp::copy_constant:
case POp::splat_2_constants:
3488 case POp::splat_3_constants:
case POp::splat_4_constants:
3489 case POp::swizzle_1:
case POp::swizzle_2:
3490 case POp::swizzle_3:
case POp::swizzle_4:
3492 opText = opArg1 +
" = " + opArg2;
3495 case POp::copy_from_indirect_unmasked:
3496 case POp::copy_from_indirect_uniform_unmasked:
3497 opText = opArg1 +
" = Indirect(" + opArg2 +
" + " + opArg3 +
")";
3500 case POp::copy_to_indirect_masked:
3501 opText =
"Indirect(" + opArg1 +
" + " + opArg3 +
") = Mask(" + opArg2 +
")";
3504 case POp::swizzle_copy_to_indirect_masked:
3505 opText =
"Indirect(" + opArg1 +
" + " + opArg3 +
")." + opSwizzle +
" = Mask(" +
3510 case POp::abs_2_ints:
3511 case POp::abs_3_ints:
3512 case POp::abs_4_ints:
3513 opText = opArg1 +
" = abs(" + opArg1 +
")";
3516 case POp::acos_float:
3517 opText = opArg1 +
" = acos(" + opArg1 +
")";
3520 case POp::asin_float:
3521 opText = opArg1 +
" = asin(" + opArg1 +
")";
3524 case POp::atan_float:
3525 opText = opArg1 +
" = atan(" + opArg1 +
")";
3528 case POp::atan2_n_floats:
3529 opText = opArg1 +
" = atan2(" + opArg1 +
", " + opArg2 +
")";
3532 case POp::ceil_float:
3533 case POp::ceil_2_floats:
3534 case POp::ceil_3_floats:
3535 case POp::ceil_4_floats:
3536 opText = opArg1 +
" = ceil(" + opArg1 +
")";
3539 case POp::cos_float:
3540 opText = opArg1 +
" = cos(" + opArg1 +
")";
3543 case POp::refract_4_floats:
3544 opText = opArg1 +
" = refract(" + opArg1 +
", " + opArg2 +
", " + opArg3 +
")";
3547 case POp::dot_2_floats:
3548 case POp::dot_3_floats:
3549 case POp::dot_4_floats:
3550 opText = opArg1 +
" = dot(" + opArg2 +
", " + opArg3 +
")";
3553 case POp::exp_float:
3554 opText = opArg1 +
" = exp(" + opArg1 +
")";
3557 case POp::exp2_float:
3558 opText = opArg1 +
" = exp2(" + opArg1 +
")";
3561 case POp::log_float:
3562 opText = opArg1 +
" = log(" + opArg1 +
")";
3565 case POp::log2_float:
3566 opText = opArg1 +
" = log2(" + opArg1 +
")";
3569 case POp::pow_n_floats:
3570 opText = opArg1 +
" = pow(" + opArg1 +
", " + opArg2 +
")";
3573 case POp::sin_float:
3574 opText = opArg1 +
" = sin(" + opArg1 +
")";
3577 case POp::sqrt_float:
3578 opText = opArg1 +
" = sqrt(" + opArg1 +
")";
3581 case POp::tan_float:
3582 opText = opArg1 +
" = tan(" + opArg1 +
")";
3585 case POp::floor_float:
3586 case POp::floor_2_floats:
3587 case POp::floor_3_floats:
3588 case POp::floor_4_floats:
3589 opText = opArg1 +
" = floor(" + opArg1 +
")";
3592 case POp::invsqrt_float:
3593 case POp::invsqrt_2_floats:
3594 case POp::invsqrt_3_floats:
3595 case POp::invsqrt_4_floats:
3596 opText = opArg1 +
" = inversesqrt(" + opArg1 +
")";
3599 case POp::inverse_mat2:
3600 case POp::inverse_mat3:
3601 case POp::inverse_mat4:
3602 opText = opArg1 +
" = inverse(" + opArg1 +
")";
3605 case POp::add_float:
case POp::add_int:
3606 case POp::add_2_floats:
case POp::add_2_ints:
3607 case POp::add_3_floats:
case POp::add_3_ints:
3608 case POp::add_4_floats:
case POp::add_4_ints:
3609 case POp::add_n_floats:
case POp::add_n_ints:
3610 case POp::add_imm_float:
case POp::add_imm_int:
3611 opText = opArg1 +
" += " + opArg2;
3614 case POp::sub_float:
case POp::sub_int:
3615 case POp::sub_2_floats:
case POp::sub_2_ints:
3616 case POp::sub_3_floats:
case POp::sub_3_ints:
3617 case POp::sub_4_floats:
case POp::sub_4_ints:
3618 case POp::sub_n_floats:
case POp::sub_n_ints:
3619 opText = opArg1 +
" -= " + opArg2;
3622 case POp::mul_float:
case POp::mul_int:
3623 case POp::mul_2_floats:
case POp::mul_2_ints:
3624 case POp::mul_3_floats:
case POp::mul_3_ints:
3625 case POp::mul_4_floats:
case POp::mul_4_ints:
3626 case POp::mul_n_floats:
case POp::mul_n_ints:
3627 case POp::mul_imm_float:
case POp::mul_imm_int:
3628 opText = opArg1 +
" *= " + opArg2;
3631 case POp::div_float:
case POp::div_int:
case POp::div_uint:
3632 case POp::div_2_floats:
case POp::div_2_ints:
case POp::div_2_uints:
3633 case POp::div_3_floats:
case POp::div_3_ints:
case POp::div_3_uints:
3634 case POp::div_4_floats:
case POp::div_4_ints:
case POp::div_4_uints:
3635 case POp::div_n_floats:
case POp::div_n_ints:
case POp::div_n_uints:
3636 opText = opArg1 +
" /= " + opArg2;
3639 case POp::matrix_multiply_2:
3640 case POp::matrix_multiply_3:
3641 case POp::matrix_multiply_4:
3642 opText = opArg1 +
" = " + opArg2 +
" * " + opArg3;
3645 case POp::mod_float:
3646 case POp::mod_2_floats:
3647 case POp::mod_3_floats:
3648 case POp::mod_4_floats:
3649 case POp::mod_n_floats:
3650 opText = opArg1 +
" = mod(" + opArg1 +
", " + opArg2 +
")";
3653 case POp::min_float:
case POp::min_int:
case POp::min_uint:
3654 case POp::min_2_floats:
case POp::min_2_ints:
case POp::min_2_uints:
3655 case POp::min_3_floats:
case POp::min_3_ints:
case POp::min_3_uints:
3656 case POp::min_4_floats:
case POp::min_4_ints:
case POp::min_4_uints:
3657 case POp::min_n_floats:
case POp::min_n_ints:
case POp::min_n_uints:
3658 case POp::min_imm_float:
3659 opText = opArg1 +
" = min(" + opArg1 +
", " + opArg2 +
")";
3662 case POp::max_float:
case POp::max_int:
case POp::max_uint:
3663 case POp::max_2_floats:
case POp::max_2_ints:
case POp::max_2_uints:
3664 case POp::max_3_floats:
case POp::max_3_ints:
case POp::max_3_uints:
3665 case POp::max_4_floats:
case POp::max_4_ints:
case POp::max_4_uints:
3666 case POp::max_n_floats:
case POp::max_n_ints:
case POp::max_n_uints:
3667 case POp::max_imm_float:
3668 opText = opArg1 +
" = max(" + opArg1 +
", " + opArg2 +
")";
3671 case POp::cmplt_float:
case POp::cmplt_int:
case POp::cmplt_uint:
3672 case POp::cmplt_2_floats:
case POp::cmplt_2_ints:
case POp::cmplt_2_uints:
3673 case POp::cmplt_3_floats:
case POp::cmplt_3_ints:
case POp::cmplt_3_uints:
3674 case POp::cmplt_4_floats:
case POp::cmplt_4_ints:
case POp::cmplt_4_uints:
3675 case POp::cmplt_n_floats:
case POp::cmplt_n_ints:
case POp::cmplt_n_uints:
3676 case POp::cmplt_imm_float:
case POp::cmplt_imm_int:
case POp::cmplt_imm_uint:
3677 opText = opArg1 +
" = lessThan(" + opArg1 +
", " + opArg2 +
")";
3680 case POp::cmple_float:
case POp::cmple_int:
case POp::cmple_uint:
3681 case POp::cmple_2_floats:
case POp::cmple_2_ints:
case POp::cmple_2_uints:
3682 case POp::cmple_3_floats:
case POp::cmple_3_ints:
case POp::cmple_3_uints:
3683 case POp::cmple_4_floats:
case POp::cmple_4_ints:
case POp::cmple_4_uints:
3684 case POp::cmple_n_floats:
case POp::cmple_n_ints:
case POp::cmple_n_uints:
3685 case POp::cmple_imm_float:
case POp::cmple_imm_int:
case POp::cmple_imm_uint:
3686 opText = opArg1 +
" = lessThanEqual(" + opArg1 +
", " + opArg2 +
")";
3689 case POp::cmpeq_float:
case POp::cmpeq_int:
3690 case POp::cmpeq_2_floats:
case POp::cmpeq_2_ints:
3691 case POp::cmpeq_3_floats:
case POp::cmpeq_3_ints:
3692 case POp::cmpeq_4_floats:
case POp::cmpeq_4_ints:
3693 case POp::cmpeq_n_floats:
case POp::cmpeq_n_ints:
3694 case POp::cmpeq_imm_float:
case POp::cmpeq_imm_int:
3695 opText = opArg1 +
" = equal(" + opArg1 +
", " + opArg2 +
")";
3698 case POp::cmpne_float:
case POp::cmpne_int:
3699 case POp::cmpne_2_floats:
case POp::cmpne_2_ints:
3700 case POp::cmpne_3_floats:
case POp::cmpne_3_ints:
3701 case POp::cmpne_4_floats:
case POp::cmpne_4_ints:
3702 case POp::cmpne_n_floats:
case POp::cmpne_n_ints:
3703 case POp::cmpne_imm_float:
case POp::cmpne_imm_int:
3704 opText = opArg1 +
" = notEqual(" + opArg1 +
", " + opArg2 +
")";
3707 case POp::mix_float:
case POp::mix_int:
3708 case POp::mix_2_floats:
case POp::mix_2_ints:
3709 case POp::mix_3_floats:
case POp::mix_3_ints:
3710 case POp::mix_4_floats:
case POp::mix_4_ints:
3711 case POp::mix_n_floats:
case POp::mix_n_ints:
3712 opText = opArg1 +
" = mix(" + opArg2 +
", " + opArg3 +
", " + opArg1 +
")";
3715 case POp::smoothstep_n_floats:
3716 opText = opArg1 +
" = smoothstep(" + opArg1 +
", " + opArg2 +
", " + opArg3 +
")";
3720 case POp::branch_if_all_lanes_active:
3721 case POp::branch_if_any_lanes_active:
3722 case POp::branch_if_no_lanes_active:
3723 case POp::invoke_shader:
3724 case POp::invoke_color_filter:
3725 case POp::invoke_blender:
3726 opText = std::string(opName) +
" " + opArg1;
3729 case POp::invoke_to_linear_srgb:
3730 opText = opArg1 +
" = toLinearSrgb(" + opArg1 +
")";
3733 case POp::invoke_from_linear_srgb:
3734 opText = opArg1 +
" = fromLinearSrgb(" + opArg1 +
")";
3737 case POp::branch_if_no_active_lanes_eq:
3738 opText =
"branch " + opArg1 +
" if no lanes of " + opArg2 +
" == " + opArg3;
3742 opText =
"label " + opArg1;
3746 opText =
"if (" + opArg1 +
" == " + opArg3 +
3747 ") { LoopMask = true; " + opArg2 +
" = false; }";
3750 case POp::continue_op:
3752 " |= Mask(0xFFFFFFFF); LoopMask &= ~(CondMask & LoopMask & RetMask)";
3759 opName = opName.substr(0, 30);
3760 if (!opText.empty()) {
3762 (
int)opName.size(), opName.data(),
3763 opText.c_str()).c_str());
3766 (
int)opName.size(), opName.data()).c_str());
3772 Dumper(*this).dump(
out, writeInstructionCount);
sk_bzero(glyphs, sizeof(glyphs))
#define SkDEBUGFAIL(message)
#define SkDEBUGFAILF(fmt,...)
#define SkASSERTF(cond, fmt,...)
#define SK_RASTER_PIPELINE_OPS_ALL(M)
static constexpr int kNumRasterPipelineHighpOps
#define ALL_N_WAY_TERNARY_OP_CASES
#define ALL_IMMEDIATE_BINARY_OP_CASES
#define ALL_MULTI_SLOT_TERNARY_OP_CASES
#define ALL_N_WAY_BINARY_OP_CASES
#define ALL_MULTI_SLOT_UNARY_OP_CASES
#define ALL_MULTI_SLOT_BINARY_OP_CASES
#define ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES
#define ALL_SINGLE_SLOT_UNARY_OP_CASES
#define SKRP_EXTENDED_OPS(M)
SkSpan(Container &&) -> SkSpan< std::remove_pointer_t< decltype(std::data(std::declval< Container >()))> >
static void copy(void *dst, const uint8_t *src, int width, int bpp, int deltaSrc, int offset, const SkPMColor ctable[])
constexpr int SkToInt(S x)
void * makeBytesAlignedTo(size_t size, size_t align)
T * makeArray(size_t count)
auto make(Ctor &&ctor) -> decltype(ctor(nullptr))
void append(SkRasterPipelineOp, void *=nullptr)
std::vector< SlotDebugInfo > fUniformInfo
std::vector< FunctionDebugInfo > fFuncInfo
std::vector< TraceInfo > fTraceInfo
std::vector< SlotDebugInfo > fSlotInfo
void dot_floats(int32_t slots)
void copy_stack_to_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange)
void push_duplicates(int count)
void copy_slots_unmasked(SlotRange dst, SlotRange src)
void pad_stack(int32_t count)
void binary_op(BuilderOp op, int32_t slots)
void swizzle_copy_stack_to_slots(SlotRange dst, SkSpan< const int8_t > components, int offsetFromStackTop)
void copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src)
void copy_constant(Slot slot, int constantValue)
void branch_if_no_lanes_active(int labelID)
void merge_condition_mask()
void push_clone_indirect_from_stack(SlotRange fixedOffset, int dynamicStackID, int otherStackID, int offsetFromStackTop)
void push_clone(int numSlots, int offsetFromStackTop=0)
void swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange, SkSpan< const int8_t > components, int offsetFromStackTop)
void zero_slots_unmasked(SlotRange dst)
void swizzle(int consumedSlots, SkSpan< const int8_t > components)
void matrix_resize(int origColumns, int origRows, int newColumns, int newRows)
void branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID)
void inverse_matrix(int32_t n)
void push_uniform(SlotRange src)
void pop_slots(SlotRange dst)
void transpose(int columns, int rows)
void discard_stack(int32_t count, int stackID)
void trace_var_indirect(int traceMaskStackID, SlotRange fixedRange, int dynamicStackID, SlotRange limitRange)
void copy_stack_to_slots(SlotRange dst)
void copy_immutable_unmasked(SlotRange dst, SlotRange src)
std::unique_ptr< Program > finish(int numValueSlots, int numUniformSlots, int numImmutableSlots, DebugTracePriv *debugTrace=nullptr)
void push_slots_or_immutable_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange, BuilderOp op)
void push_uniform_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange)
void ternary_op(BuilderOp op, int32_t slots)
void push_constant_f(float val)
void diagonal_matrix(int columns, int rows)
void push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop)
void unary_op(BuilderOp op, int32_t slots)
void push_constant_i(int32_t val, int count=1)
void push_slots_or_immutable(SlotRange src, BuilderOp op)
void push_condition_mask()
bool executionMaskWritesAreEnabled()
void matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows)
void branch_if_all_lanes_active(int labelID)
void pop_slots_unmasked(SlotRange dst)
void branch_if_any_lanes_active(int labelID)
void copy_stack_to_slots_unmasked(SlotRange dst)
virtual void toLinearSrgb(const void *color)=0
virtual bool appendShader(int index)=0
virtual bool appendBlender(int index)=0
virtual bool appendColorFilter(int index)=0
virtual void fromLinearSrgb(const void *color)=0
size_t swizzleWidth(SkSpan< T > offsets) const
void buildUniqueSlotNameList()
std::string uniformName(SlotRange range) const
std::string immCtx(const void *ctx, bool showAsFloat=true) const
std::tuple< std::string, std::string > adjacentPtrCtx(const void *ctx, int numSlots) const
std::byte * offsetToPtr(SkRPOffset offset) const
std::tuple< std::string, std::string > swizzleCopyCtx(ProgramOp op, const void *v) const
std::string uniformPtrCtx(const float *ptr, int numSlots) const
std::string slotName(SlotRange range) const
void dump(SkWStream *out, bool writeInstructionCount)
void buildLabelToStageMap()
std::tuple< std::string, std::string > copyUniformCtx(const void *v, int numSlots) const
std::string imm(float immFloat, bool showAsFloat=true) const
std::string valuePtrCtx(const float *ptr, int numSlots) const
std::tuple< std::string, std::string > adjacentOffsetCtx(SkRPOffset offset, int numSlots) const
std::tuple< std::string, std::string > constantCtx(const void *v, int slots, bool showAsFloat=true) const
std::string swizzlePtr(const void *ptr, SkSpan< T > offsets) const
std::string slotOrUniformName(SkSpan< const SlotDebugInfo > debugInfo, SkSpan< const std::string > names, SlotRange range) const
std::string swizzleOffsetSpan(SkSpan< T > offsets) const
std::tuple< std::string, std::string > adjacentBinaryOpCtx(const void *v) const
std::tuple< std::string, std::string, std::string > adjacentTernaryOpCtx(const void *v) const
std::tuple< std::string, std::string, std::string > matrixMultiply(const void *v) const
std::string ptrCtx(const void *ctx, int numSlots) const
std::tuple< std::string, std::string, std::string > adjacent3OffsetCtx(SkRPOffset offset, int numSlots) const
std::string multiImmCtx(const float *ptr, int count) const
std::string branchOffset(const SkRasterPipeline_BranchCtx *ctx, int index) const
std::tuple< std::string, std::string, std::string > adjacent3PtrCtx(const void *ctx, int numSlots) const
std::string asRange(int first, int count) const
std::string offsetCtx(SkRPOffset offset, int numSlots) const
std::tuple< std::string, std::string > shuffleCtx(const void *v) const
std::tuple< std::string, std::string > swizzleCtx(ProgramOp op, const void *v) const
std::tuple< std::string, std::string > binaryOpCtx(const void *v, int numSlots) const
std::string immutablePtrCtx(const float *ptr, int numSlots) const
bool appendStages(SkRasterPipeline *pipeline, SkArenaAlloc *alloc, Callbacks *callbacks, SkSpan< const float > uniforms) const
void dump(SkWStream *out, bool writeInstructionCount=false) const
Program(skia_private::TArray< Instruction > instrs, int numValueSlots, int numUniformSlots, int numImmutableSlots, int numLabels, DebugTracePriv *debugTrace)
static std::unique_ptr< Tracer > Make(std::vector< TraceInfo > *traceInfo)
constexpr T * begin() const
constexpr T * end() const
constexpr auto rbegin() const
constexpr auto rend() const
constexpr size_t size() const
const char * c_str() const
void resize(size_t count)
void reserve_exact(int n)
V * find(const K &key) const
static const char * begin(const StringSlice &s)
static float max(float r, float g, float b)
static float min(float r, float g, float b)
static void ABI stack_rewind(Params *params, SkRasterPipelineStage *program, F r, F g, F b, F a)
size_t raster_pipeline_highp_stride
static void * Pack(const T &ctx, SkArenaAlloc *alloc)
static UnpackedType< T > Unpack(const T *ctx)
static int max_packed_nybble(uint32_t components, size_t numComponents)
static bool immutable_data_is_splattable(int32_t *immutablePtr, int numSlots)
static bool is_immediate_op(BuilderOp op)
static void unpack_nybbles_to_offsets(uint32_t components, SkSpan< T > offsets)
static bool is_multi_slot_immediate_op(BuilderOp op)
static int pack_nybbles(SkSpan< const int8_t > components)
static void * context_bit_pun(intptr_t val)
@ copy_uniform_to_slots_unmasked
@ push_immutable_indirect
@ copy_stack_to_slots_indirect
@ copy_stack_to_slots_unmasked
@ swizzle_copy_stack_to_slots_indirect
@ push_clone_indirect_from_stack
@ swizzle_copy_stack_to_slots
@ branch_if_no_active_lanes_on_stack_top_equal
@ pop_and_reenable_loop_mask
static bool slot_ranges_overlap(SlotRange x, SlotRange y)
static int stack_usage(const Instruction &inst)
static BuilderOp convert_n_way_op_to_immediate(BuilderOp op, int slots, int32_t *constantValue)
std::string printf(const char *fmt,...) SK_PRINTF_LIKE(1
std::string void void auto Separator()
static const char *const names[]
DEF_SWITCHES_START aot vmservice shared library name
std::string to_string(float value)
SINT bool isfinite(const Vec< N, T > &v)
SI Vec< sizeof...(Ix), T > shuffle(const Vec< N, T > &)
static SkString to_string(int n)
static const char header[]