113 {
114 constexpr uint32_t kProblemSize = 512;
115 constexpr float kFactor = 4.f;
116
117
118
119 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
120
121 std::unique_ptr<Recorder> recorder = context->makeRecorder();
122
124 public:
125
126
128 "TestArrayMultiply",
129 {kWorkgroupSize, 1, 1},
130 {
131
132 {
133
134 ResourceType::kStorageBuffer,
137 "inputBlock {\n"
138 " float factor;\n"
139 " layout(offset=16) float4 in_data[];\n"
140 "}",
141 },
142
143 {
144 ResourceType::kStorageBuffer,
146
148 0,
149 "outputBlock { float4 out_data[]; }",
150 }
151 }) {}
152 ~TestComputeStep() override = default;
153
154
156 return R"(
157 void main() {
158 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
159 }
160 )";
161 }
162
164 if (index == 0) {
165 SkASSERT(r.fFlow == DataFlow::kPrivate);
166 return sizeof(
float) * (kProblemSize + 4);
167 }
170 SkASSERT(r.fFlow == DataFlow::kShared);
171 return sizeof(
float) * kProblemSize;
172 }
173
175 const ResourceDesc& r,
177 size_t bufferSize) const override {
178
179 if (resourceIndex != 0) {
180 return;
181 }
182 SkASSERT(r.fFlow == DataFlow::kPrivate);
183
184 size_t dataCount =
sizeof(
float) * (kProblemSize + 4);
187 inData[0] = kFactor;
188 for (unsigned int i = 0; i < kProblemSize; ++i) {
189 inData[i + 4] = i + 1;
190 }
191 }
192
195 }
197
201 return;
202 }
203
204
206 if (!outputInfo) {
207 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
208 return;
209 }
210
211
215
216
217 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
218
219
220 std::unique_ptr<Recording> recording = recorder->snap();
221 if (!recording) {
223 return;
224 }
225
228 context->insertRecording(insertInfo);
229 testContext->syncedSubmit(context);
230
231
232 float* outData = static_cast<float*>(
233 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
234 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
235 for (unsigned int i = 0; i < kProblemSize; ++i) {
236 const float expected = (i + 1) * kFactor;
237 const float found = outData[i];
239 }
240}
241
242
243
246 context,
247 testContext) {
248
249 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
250 return;
251 }
252
253 constexpr uint32_t kProblemSize = 512;
254 constexpr float kFactor1 = 4.f;
255 constexpr float kFactor2 = 3.f;
256
257
258
259 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
260
261 std::unique_ptr<Recorder> recorder = context->makeRecorder();
262
263
264
266 public:
267
268
270 "TestArrayMultiplyFirstPass",
271 {kWorkgroupSize, 1, 1},
272 {
273
274 {
275
276 ResourceType::kStorageBuffer,
279 "inputBlock {\n"
280 " float factor;\n"
281 " layout(offset=16) float4 in_data[];\n"
282 "}",
283 },
284
285 {
286 ResourceType::kStorageBuffer,
289 0,
290 "outputBlock1 { float4 forward_data[]; }",
291 },
292 {
293 ResourceType::kStorageBuffer,
296 1,
297 "outputBlock2 { float2 extra_data; }",
298 }
299 }) {}
300 ~TestComputeStep1() override = default;
301
302
304 return R"(
305 void main() {
306 uint idx = sk_GlobalInvocationID.x;
307 forward_data[idx] = in_data[idx] * factor;
308 if (idx == 0) {
309 extra_data.x = factor;
310 extra_data.y = 2 * factor;
311 }
312 }
313 )";
314 }
315
317 if (index == 0) {
318 SkASSERT(r.fFlow == DataFlow::kPrivate);
319 return sizeof(
float) * (kProblemSize + 4);
320 }
321 if (index == 1) {
322 SkASSERT(r.fFlow == DataFlow::kShared);
324 return sizeof(
float) * kProblemSize;
325 }
326
329 SkASSERT(r.fFlow == DataFlow::kShared);
330 return 2 *
sizeof(
float);
331 }
332
334 const ResourceDesc& r,
336 size_t bufferSize) const override {
337 if (resourceIndex != 0) {
338 return;
339 }
340
341 size_t dataCount =
sizeof(
float) * (kProblemSize + 4);
344 inData[0] = kFactor1;
345 for (unsigned int i = 0; i < kProblemSize; ++i) {
346 inData[i + 4] = i + 1;
347 }
348 }
349
352 }
353 } step1;
354
356 public:
358 "TestArrayMultiplySecondPass",
359 {kWorkgroupSize, 1, 1},
360 {
361
362 {
363 ResourceType::kStorageBuffer,
366 0,
367 "inputBlock { float4 in_data[]; }",
368 },
369 {
370 ResourceType::kStorageBuffer,
373 "factorBlock { float factor; }"
374 },
375
376 {
377 ResourceType::kStorageBuffer,
380 2,
381 "outputBlock { float4 out_data[]; }",
382 }
383 }) {}
384 ~TestComputeStep2() override = default;
385
386
388 return R"(
389 void main() {
390 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
391 }
392 )";
393 }
394
397 if (index == 1) {
398 SkASSERT(r.fFlow == DataFlow::kPrivate);
399 return sizeof(
float) * 4;
400 }
403 SkASSERT(r.fFlow == DataFlow::kShared);
404 return sizeof(
float) * kProblemSize;
405 }
406
408 const ResourceDesc& r,
410 size_t bufferSize) const override {
411 if (resourceIndex != 1) {
412 return;
413 }
414 SkASSERT(r.fFlow == DataFlow::kPrivate);
415 *
static_cast<float*
>(
buffer) = kFactor2;
416 }
417
420 }
421 } step2;
422
426
427
428
429
431 std::holds_alternative<BufferView>(
builder.outputTable().fSharedSlots[0]),
432 "shared resource at slot 0 is missing");
434 if (!outputInfo) {
435 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
436 return;
437 }
438
439
441 if (!extraOutputInfo) {
443 return;
444 }
445
446
450
451
452 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
453 auto extraOutputBuffer = sync_buffer_to_cpu(recorder.get(), extraOutputInfo.
fBuffer);
454
455
456 std::unique_ptr<Recording> recording = recorder->snap();
457 if (!recording) {
459 return;
460 }
461
464 context->insertRecording(insertInfo);
465 testContext->syncedSubmit(context);
466
467
468 float* outData = static_cast<float*>(
469 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
470 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
471 for (unsigned int i = 0; i < kProblemSize; ++i) {
472 const float expected = (i + 1) * kFactor1 * kFactor2;
473 const float found = outData[i];
475 }
476
477
478 float* extraOutData = static_cast<float*>(
479 map_buffer(context, testContext, extraOutputBuffer.get(), extraOutputInfo.
fOffset));
480 SkASSERT(extraOutputBuffer->isMapped() && extraOutData !=
nullptr);
482 kFactor1 == extraOutData[0],
483 "expected '%f', found '%f'",
484 kFactor1,
485 extraOutData[0]);
487 2 * kFactor1 == extraOutData[1],
488 "expected '%f', found '%f'",
489 2 * kFactor2,
490 extraOutData[1]);
491}
492
493
494
497 context,
498 testContext) {
499
500 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
501 return;
502 }
503
504 constexpr uint32_t kProblemSize = 512;
505 constexpr float kFactor = 4.f;
506
507
508
509 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
510
511 std::unique_ptr<Recorder> recorder = context->makeRecorder();
512
514 public:
516 "TestArrayMultiply",
517 {kWorkgroupSize, 1, 1},
518 {
519
520 {
521 ResourceType::kUniformBuffer,
524 "uniformBlock { float factor; }"
525 },
526
527 {
528 ResourceType::kStorageBuffer,
531 "inputBlock { float4 in_data[]; }",
532 },
533
534 {
535 ResourceType::kStorageBuffer,
537
539 0,
540 "outputBlock { float4 out_data[]; }",
541 }
542 }) {}
543 ~TestComputeStep() override = default;
544
545
547 return R"(
548 void main() {
549 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
550 }
551 )";
552 }
553
555 if (index == 0) {
556 SkASSERT(r.fFlow == DataFlow::kPrivate);
557 return sizeof(
float);
558 }
559 if (index == 1) {
560 SkASSERT(r.fFlow == DataFlow::kPrivate);
561 return sizeof(
float) * kProblemSize;
562 }
565 SkASSERT(r.fFlow == DataFlow::kShared);
566 return sizeof(
float) * kProblemSize;
567 }
568
570 const ResourceDesc& r,
572 size_t bufferSize) const override {
573
574 if (resourceIndex != 1) {
575 return;
576 }
577 SkASSERT(r.fFlow == DataFlow::kPrivate);
578 size_t dataCount =
sizeof(
float) * kProblemSize;
581 for (unsigned int i = 0; i < kProblemSize; ++i) {
582 inData[i] = i + 1;
583 }
584 }
585
587 const ResourceDesc&,
592 mgr->setExpectedUniforms(uniforms);
593 )
595 }
596
599 }
601
605 return;
606 }
607
608
610 if (!outputInfo) {
611 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
612 return;
613 }
614
615
619
620
621 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
622
623
624 std::unique_ptr<Recording> recording = recorder->snap();
625 if (!recording) {
627 return;
628 }
629
632 context->insertRecording(insertInfo);
633 testContext->syncedSubmit(context);
634
635
636 float* outData = static_cast<float*>(
637 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
638 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
639 for (unsigned int i = 0; i < kProblemSize; ++i) {
640 const float expected = (i + 1) * kFactor;
641 const float found = outData[i];
643 }
644}
645
646
647
650 context,
651 testContext) {
652 constexpr uint32_t kProblemSize = 512;
653 constexpr float kFactor = 4.f;
654
655
656
657 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
658
659 std::unique_ptr<Recorder> recorder = context->makeRecorder();
660
662 public:
664 "ExternallyAssignedBuffer",
665 {kWorkgroupSize, 1, 1},
666 {
667
668 {
669 ResourceType::kStorageBuffer,
672 "inputBlock {\n"
673 " float factor;\n"
674 " layout(offset = 16) float4 in_data[];\n"
675 "}\n",
676 },
677
678 {
679 ResourceType::kStorageBuffer,
681
683 0,
684 "outputBlock { float4 out_data[]; }",
685 }
686 }) {}
687 ~TestComputeStep() override = default;
688
689
691 return R"(
692 void main() {
693 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
694 }
695 )";
696 }
697
700 SkASSERT(r.fFlow == DataFlow::kPrivate);
701 return sizeof(
float) * (kProblemSize + 4);
702 }
703
705 const ResourceDesc& r,
707 size_t bufferSize) const override {
709 SkASSERT(r.fFlow == DataFlow::kPrivate);
710
711 size_t dataCount =
sizeof(
float) * (kProblemSize + 4);
714 inData[0] = kFactor;
715 for (unsigned int i = 0; i < kProblemSize; ++i) {
716 inData[i + 4] = i + 1;
717 }
718 }
720
721
722
723 auto [
_, outputInfo] =
724 recorder->priv().drawBufferManager()->getStoragePointer(sizeof(float) * kProblemSize);
726
728 builder.assignSharedBuffer({outputInfo,
sizeof(
float) * kProblemSize}, 0);
729
730
731 if (!
builder.appendStep(&
step, {WorkgroupSize(1, 1, 1)})) {
733 return;
734 }
735
736
740
741
742 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
743
744
745 std::unique_ptr<Recording> recording = recorder->snap();
746 if (!recording) {
748 return;
749 }
750
753 context->insertRecording(insertInfo);
754 testContext->syncedSubmit(context);
755
756
757 float* outData = static_cast<float*>(
758 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
759 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
760 for (unsigned int i = 0; i < kProblemSize; ++i) {
761 const float expected = (i + 1) * kFactor;
762 const float found = outData[i];
764 }
765}
766
767
768
771 context,
772 testContext) {
773 std::unique_ptr<Recorder> recorder = context->makeRecorder();
774
775
776
777 constexpr uint32_t kDim = 16;
778
780 public:
782 "TestStorageTexture",
783 {kDim, kDim, 1},
784 {
785 {
786 ResourceType::kWriteOnlyStorageTexture,
789 0,
790 "dst",
791 }
792 }) {}
793 ~TestComputeStep() override = default;
794
796 return R"(
797 void main() {
798 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
799 }
800 )";
801 }
802
804 int index, const ResourceDesc& r) const override {
806 }
807
810 }
812
816 return;
817 }
818
822 return;
823 }
824
825
829
830
831 std::unique_ptr<Recording> recording = recorder->snap();
832 if (!recording) {
834 return;
835 }
836
839 context->insertRecording(insertInfo);
840 testContext->syncedSubmit(context);
841
845 bitmap.allocPixels(imgInfo);
846
848 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
850
851 bool readPixelsSuccess = context->priv().readPixels(pixels,
texture.get(), imgInfo, 0, 0);
853
854 for (uint32_t
x = 0;
x < kDim; ++
x) {
855 for (uint32_t
y = 0;
y < kDim; ++
y) {
859 "At position {%u, %u}, "
860 "expected {%.1f, %.1f, %.1f, %.1f}, "
861 "found {%.1f, %.1f, %.1f, %.1f}",
863 expected.fR, expected.fG, expected.fB, expected.fA,
865 }
866 }
867}
868
869
870
873 context,
874 testContext) {
875 std::unique_ptr<Recorder> recorder = context->makeRecorder();
876
877
878
879 constexpr uint32_t kDim = 16;
880
882 public:
884 "TestStorageTextureReadAndWrite",
885 {kDim, kDim, 1},
886 {
887 {
888 ResourceType::kReadOnlyTexture,
891 0,
892 "src",
893 },
894 {
895 ResourceType::kWriteOnlyStorageTexture,
898 1,
899 "dst",
900 }
901 }) {}
902 ~TestComputeStep() override = default;
903
905 return R"(
906 void main() {
907 half4 color = textureRead(src, sk_LocalInvocationID.xy);
908 textureWrite(dst, sk_LocalInvocationID.xy, color);
909 }
910 )";
911 }
912
914 int index, const ResourceDesc& r) const override {
917 }
918
921 }
923
924
930 bool srcPeekPixelsSuccess = srcBitmap.
peekPixels(&srcPixels);
932 for (uint32_t
x = 0;
x < kDim; ++
x) {
933 for (uint32_t
y = 0;
y < kDim; ++
y) {
936 }
937 }
938
940 skgpu::Mipmapped::kNo,
941 skgpu::Protected::kNo,
942 skgpu::Renderable::kNo);
944 recorder->priv().resourceProvider(),
945 {kDim, kDim},
946 texInfo,
952 srcProxy,
955 {mipLevel},
957 std::make_unique<ImageUploadContext>());
960 return;
961 }
963
965
966
967
968 builder.assignSharedTexture(std::move(srcProxy), 0);
969
972 return;
973 }
974
976 if (!dst) {
978 return;
979 }
980
981
985
986
987 std::unique_ptr<Recording> recording = recorder->snap();
988 if (!recording) {
990 return;
991 }
992
995 context->insertRecording(insertInfo);
996 testContext->syncedSubmit(context);
997
1001 bitmap.allocPixels(imgInfo);
1002
1004 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1006
1007 bool readPixelsSuccess = context->priv().readPixels(pixels,
dst.get(), imgInfo, 0, 0);
1009
1010 for (uint32_t
x = 0;
x < kDim; ++
x) {
1011 for (uint32_t
y = 0;
y < kDim; ++
y) {
1016 "At position {%u, %u}, "
1017 "expected {%.1f, %.1f, %.1f, %.1f}, "
1018 "found {%.1f, %.1f, %.1f, %.1f}",
1020 expected.fR, expected.fG, expected.fB, expected.fA,
1022 }
1023 }
1024}
1025
1028 context,
1029 testContext) {
1030 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1031
1032
1033
1034 constexpr uint32_t kDim = 16;
1035
1037 public:
1039 "TestReadOnlyStorageBuffer",
1040 {kDim, kDim, 1},
1041 {
1042 {
1043 ResourceType::kReadOnlyStorageBuffer,
1046 0,
1047 "src { uint in_data[]; }",
1048 },
1049 {
1050 ResourceType::kWriteOnlyStorageTexture,
1053 1,
1054 "dst",
1055 }
1056 }) {}
1057 ~TestComputeStep() override = default;
1058
1060 return R"(
1061 void main() {
1062 uint ix = sk_LocalInvocationID.y * 16 + sk_LocalInvocationID.x;
1063 uint value = in_data[ix];
1064 half4 splat = half4(
1065 half(value & 0xFF),
1066 half((value >> 8) & 0xFF),
1067 half((value >> 16) & 0xFF),
1068 half((value >> 24) & 0xFF)
1069 );
1070 textureWrite(dst, sk_LocalInvocationID.xy, splat / 255.0);
1071 }
1072 )";
1073 }
1074
1077 return kDim * kDim * sizeof(uint32_t);
1078 }
1079
1081 const ResourceDesc&,
1083 size_t bufferSize) const override {
1085 SkASSERT(bufferSize == kDim * kDim *
sizeof(uint32_t));
1086
1087 uint32_t*
inputs =
reinterpret_cast<uint32_t*
>(
buffer);
1088 for (uint32_t
y = 0;
y < kDim; ++
y) {
1089 for (uint32_t
x = 0;
x < kDim; ++
x) {
1091 ((
x * 256 / kDim) & 0xFF) | ((
y * 256 / kDim) & 0xFF) << 8 | 255 << 24;
1093 }
1094 }
1095 }
1096
1098 int index, const ResourceDesc& r) const override {
1101 }
1102
1105 }
1107
1111 return;
1112 }
1113
1115 if (!dst) {
1117 return;
1118 }
1119
1120
1124
1125
1126 std::unique_ptr<Recording> recording = recorder->snap();
1127 if (!recording) {
1129 return;
1130 }
1131
1134 context->insertRecording(insertInfo);
1135 testContext->syncedSubmit(context);
1136
1140 bitmap.allocPixels(imgInfo);
1141
1143 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1145
1146 bool readPixelsSuccess = context->priv().readPixels(pixels,
dst.get(), imgInfo, 0, 0);
1148
1149 for (uint32_t
x = 0;
x < kDim; ++
x) {
1150 for (uint32_t
y = 0;
y < kDim; ++
y) {
1154 bool pass = true;
1155 for (int i = 0; i < 4; i++) {
1156 pass &=
color[i] == expected[i];
1157 }
1159 "At position {%u, %u}, "
1160 "expected {%.1f, %.1f, %.1f, %.1f}, "
1161 "found {%.1f, %.1f, %.1f, %.1f}",
1163 expected.fR, expected.fG, expected.fB, expected.fA,
1165 }
1166 }
1167}
1168
1169
1172 context,
1173 testContext) {
1174 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1175
1176
1177
1178 constexpr uint32_t kDim = 16;
1179
1180
1182 public:
1184 "TestStorageTexturesFirstPass",
1185 {kDim, kDim, 1},
1186 {
1187 {
1188 ResourceType::kWriteOnlyStorageTexture,
1191 0,
1192 "dst",
1193 }
1194 }) {}
1195 ~TestComputeStep1() override = default;
1196
1198 return R"(
1199 void main() {
1200 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
1201 }
1202 )";
1203 }
1204
1206 int index, const ResourceDesc& r) const override {
1209 }
1210
1213 }
1214 } step1;
1215
1216
1218 public:
1220 "TestStorageTexturesSecondPass",
1221 {kDim, kDim, 1},
1222 {
1223 {
1224 ResourceType::kReadOnlyTexture,
1227 0,
1228 "src",
1229 },
1230 {
1231 ResourceType::kWriteOnlyStorageTexture,
1234 1,
1235 "dst",
1236 }
1237 }) {}
1238 ~TestComputeStep2() override = default;
1239
1241 return R"(
1242 void main() {
1243 half4 color = textureRead(src, sk_LocalInvocationID.xy);
1244 textureWrite(dst, sk_LocalInvocationID.xy, color);
1245 }
1246 )";
1247 }
1248
1250 int index, const ResourceDesc& r) const override {
1253 }
1254
1257 }
1258 } step2;
1259
1263
1265 if (!dst) {
1267 return;
1268 }
1269
1270
1274
1275
1276 std::unique_ptr<Recording> recording = recorder->snap();
1277 if (!recording) {
1279 return;
1280 }
1281
1284 context->insertRecording(insertInfo);
1285 testContext->syncedSubmit(context);
1286
1290 bitmap.allocPixels(imgInfo);
1291
1293 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1295
1296 bool readPixelsSuccess = context->priv().readPixels(pixels,
dst.get(), imgInfo, 0, 0);
1298
1299 for (uint32_t
x = 0;
x < kDim; ++
x) {
1300 for (uint32_t
y = 0;
y < kDim; ++
y) {
1304 "At position {%u, %u}, "
1305 "expected {%.1f, %.1f, %.1f, %.1f}, "
1306 "found {%.1f, %.1f, %.1f, %.1f}",
1308 expected.fR, expected.fG, expected.fB, expected.fA,
1310 }
1311 }
1312}
1313
1314
1315
1316
1319 context,
1320 testContext) {
1321 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1322
1323
1324
1325
1326
1327 constexpr uint32_t kSrcDim = 16;
1328 constexpr uint32_t kDstDim = 4;
1329
1331 public:
1333 "Test_SampledTexture_Init",
1334 {kSrcDim, kSrcDim, 1},
1335 {
1336 {
1337 ResourceType::kWriteOnlyStorageTexture,
1340 0,
1341 "dst",
1342 }
1343 }) {}
1344 ~TestComputeStep1() override = default;
1345
1347 return R"(
1348 void main() {
1349 uint2 c = sk_LocalInvocationID.xy;
1350 uint checkerBoardColor = (c.x + (c.y % 2)) % 2;
1351 textureWrite(dst, c, half4(checkerBoardColor, 0, 0, 1));
1352 }
1353 )";
1354 }
1355
1357 int index, const ResourceDesc& r) const override {
1360 }
1361
1364 }
1365 } step1;
1366
1368 public:
1370 "Test_SampledTexture_Sample",
1371 {kDstDim, kDstDim, 1},
1372 {
1373
1374
1375
1376
1377 {
1378 ResourceType::kWriteOnlyStorageTexture,
1381 1,
1382 "dst",
1383 },
1384 {
1385 ResourceType::kSampledTexture,
1388 0,
1389 "src",
1390 }
1391 }) {}
1392 ~TestComputeStep2() override = default;
1393
1395 return R"(
1396 void main() {
1397 // Normalize the 4x4 invocation indices and sample the source texture using
1398 // that.
1399 uint2 dstCoord = sk_LocalInvocationID.xy;
1400 const float2 dstSizeInv = float2(0.25, 0.25);
1401 float2 unormCoord = float2(dstCoord) * dstSizeInv;
1402
1403 // Use explicit LOD, as quad derivatives are not available to a compute shader.
1404 half4 color = sampleLod(src, unormCoord, 0);
1405 textureWrite(dst, dstCoord, color);
1406 }
1407 )";
1408 }
1409
1411 int index, const ResourceDesc& r) const override {
1412 SkASSERT(index == 0 || index == 1);
1414 }
1415
1418
1421 }
1422
1425 }
1426 } step2;
1427
1431
1433 if (!dst) {
1435 return;
1436 }
1437
1438
1442
1443
1444 std::unique_ptr<Recording> recording = recorder->snap();
1445 if (!recording) {
1447 return;
1448 }
1449
1452 context->insertRecording(insertInfo);
1453 testContext->syncedSubmit(context);
1454
1458 bitmap.allocPixels(imgInfo);
1459
1461 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1463
1464 bool readPixelsSuccess = context->priv().readPixels(pixels,
dst.get(), imgInfo, 0, 0);
1466
1467 for (uint32_t
x = 0;
x < kDstDim; ++
x) {
1468 for (uint32_t
y = 0;
y < kDstDim; ++
y) {
1471 "At position {%u, %u}, "
1472 "expected red channel in range [0.49, 0.51], "
1473 "found {%.3f}",
1475 }
1476 }
1477}
1478
1479
1480
1481
1482
1483
1486 context,
1487 testContext) {
1488
1489 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
1490 return;
1491 }
1492
1493 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1494
1495 constexpr uint32_t kWorkgroupCount = 32;
1496 constexpr uint32_t kWorkgroupSize = 256;
1497
1499 public:
1501 "TestAtomicOperations",
1502 {kWorkgroupSize, 1, 1},
1503 {
1504 {
1505 ResourceType::kStorageBuffer,
1508 0,
1509 "ssbo { atomicUint globalCounter; }",
1510 }
1511 }) {}
1512 ~TestComputeStep() override = default;
1513
1514
1515
1516
1517
1518
1519
1521 return R"(
1522 workgroup atomicUint localCounter;
1523
1524 void main() {
1525 // Initialize the local counter.
1526 if (sk_LocalInvocationID.x == 0) {
1527 atomicStore(localCounter, 0);
1528 }
1529
1530 // Synchronize the threads in the workgroup so they all see the initial value.
1531 workgroupBarrier();
1532
1533 // All threads increment the counter.
1534 atomicAdd(localCounter, 1);
1535
1536 // Synchronize the threads again to ensure they have all executed the increment
1537 // and the following load reads the same value across all threads in the
1538 // workgroup.
1539 workgroupBarrier();
1540
1541 // Add the workgroup-only tally to the global counter.
1542 if (sk_LocalInvocationID.x == 0) {
1543 atomicAdd(globalCounter, atomicLoad(localCounter));
1544 }
1545 }
1546 )";
1547 }
1548
1552 SkASSERT(r.fFlow == DataFlow::kShared);
1553 return sizeof(uint32_t);
1554 }
1555
1558 }
1559
1561 const ResourceDesc& r,
1563 size_t bufferSize) const override {
1565 *
static_cast<uint32_t*
>(
buffer) = 0;
1566 }
1568
1571
1575 return;
1576 }
1577
1578
1582
1583
1584 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
1585
1586
1587 std::unique_ptr<Recording> recording = recorder->snap();
1588 if (!recording) {
1590 return;
1591 }
1592
1595 context->insertRecording(insertInfo);
1596 testContext->syncedSubmit(context);
1597
1598
1599 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
1600 const uint32_t
result =
static_cast<const uint32_t*
>(
1601 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
1603 result == kExpectedCount,
1604 "expected '%u', found '%u'",
1605 kExpectedCount,
1607}
1608
1609
1610
1611
1612
1613
1616 context,
1617 testContext) {
1618
1619 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
1620 return;
1621 }
1622
1623 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1624
1625 constexpr uint32_t kWorkgroupCount = 32;
1626 constexpr uint32_t kWorkgroupSize = 256;
1627
1629 public:
1631 "TestAtomicOperationsOverArrayAndStruct",
1632 {kWorkgroupSize, 1, 1},
1633 {
1634 {
1635 ResourceType::kStorageBuffer,
1638 0,
1639 "ssbo {\n"
1640 " atomicUint globalCountsFirstHalf;\n"
1641 " atomicUint globalCountsSecondHalf;\n"
1642 "}\n"
1643 }
1644 }) {}
1645 ~TestComputeStep() override = default;
1646
1647
1648
1649
1650
1651
1652
1654 return R"(
1655 const uint WORKGROUP_SIZE = 256;
1656
1657 workgroup atomicUint localCounts[2];
1658
1659 void main() {
1660 // Initialize the local counts.
1661 if (sk_LocalInvocationID.x == 0) {
1662 atomicStore(localCounts[0], 0);
1663 atomicStore(localCounts[1], 0);
1664 }
1665
1666 // Synchronize the threads in the workgroup so they all see the initial value.
1667 workgroupBarrier();
1668
1669 // Each thread increments one of the local counters based on its invocation
1670 // index.
1671 uint idx = sk_LocalInvocationID.x < (WORKGROUP_SIZE / 2) ? 0 : 1;
1672 atomicAdd(localCounts[idx], 1);
1673
1674 // Synchronize the threads again to ensure they have all executed the increments
1675 // and the following load reads the same value across all threads in the
1676 // workgroup.
1677 workgroupBarrier();
1678
1679 // Add the workgroup-only tally to the global counter.
1680 if (sk_LocalInvocationID.x == 0) {
1681 atomicAdd(globalCountsFirstHalf, atomicLoad(localCounts[0]));
1682 atomicAdd(globalCountsSecondHalf, atomicLoad(localCounts[1]));
1683 }
1684 }
1685 )";
1686 }
1687
1691 SkASSERT(r.fFlow == DataFlow::kShared);
1692 return 2 * sizeof(uint32_t);
1693 }
1694
1697 }
1698
1700 const ResourceDesc& r,
1702 size_t bufferSize) const override {
1704 uint32_t*
data =
static_cast<uint32_t*
>(
buffer);
1707 }
1709
1712
1716 return;
1717 }
1718
1719
1723
1724
1725 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
1726
1727
1728 std::unique_ptr<Recording> recording = recorder->snap();
1729 if (!recording) {
1731 return;
1732 }
1733
1736 context->insertRecording(insertInfo);
1737 testContext->syncedSubmit(context);
1738
1739
1740 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize / 2;
1741
1742 const uint32_t* ssboData = static_cast<const uint32_t*>(
1743 map_buffer(context, testContext,
buffer.get(),
info.fOffset));
1744 const uint32_t firstHalfCount = ssboData[0];
1745 const uint32_t secondHalfCount = ssboData[1];
1747 firstHalfCount == kExpectedCount,
1748 "expected '%u', found '%u'",
1749 kExpectedCount,
1750 firstHalfCount);
1752 secondHalfCount == kExpectedCount,
1753 "expected '%u', found '%u'",
1754 kExpectedCount,
1755 secondHalfCount);
1756}
1757
1760 context,
1761 testContext) {
1762 constexpr uint32_t kProblemSize = 512;
1763
1764
1765
1766 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
1767
1768 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1769
1770
1771
1773 public:
1775 "TestClearedBuffer",
1776 {kWorkgroupSize, 1, 1},
1777 {
1778
1779 {
1780
1781 ResourceType::kStorageBuffer,
1784 "inputBlock { uint4 in_data[]; }\n",
1785 },
1786
1787 {
1788 ResourceType::kStorageBuffer,
1790
1792 0,
1793 "outputBlock { uint4 out_data[]; }\n",
1794 }
1795 }) {}
1796 ~TestComputeStep() override = default;
1797
1799 return R"(
1800 void main() {
1801 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1802 }
1803 )";
1804 }
1805
1807 return sizeof(uint32_t) * kProblemSize;
1808 }
1809
1811 const ResourceDesc& r,
1813 size_t bufferSize) const override {
1814
1816 }
1817
1820 }
1822
1826 return;
1827 }
1828
1829
1831 if (!outputInfo) {
1832 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
1833 return;
1834 }
1835
1836
1840
1841
1842 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
1843
1844
1845 std::unique_ptr<Recording> recording = recorder->snap();
1846 if (!recording) {
1848 return;
1849 }
1850
1853 context->insertRecording(insertInfo);
1854 testContext->syncedSubmit(context);
1855
1856
1857 uint32_t* outData = static_cast<uint32_t*>(
1858 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
1859 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
1860 for (unsigned int i = 0; i < kProblemSize; ++i) {
1861 const uint32_t found = outData[i];
1863 }
1864}
1865
1868 context,
1869 testContext) {
1870
1871
1872
1873 constexpr uint32_t kWorkgroupSize = 64;
1874
1875
1877 public:
1879 "FillWithGarbage",
1880 {kWorkgroupSize, 1, 1},
1881 {
1882 {
1883 ResourceType::kStorageBuffer,
1886 0,
1887 "outputBlock { uint4 out_data[]; }\n",
1888 }
1889 }) {}
1890 ~FillWithGarbage() override = default;
1891
1893 return R"(
1894 void main() {
1895 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
1896 }
1897 )";
1898 }
1899 } garbageStep;
1900
1901
1902
1904 public:
1906 "CopyBuffer",
1907 {kWorkgroupSize, 1, 1},
1908 {
1909 {
1910 ResourceType::kStorageBuffer,
1913 0,
1914 "inputBlock { uint4 in_data[]; }\n",
1915 },
1916 {
1917 ResourceType::kStorageBuffer,
1920 1,
1921 "outputBlock { uint4 out_data[]; }\n",
1922 }
1923 }) {}
1924 ~CopyBuffer() override = default;
1925
1927 return R"(
1928 void main() {
1929 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1930 }
1931 )";
1932 }
1933 } copyStep;
1934
1935 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1937
1938 constexpr size_t kElementCount = 4 * kWorkgroupSize;
1939 constexpr size_t kBufferSize =
sizeof(uint32_t) * kElementCount;
1940 auto input = recorder->priv().drawBufferManager()->getStorage(
kBufferSize);
1941 auto [
_,
output] = recorder->priv().drawBufferManager()->getStoragePointer(
kBufferSize);
1942
1944
1945
1947 builder.appendStep(&garbageStep, {{1, 1, 1}});
1949
1950
1954 builder.appendStep(©Step, {{1, 1, 1}});
1956
1958
1959 auto outputBuffer = sync_buffer_to_cpu(recorder.get(),
output.fBuffer);
1960
1961
1962 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
1963 if (!recording) {
1965 return;
1966 }
1967
1968
1969 uint32_t* outData = static_cast<uint32_t*>(
1970 map_buffer(context, testContext, outputBuffer.get(),
output.fOffset));
1971 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
1972 for (unsigned int i = 0; i < kElementCount; ++i) {
1973 const uint32_t found = outData[i];
1975 }
1976}
1977
1980 context,
1981 testContext) {
1982
1983
1984 constexpr uint32_t kWorkgroupSize = 64;
1985
1986
1988 public:
1990 "FillWithGarbage",
1991 {kWorkgroupSize, 1, 1},
1992 {
1993 {
1994 ResourceType::kStorageBuffer,
1997 0,
1998 "outputBlock { uint4 out_data[]; }\n",
1999 }
2000 }) {}
2001 ~FillWithGarbage() override = default;
2002
2004 return R"(
2005 void main() {
2006 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
2007 }
2008 )";
2009 }
2010 } garbageStep;
2011
2012
2013
2015 public:
2017 "CopyBuffer",
2018 {kWorkgroupSize, 1, 1},
2019 {
2020 {
2021 ResourceType::kStorageBuffer,
2024 0,
2025 "inputBlock { uint4 in_data[]; }\n",
2026 },
2027 {
2028 ResourceType::kStorageBuffer,
2031 1,
2032 "outputBlock { uint4 out_data[]; }\n",
2033 }
2034 }) {}
2035 ~CopyBuffer() override = default;
2036
2038 return R"(
2039 void main() {
2040 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
2041 }
2042 )";
2043 }
2044 } copyStep;
2045
2046 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2048
2049 constexpr size_t kElementCount = 4 * kWorkgroupSize;
2050 constexpr size_t kBufferSize =
sizeof(uint32_t) * kElementCount;
2051 auto [
_,
output] = recorder->priv().drawBufferManager()->getStoragePointer(
kBufferSize);
2052
2054
2055
2056 {
2057 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(
kBufferSize);
2060
2061
2062 }
2063 builder.appendStep(&garbageStep, {{1, 1, 1}});
2065
2066
2068 {
2069 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(
kBufferSize);
2072 }
2074 builder.appendStep(©Step, {{1, 1, 1}});
2076
2078
2079 auto outputBuffer = sync_buffer_to_cpu(recorder.get(),
output.fBuffer);
2080
2081
2082 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
2083 if (!recording) {
2085 return;
2086 }
2087
2088
2089 uint32_t* outData = static_cast<uint32_t*>(
2090 map_buffer(context, testContext, outputBuffer.get(),
output.fOffset));
2091 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
2092 for (unsigned int i = 0; i < kElementCount; ++i) {
2093 const uint32_t found = outData[i];
2095 }
2096}
2097
2100 context,
2101 testContext) {
2102
2103 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2104 return;
2105 }
2106
2107 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2108
2109 constexpr uint32_t kWorkgroupCount = 32;
2110 constexpr uint32_t kWorkgroupSize = 64;
2111
2112
2113
2114
2116 public:
2117 IndirectStep()
2119 "TestIndirectDispatch_IndirectStep",
2120 {kWorkgroupSize, 1, 1},
2121
2122 {{
2123 ResourceType::kIndirectBuffer,
2126 0,
2127
2128
2129
2130 "ssbo { uint indirect[]; }",
2131 }}) {}
2132 ~IndirectStep() override = default;
2133
2134
2135
2137 return R"(
2138 // This needs to match `kWorkgroupCount` declared above.
2139 const uint kWorkgroupCount = 32;
2140
2141 void main() {
2142 if (sk_LocalInvocationID.x == 0) {
2143 indirect[0] = kWorkgroupCount;
2144 indirect[1] = 1;
2145 indirect[2] = 1;
2146 }
2147 }
2148 )";
2149 }
2150
2154 SkASSERT(r.fFlow == DataFlow::kShared);
2156 }
2157
2160 }
2161 } indirectStep;
2162
2164 public:
2165 CountStep()
2167 "TestIndirectDispatch_CountStep",
2168 {kWorkgroupSize, 1, 1},
2169
2170 {{
2171 ResourceType::kStorageBuffer,
2174 1,
2175 "ssbo { atomicUint globalCounter; }",
2176 }}) {}
2177 ~CountStep() override = default;
2178
2180 return R"(
2181 workgroup atomicUint localCounter;
2182
2183 void main() {
2184 // Initialize the local counter.
2185 if (sk_LocalInvocationID.x == 0) {
2186 atomicStore(localCounter, 0);
2187 }
2188
2189 // Synchronize the threads in the workgroup so they all see the initial value.
2190 workgroupBarrier();
2191
2192 // All threads increment the counter.
2193 atomicAdd(localCounter, 1);
2194
2195 // Synchronize the threads again to ensure they have all executed the increment
2196 // and the following load reads the same value across all threads in the
2197 // workgroup.
2198 workgroupBarrier();
2199
2200 // Add the workgroup-only tally to the global counter.
2201 if (sk_LocalInvocationID.x == 0) {
2202 atomicAdd(globalCounter, atomicLoad(localCounter));
2203 }
2204 }
2205 )";
2206 }
2207
2211 SkASSERT(r.fFlow == DataFlow::kShared);
2212 return sizeof(uint32_t);
2213 }
2214
2216 const ResourceDesc& r,
2218 size_t bufferSize) const override {
2220 *
static_cast<uint32_t*
>(
buffer) = 0;
2221 }
2222 } countStep;
2223
2225 builder.appendStep(&indirectStep);
2227 if (!indirectBufferInfo) {
2229 return;
2230 }
2232
2236 return;
2237 }
2238
2239
2243
2244
2245 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2246
2247
2248 std::unique_ptr<Recording> recording = recorder->snap();
2249 if (!recording) {
2251 return;
2252 }
2253
2256 context->insertRecording(insertInfo);
2257 testContext->syncedSubmit(context);
2258
2259
2260 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2261 const uint32_t
result =
static_cast<const uint32_t*
>(
2262 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2264 result == kExpectedCount,
2265 "expected '%u', found '%u'",
2266 kExpectedCount,
2268}
2269
2272 context,
2273 testContext) {
2274 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2275
2276 constexpr uint32_t kWorkgroupCount = 32;
2277 constexpr uint32_t kWorkgroupSize = 1024;
2278
2280 public:
2282 "TestAtomicOperationsMetal",
2283 {kWorkgroupSize, 1, 1},
2284 {
2285 {
2286 ResourceType::kStorageBuffer,
2289 0,
2290 }
2291 },
2292 {},
2294 ~TestComputeStep() override = default;
2295
2298 static constexpr std::string_view
kSource = R
"(
2299 #include <metal_stdlib>
2300
2301 using namespace metal;
2302
2303 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2304 device atomic_uint& globalCounter [[buffer(0)]]) {
2305 threadgroup atomic_uint localCounter;
2306
2307 // Initialize the local counter.
2308 if (localId.x == 0u) {
2309 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2310 }
2311
2312 // Synchronize the threads in the workgroup so they all see the initial value.
2313 threadgroup_barrier(mem_flags::mem_threadgroup);
2314
2315 // All threads increment the counter.
2316 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2317
2318 // Synchronize the threads again to ensure they have all executed the increment
2319 // and the following load reads the same value across all threads in the
2320 // workgroup.
2321 threadgroup_barrier(mem_flags::mem_threadgroup);
2322
2323 // Add the workgroup-only tally to the global counter.
2324 if (localId.x == 0u) {
2325 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2326 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2327 }
2328 }
2329 )";
2330 return {
kSource,
"atomicCount"};
2331 }
2332
2336 SkASSERT(r.fFlow == DataFlow::kShared);
2337 return sizeof(uint32_t);
2338 }
2339
2342 }
2343
2345 const ResourceDesc& r,
2347 size_t bufferSize) const override {
2349 *
static_cast<uint32_t*
>(
buffer) = 0;
2350 }
2352
2355
2359 return;
2360 }
2361
2362
2366
2367
2368 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2369
2370
2371 std::unique_ptr<Recording> recording = recorder->snap();
2372 if (!recording) {
2374 return;
2375 }
2376
2379 context->insertRecording(insertInfo);
2380 testContext->syncedSubmit(context);
2381
2382
2383 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2384 const uint32_t
result =
static_cast<const uint32_t*
>(
2385 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2387 result == kExpectedCount,
2388 "expected '%u', found '%u'",
2389 kExpectedCount,
2391}
2392
2395 context,
2396 testContext) {
2397 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2398
2399 constexpr uint32_t kWorkgroupCount = 32;
2400 constexpr uint32_t kWorkgroupSize = 1024;
2401
2403 public:
2405 "TestAtomicOperationsMetal",
2406 {kWorkgroupSize, 1, 1},
2407 {
2408 {
2409 ResourceType::kStorageBuffer,
2412 0,
2413 }
2414 },
2415 {
2416 {
2417 sizeof(uint32_t),
2418 0u,
2419 }
2420 },
2422 ~TestComputeStep() override = default;
2423
2424
2425
2426
2429 static constexpr std::string_view
kSource = R
"(
2430 #include <metal_stdlib>
2431
2432 using namespace metal;
2433
2434 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2435 device atomic_uint& globalCounter [[buffer(0)]],
2436 threadgroup atomic_uint& localCounter [[threadgroup(0)]]) {
2437 // Initialize the local counter.
2438 if (localId.x == 0u) {
2439 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2440 }
2441
2442 // Synchronize the threads in the workgroup so they all see the initial value.
2443 threadgroup_barrier(mem_flags::mem_threadgroup);
2444
2445 // All threads increment the counter.
2446 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2447
2448 // Synchronize the threads again to ensure they have all executed the increment
2449 // and the following load reads the same value across all threads in the
2450 // workgroup.
2451 threadgroup_barrier(mem_flags::mem_threadgroup);
2452
2453 // Add the workgroup-only tally to the global counter.
2454 if (localId.x == 0u) {
2455 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2456 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2457 }
2458 }
2459 )";
2460 return {
kSource,
"atomicCount"};
2461 }
2462
2466 SkASSERT(r.fFlow == DataFlow::kShared);
2467 return sizeof(uint32_t);
2468 }
2469
2472 }
2473
2475 const ResourceDesc& r,
2477 size_t bufferSize) const override {
2479 *
static_cast<uint32_t*
>(
buffer) = 0;
2480 }
2482
2485
2489 return;
2490 }
2491
2492
2496
2497
2498 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2499
2500
2501 std::unique_ptr<Recording> recording = recorder->snap();
2502 if (!recording) {
2504 return;
2505 }
2506
2509 context->insertRecording(insertInfo);
2510 testContext->syncedSubmit(context);
2511
2512
2513 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2514 const uint32_t
result =
static_cast<const uint32_t*
>(
2515 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2517 result == kExpectedCount,
2518 "expected '%u', found '%u'",
2519 kExpectedCount,
2521}
2522
2524
2525 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2526 return;
2527 }
2528
2529 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2530
2531 constexpr uint32_t kWorkgroupCount = 32;
2532 constexpr uint32_t kWorkgroupSize = 256;
2533
2535 public:
2537 "TestAtomicOperationsWGSL",
2538 {kWorkgroupSize, 1, 1},
2539 {
2540 {
2541 ResourceType::kStorageBuffer,
2544 0,
2545 }
2546 },
2547 {},
2549 ~TestComputeStep() override = default;
2550
2553 static constexpr std::string_view
kSource = R
"(
2554 @group(0) @binding(0) var<storage, read_write> globalCounter: atomic<u32>;
2555
2556 var<workgroup> localCounter: atomic<u32>;
2557
2558 @compute @workgroup_size(256)
2559 fn atomicCount(@builtin(local_invocation_id) localId: vec3u) {
2560 // Initialize the local counter.
2561 if localId.x == 0u {
2562 atomicStore(&localCounter, 0u);
2563 }
2564
2565 // Synchronize the threads in the workgroup so they all see the initial value.
2566 workgroupBarrier();
2567
2568 // All threads increment the counter.
2569 atomicAdd(&localCounter, 1u);
2570
2571 // Synchronize the threads again to ensure they have all executed the increment
2572 // and the following load reads the same value across all threads in the
2573 // workgroup.
2574 workgroupBarrier();
2575
2576 // Add the workgroup-only tally to the global counter.
2577 if localId.x == 0u {
2578 let tally = atomicLoad(&localCounter);
2579 atomicAdd(&globalCounter, tally);
2580 }
2581 }
2582 )";
2583 return {
kSource,
"atomicCount"};
2584 }
2585
2589 SkASSERT(r.fFlow == DataFlow::kShared);
2590 return sizeof(uint32_t);
2591 }
2592
2595 }
2596
2598 const ResourceDesc& r,
2600 size_t bufferSize) const override {
2602 *
static_cast<uint32_t*
>(
buffer) = 0;
2603 }
2605
2608
2612 return;
2613 }
2614
2615
2619
2620
2621 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2622
2623
2624 std::unique_ptr<Recording> recording = recorder->snap();
2625 if (!recording) {
2627 return;
2628 }
2629
2632 context->insertRecording(insertInfo);
2633 testContext->syncedSubmit(context);
2634
2635
2636 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2637 const uint32_t
result =
static_cast<const uint32_t*
>(
2638 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2640 result == kExpectedCount,
2641 "expected '%u', found '%u'",
2642 kExpectedCount,
2644}
static int step(int x, SkScalar min, SkScalar max)
#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS( name, reporter, graphite_context, test_context)
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
@ kRGBA_8888_SkColorType
pixel with 8 bits for red, green, blue, alpha; in 32-bit word
static constexpr SkColor SkColorSetARGB(U8CPU a, U8CPU r, U8CPU g, U8CPU b)
constexpr SkColor SK_ColorGREEN
static const size_t kBufferSize
#define DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT(name, reporter, graphite_context, test_context)
#define DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(name, reporter, graphite_context, test_context)
#define REPORTER_ASSERT(r, cond,...)
void allocPixels(const SkImageInfo &info, size_t rowBytes)
bool peekPixels(SkPixmap *pixmap) const
const SkImageInfo & info() const
SkColor4f getColor4f(int x, int y) const
uint32_t * writable_addr32(int x, int y) const
const void * addr() const
virtual SamplerDesc calculateSamplerParameters(int resourceIndex, const ResourceDesc &) const
virtual std::string computeSkSL() const
virtual WorkgroupSize calculateGlobalDispatchSize() const
virtual void prepareStorageBuffer(int resourceIndex, const ResourceDesc &resource, void *buffer, size_t bufferSize) const
virtual std::tuple< SkISize, SkColorType > calculateTextureParameters(int resourceIndex, const ResourceDesc &) const
virtual NativeShaderSource nativeShaderSource(NativeShaderFormat) const
virtual void prepareUniformBuffer(int resourceIndex, const ResourceDesc &, UniformManager *) const
virtual size_t calculateBufferSize(int resourceIndex, const ResourceDesc &) const
static sk_sp< ComputeTask > Make(DispatchGroupList dispatchGroups)
static sk_sp< TextureProxy > Make(const Caps *, ResourceProvider *, SkISize dimensions, const TextureInfo &, skgpu::Budgeted)
static UploadInstance Make(Recorder *, sk_sp< TextureProxy > targetProxy, const SkColorInfo &srcColorInfo, const SkColorInfo &dstColorInfo, SkSpan< const MipLevel > levels, const SkIRect &dstRect, std::unique_ptr< ConditionalUploadContext >)
static sk_sp< UploadTask > Make(UploadList *)
static const uint8_t buffer[]
uint32_t uint32_t * format
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot data
constexpr size_t kIndirectDispatchArgumentSize
static constexpr SkIRect MakeWH(int32_t w, int32_t h)
const SkColorInfo & colorInfo() const
static SkImageInfo Make(int width, int height, SkColorType ct, SkAlphaType at)
static SkRGBA4f FromBytes_RGBA(uint32_t color)
static SkRGBA4f FromColor(SkColor color)