ComputeTest.cpp
/*
 * Copyright 2022 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "tests/Test.h"

// NOTE: the original include list was elided in this capture; the headers below
// cover what the code in this file depends on and may not match it exactly.
#include "include/core/SkBitmap.h"
#include "include/core/SkColor.h"
#include "include/core/SkSpan.h"
#include "include/gpu/graphite/Context.h"
#include "include/gpu/graphite/Recorder.h"
#include "include/gpu/graphite/Recording.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/ComputeTypes.h"
#include "src/gpu/graphite/ContextPriv.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/compute/ComputeStep.h"
#include "src/gpu/graphite/compute/DispatchGroup.h"
#include "src/gpu/graphite/task/ComputeTask.h"
#include "src/gpu/graphite/task/CopyTask.h"
#include "src/gpu/graphite/task/SynchronizeToCpuTask.h"
#include "src/gpu/graphite/task/UploadTask.h"
#include "tools/graphite/GraphiteTestContext.h"

using namespace skgpu::graphite;
using namespace skiatest::graphite;

namespace {

void* map_buffer(Context* context,
                 GraphiteTestContext* testContext,
                 Buffer* buffer,
                 size_t offset) {
    SkASSERT(buffer);
    if (context->priv().caps()->bufferMapsAreAsync()) {
        buffer->asyncMap();
        while (!buffer->isMapped()) {
            testContext->tick();
        }
    }
    std::byte* ptr = static_cast<std::byte*>(buffer->map());
    SkASSERT(ptr);

    return ptr + offset;
}

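// Returns a buffer whose contents can be read on the CPU after the next submission: either
// `buffer` itself (when the backend allows mapping a draw buffer for readback) or a transfer
// buffer that the recorded tasks will copy `buffer` into.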
sk_sp<Buffer> sync_buffer_to_cpu(Recorder* recorder, const Buffer* buffer) {
    if (recorder->priv().caps()->drawBufferCanBeMappedForReadback()) {
        // `buffer` can be mapped directly, however it may still require a synchronization step
        // by the underlying API (e.g. a managed buffer in Metal). SynchronizeToCpuTask
        // automatically handles this for us.
        recorder->priv().add(SynchronizeToCpuTask::Make(sk_ref_sp(buffer)));
        return sk_ref_sp(buffer);
    }

    // The backend requires a transfer buffer for CPU read-back.
    auto xferBuffer =
            recorder->priv().resourceProvider()->findOrCreateBuffer(buffer->size(),
                                                                    BufferType::kXferGpuToCpu,
                                                                    AccessPattern::kHostVisible,
                                                                    "ComputeTest_TransferToCpu");
    SkASSERT(xferBuffer);

    recorder->priv().add(CopyBufferToBufferTask::Make(buffer,
                                                      /*srcOffset=*/0,
                                                      xferBuffer,
                                                      /*dstOffset=*/0,
                                                      buffer->size()));
    return xferBuffer;
}

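// Snaps a Recording from `recorder`, inserts it, and synchronously submits it on `context`.
// Returns null if snapping the Recording fails.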
std::unique_ptr<Recording> submit_recording(Context* context,
                                            GraphiteTestContext* testContext,
                                            Recorder* recorder) {
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        return nullptr;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    return recording;
}

bool is_dawn_or_metal_context_type(skiatest::GpuContextType ctxType) {
    return skiatest::IsDawnContextType(ctxType) || skiatest::IsMetalContextType(ctxType);
}

}  // namespace

#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(                  \
        name, reporter, graphite_context, test_context)                 \
    DEF_GRAPHITE_TEST_FOR_CONTEXTS(name,                                \
                                   is_dawn_or_metal_context_type,       \
                                   reporter,                            \
                                   graphite_context,                    \
                                   test_context,                        \
                                   CtsEnforcement::kNever)

// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_SingleDispatchTest,
                                              reporter,
                                              context,
                                              testContext) {
    constexpr uint32_t kProblemSize = 512;
    constexpr float kFactor = 4.f;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    class TestComputeStep : public ComputeStep {
    public:
        // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
        // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
        TestComputeStep() : ComputeStep(
                /*name=*/"TestArrayMultiply",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Input buffer:
                    {
                        // TODO(b/299979165): Declare this binding as read-only.
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"inputBlock {\n"
                                 "    float factor;\n"
                                 "    layout(offset=16) float4 in_data[];\n"
                                 "}",
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,  // shared to allow us to access it from the
                                                     // Builder
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/0,
                        /*sksl=*/"outputBlock { float4 out_data[]; }",
                    }
                }) {}
        ~TestComputeStep() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            if (index == 0) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float) * (kProblemSize + 4);
            }
            SkASSERT(index == 1);
            SkASSERT(r.fSlot == 0);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return sizeof(float) * kProblemSize;
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            // Only initialize the input buffer.
            if (resourceIndex != 0) {
                return;
            }
            SkASSERT(r.fFlow == DataFlow::kPrivate);

            // One float4's worth of header (the factor) followed by kProblemSize floats of
            // payload. Note that the span is sized in floats while the buffer is sized in bytes.
            size_t dataCount = kProblemSize + 4;
            SkASSERT(bufferSize == sizeof(float) * dataCount);
            SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
            inData[0] = kFactor;
            for (unsigned int i = 0; i < kProblemSize; ++i) {
                inData[i + 4] = i + 1;
            }
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    // The output buffer should have been placed in the right output slot.
    BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
    if (!outputInfo) {
        ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    float* outData = static_cast<float*>(
            map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
    SkASSERT(outputBuffer->isMapped() && outData != nullptr);
    for (unsigned int i = 0; i < kProblemSize; ++i) {
        const float expected = (i + 1) * kFactor;
        const float found = outData[i];
        REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
    }
}

// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_DispatchGroupTest,
                                              reporter,
                                              context,
                                              testContext) {
    // TODO(b/315834710): This fails on Dawn D3D11
    if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
        return;
    }

    constexpr uint32_t kProblemSize = 512;
    constexpr float kFactor1 = 4.f;
    constexpr float kFactor2 = 3.f;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // Define two steps that perform two multiplication passes over the same input.

    class TestComputeStep1 : public ComputeStep {
    public:
        // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
        // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
        TestComputeStep1() : ComputeStep(
                /*name=*/"TestArrayMultiplyFirstPass",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Input buffer:
                    {
                        // TODO(b/299979165): Declare this binding as read-only.
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*sksl=*/"inputBlock {\n"
                                 "    float factor;\n"
                                 "    layout(offset=16) float4 in_data[];\n"
                                 "}",
                    },
                    // Output buffers:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,  // GPU-only, read by second step
                        /*slot=*/0,
                        /*sksl=*/"outputBlock1 { float4 forward_data[]; }",
                    },
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/1,
                        /*sksl=*/"outputBlock2 { float2 extra_data; }",
                    }
                }) {}
        ~TestComputeStep1() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    uint idx = sk_GlobalInvocationID.x;
                    forward_data[idx] = in_data[idx] * factor;
                    if (idx == 0) {
                        extra_data.x = factor;
                        extra_data.y = 2 * factor;
                    }
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            if (index == 0) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float) * (kProblemSize + 4);
            }
            if (index == 1) {
                SkASSERT(r.fFlow == DataFlow::kShared);
                SkASSERT(r.fSlot == 0);
                return sizeof(float) * kProblemSize;
            }

            SkASSERT(index == 2);
            SkASSERT(r.fSlot == 1);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return 2 * sizeof(float);
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            if (resourceIndex != 0) {
                return;
            }

            // The span is sized in floats; the buffer is sized in bytes.
            size_t dataCount = kProblemSize + 4;
            SkASSERT(bufferSize == sizeof(float) * dataCount);
            SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
            inData[0] = kFactor1;
            for (unsigned int i = 0; i < kProblemSize; ++i) {
                inData[i + 4] = i + 1;
            }
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step1;

    class TestComputeStep2 : public ComputeStep {
    public:
        TestComputeStep2() : ComputeStep(
                /*name=*/"TestArrayMultiplySecondPass",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Input buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,  // GPU-only
                        /*slot=*/0,  // this is the output from the first step
                        /*sksl=*/"inputBlock { float4 in_data[]; }",
                    },
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"factorBlock { float factor; }"
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/2,
                        /*sksl=*/"outputBlock { float4 out_data[]; }",
                    }
                }) {}
        ~TestComputeStep2() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            SkASSERT(index != 0);
            if (index == 1) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float) * 4;
            }
            SkASSERT(index == 2);
            SkASSERT(r.fSlot == 2);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return sizeof(float) * kProblemSize;
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            if (resourceIndex != 1) {
                return;
            }
            SkASSERT(r.fFlow == DataFlow::kPrivate);
            *static_cast<float*>(buffer) = kFactor2;
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step2;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step1);
    builder.appendStep(&step2);

    // Slots 0, 1, and 2 should all contain shared buffers. Slot 1 contains the extra output
    // buffer from step 1 while slot 2 contains the result of the second multiplication pass
    // performed by step 2. Slot 0 is not mappable.
    REPORTER_ASSERT(reporter,
                    std::holds_alternative<BufferView>(builder.outputTable().fSharedSlots[0]),
                    "shared resource at slot 0 is missing");
    BindBufferInfo outputInfo = builder.getSharedBufferResource(2);
    if (!outputInfo) {
        ERRORF(reporter, "Failed to allocate an output buffer at slot 2");
        return;
    }

    // Extra output buffer from step 1 (corresponding to 'outputBlock2')
    BindBufferInfo extraOutputInfo = builder.getSharedBufferResource(1);
    if (!extraOutputInfo) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffers get synchronized to the CPU once the GPU submission has finished.
    auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
    auto extraOutputBuffer = sync_buffer_to_cpu(recorder.get(), extraOutputInfo.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer from step 2.
    float* outData = static_cast<float*>(
            map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
    SkASSERT(outputBuffer->isMapped() && outData != nullptr);
    for (unsigned int i = 0; i < kProblemSize; ++i) {
        const float expected = (i + 1) * kFactor1 * kFactor2;
        const float found = outData[i];
        REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
    }

    // Verify the contents of the extra output buffer from step 1.
    float* extraOutData = static_cast<float*>(
            map_buffer(context, testContext, extraOutputBuffer.get(), extraOutputInfo.fOffset));
    SkASSERT(extraOutputBuffer->isMapped() && extraOutData != nullptr);
    REPORTER_ASSERT(reporter,
                    kFactor1 == extraOutData[0],
                    "expected '%f', found '%f'",
                    kFactor1,
                    extraOutData[0]);
    REPORTER_ASSERT(reporter,
                    2 * kFactor1 == extraOutData[1],
                    "expected '%f', found '%f'",
                    2 * kFactor1,
                    extraOutData[1]);
}

// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_UniformBufferTest,
                                              reporter,
                                              context,
                                              testContext) {
    // TODO(b/315834710): This fails on Dawn D3D11
    if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
        return;
    }

    constexpr uint32_t kProblemSize = 512;
    constexpr float kFactor = 4.f;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestArrayMultiply",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Uniform buffer:
                    {
                        /*type=*/ResourceType::kUniformBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"uniformBlock { float factor; }"
                    },
                    // Input buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"inputBlock { float4 in_data[]; }",
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,  // shared to allow us to access it from the
                                                     // Builder
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/0,
                        /*sksl=*/"outputBlock { float4 out_data[]; }",
                    }
                }) {}
        ~TestComputeStep() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            if (index == 0) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float);
            }
            if (index == 1) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float) * kProblemSize;
            }
            SkASSERT(index == 2);
            SkASSERT(r.fSlot == 0);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return sizeof(float) * kProblemSize;
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            // Only initialize the input storage buffer.
            if (resourceIndex != 1) {
                return;
            }
            SkASSERT(r.fFlow == DataFlow::kPrivate);
            // The span is sized in floats; the buffer is sized in bytes.
            size_t dataCount = kProblemSize;
            SkASSERT(bufferSize == sizeof(float) * dataCount);
            SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
            for (unsigned int i = 0; i < kProblemSize; ++i) {
                inData[i] = i + 1;
            }
        }

        void prepareUniformBuffer(int resourceIndex,
                                  const ResourceDesc&,
                                  UniformManager* mgr) const override {
            SkASSERT(resourceIndex == 0);
            SkDEBUGCODE(
                const Uniform uniforms[] = {{"factor", SkSLType::kFloat}};
                mgr->setExpectedUniforms(uniforms);
            )
            mgr->write(kFactor);
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    // The output buffer should have been placed in the right output slot.
    BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
    if (!outputInfo) {
        ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    float* outData = static_cast<float*>(
            map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
    SkASSERT(outputBuffer->isMapped() && outData != nullptr);
    for (unsigned int i = 0; i < kProblemSize; ++i) {
        const float expected = (i + 1) * kFactor;
        const float found = outData[i];
        REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
    }
}

// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ExternallyAssignedBuffer,
                                              reporter,
                                              context,
                                              testContext) {
    constexpr uint32_t kProblemSize = 512;
    constexpr float kFactor = 4.f;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"ExternallyAssignedBuffer",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Input buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"inputBlock {\n"
                                 "    float factor;\n"
                                 "    layout(offset = 16) float4 in_data[];\n"
                                 "}\n",
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,  // shared to allow us to access it from the
                                                     // Builder
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/0,
                        /*sksl=*/"outputBlock { float4 out_data[]; }",
                    }
                }) {}
        ~TestComputeStep() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
                }
            )";
        }

        size_t calculateBufferSize(int resourceIndex, const ResourceDesc& r) const override {
            SkASSERT(resourceIndex == 0);
            SkASSERT(r.fFlow == DataFlow::kPrivate);
            return sizeof(float) * (kProblemSize + 4);
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            SkASSERT(resourceIndex == 0);
            SkASSERT(r.fFlow == DataFlow::kPrivate);

            // The span is sized in floats; the buffer is sized in bytes.
            size_t dataCount = kProblemSize + 4;
            SkASSERT(bufferSize == sizeof(float) * dataCount);
            SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
            inData[0] = kFactor;
            for (unsigned int i = 0; i < kProblemSize; ++i) {
                inData[i + 4] = i + 1;
            }
        }
    } step;

    // We allocate a buffer and directly assign it to the DispatchGroup::Builder. The ComputeStep
    // will not participate in the creation of this buffer.
    auto [_, outputInfo] =
            recorder->priv().drawBufferManager()->getStoragePointer(sizeof(float) * kProblemSize);
    REPORTER_ASSERT(reporter, outputInfo, "Failed to allocate output buffer");

    DispatchGroup::Builder builder(recorder.get());
    builder.assignSharedBuffer({outputInfo, sizeof(float) * kProblemSize}, 0);

    // Initialize the step with a pre-determined global size.
    if (!builder.appendStep(&step, {WorkgroupSize(1, 1, 1)})) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    float* outData = static_cast<float*>(
            map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
    SkASSERT(outputBuffer->isMapped() && outData != nullptr);
    for (unsigned int i = 0; i < kProblemSize; ++i) {
        const float expected = (i + 1) * kFactor;
        const float found = outData[i];
        REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
    }
}

// Tests the storage texture binding for a compute dispatch that writes the same color to every
// pixel of a storage texture.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTexture,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
    // size.
    constexpr uint32_t kDim = 16;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestStorageTexture",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    sk_sp<TextureProxy> texture = builder.getSharedTextureResource(0);
    if (!texture) {
        ERRORF(reporter, "Shared resource at slot 0 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, texture.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            SkColor4f expected = SkColor4f::FromColor(SK_ColorGREEN);
            SkColor4f color = pixels.getColor4f(x, y);
            REPORTER_ASSERT(reporter, expected == color,
                            "At position {%u, %u}, "
                            "expected {%.1f, %.1f, %.1f, %.1f}, "
                            "found {%.1f, %.1f, %.1f, %.1f}",
                            x, y,
                            expected.fR, expected.fG, expected.fB, expected.fA,
                            color.fR, color.fG, color.fB, color.fA);
        }
    }
}

// Tests the readonly texture binding for a compute dispatch that random-access reads from a
// CPU-populated texture and copies it to a storage texture.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTextureReadAndWrite,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
    // size.
    constexpr uint32_t kDim = 16;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestStorageTextureReadAndWrite",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kReadOnlyTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"src",
                    },
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/1,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    half4 color = textureRead(src, sk_LocalInvocationID.xy);
                    textureWrite(dst, sk_LocalInvocationID.xy, color);
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 1);
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    // Create and populate an input texture.
    SkBitmap srcBitmap;
    SkImageInfo srcInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    srcBitmap.allocPixels(srcInfo);
    SkPixmap srcPixels;
    bool srcPeekPixelsSuccess = srcBitmap.peekPixels(&srcPixels);
    REPORTER_ASSERT(reporter, srcPeekPixelsSuccess);
    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            *srcPixels.writable_addr32(x, y) =
                    SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0);
        }
    }

    TextureInfo texInfo = context->priv().caps()->getDefaultSampledTextureInfo(
            kRGBA_8888_SkColorType,
            skgpu::Mipmapped::kNo,
            skgpu::Protected::kNo,
            skgpu::Renderable::kNo);
    sk_sp<TextureProxy> srcProxy = TextureProxy::Make(context->priv().caps(),
                                                      recorder->priv().resourceProvider(),
                                                      {kDim, kDim},
                                                      texInfo,
                                                      skgpu::Budgeted::kNo);
    MipLevel mipLevel;
    mipLevel.fPixels = srcPixels.addr();
    mipLevel.fRowBytes = srcPixels.rowBytes();
    UploadInstance upload = UploadInstance::Make(recorder.get(),
                                                 srcProxy,
                                                 srcPixels.info().colorInfo(),
                                                 srcPixels.info().colorInfo(),
                                                 {mipLevel},
                                                 SkIRect::MakeWH(kDim, kDim),
                                                 std::make_unique<ImageUploadContext>());
    if (!upload.isValid()) {
        ERRORF(reporter, "Could not create UploadInstance");
        return;
    }
    recorder->priv().add(UploadTask::Make(std::move(upload)));

    DispatchGroup::Builder builder(recorder.get());

    // Assign the input texture to slot 0. This corresponds to the ComputeStep's "src" texture
    // binding.
    builder.assignSharedTexture(std::move(srcProxy), 0);

    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
    if (!dst) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            SkColor4f expected = SkColor4f::FromColor(
                    SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
            SkColor4f color = pixels.getColor4f(x, y);
            REPORTER_ASSERT(reporter, expected == color,
                            "At position {%u, %u}, "
                            "expected {%.1f, %.1f, %.1f, %.1f}, "
                            "found {%.1f, %.1f, %.1f, %.1f}",
                            x, y,
                            expected.fR, expected.fG, expected.fB, expected.fA,
                            color.fR, color.fG, color.fB, color.fA);
        }
    }
}

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ReadOnlyStorageBuffer,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
    // size.
    constexpr uint32_t kDim = 16;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestReadOnlyStorageBuffer",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kReadOnlyStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*slot=*/0,
                        /*sksl=*/"src { uint in_data[]; }",
                    },
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/1,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    uint ix = sk_LocalInvocationID.y * 16 + sk_LocalInvocationID.x;
                    uint value = in_data[ix];
                    half4 splat = half4(
                        half(value & 0xFF),
                        half((value >> 8) & 0xFF),
                        half((value >> 16) & 0xFF),
                        half((value >> 24) & 0xFF)
                    );
                    textureWrite(dst, sk_LocalInvocationID.xy, splat / 255.0);
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            return kDim * kDim * sizeof(uint32_t);
        }

        void prepareStorageBuffer(int index,
                                  const ResourceDesc&,
                                  void* buffer,
                                  size_t bufferSize) const override {
            SkASSERT(index == 0);
            SkASSERT(bufferSize == kDim * kDim * sizeof(uint32_t));

            uint32_t* inputs = reinterpret_cast<uint32_t*>(buffer);
            for (uint32_t y = 0; y < kDim; ++y) {
                for (uint32_t x = 0; x < kDim; ++x) {
                    uint32_t value =
                            ((x * 256 / kDim) & 0xFF) | ((y * 256 / kDim) & 0xFF) << 8 |
                            255u << 24;
                    *(inputs++) = value;
                }
            }
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 1);
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
    if (!dst) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            SkColor4f expected =
                    SkColor4f::FromColor(SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
            SkColor4f color = pixels.getColor4f(x, y);
            bool pass = true;
            for (int i = 0; i < 4; i++) {
                pass &= color[i] == expected[i];
            }
            REPORTER_ASSERT(reporter, pass,
                            "At position {%u, %u}, "
                            "expected {%.1f, %.1f, %.1f, %.1f}, "
                            "found {%.1f, %.1f, %.1f, %.1f}",
                            x, y,
                            expected.fR, expected.fG, expected.fB, expected.fA,
                            color.fR, color.fG, color.fB, color.fA);
        }
    }
}

// Tests that a texture written by one compute step can be sampled by a subsequent step.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTextureMultipleComputeSteps,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
    // size.
    constexpr uint32_t kDim = 16;

    // Writes to a texture in slot 0.
    class TestComputeStep1 : public ComputeStep {
    public:
        TestComputeStep1() : ComputeStep(
                /*name=*/"TestStorageTexturesFirstPass",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep1() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step1;

    // Reads from the texture in slot 0 and writes it to another texture in slot 1.
    class TestComputeStep2 : public ComputeStep {
    public:
        TestComputeStep2() : ComputeStep(
                /*name=*/"TestStorageTexturesSecondPass",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kReadOnlyTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"src",
                    },
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/1,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep2() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    half4 color = textureRead(src, sk_LocalInvocationID.xy);
                    textureWrite(dst, sk_LocalInvocationID.xy, color);
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 1);
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step2;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step1);
    builder.appendStep(&step2);

    sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
    if (!dst) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            SkColor4f expected = SkColor4f::FromColor(SK_ColorGREEN);
            SkColor4f color = pixels.getColor4f(x, y);
            REPORTER_ASSERT(reporter, expected == color,
                            "At position {%u, %u}, "
                            "expected {%.1f, %.1f, %.1f, %.1f}, "
                            "found {%.1f, %.1f, %.1f, %.1f}",
                            x, y,
                            expected.fR, expected.fG, expected.fB, expected.fA,
                            color.fR, color.fG, color.fB, color.fA);
        }
    }
}

// Tests that a texture can be sampled by a compute step using a sampler.
// TODO(armansito): Once the previous TODO is done, add additional tests that exercise mixed use of
// texture, buffer, and sampler bindings.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_SampledTexture,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // The first ComputeStep initializes a 16x16 texture with a checkerboard pattern of alternating
    // red and black pixels. The second ComputeStep downsamples this texture into a 4x4 using
    // bilinear filtering at pixel borders, intentionally averaging the values of each 4x4 tile in
    // the source texture, and writes the result to the destination texture.
    constexpr uint32_t kSrcDim = 16;
    constexpr uint32_t kDstDim = 4;

    class TestComputeStep1 : public ComputeStep {
    public:
        TestComputeStep1() : ComputeStep(
                /*name=*/"Test_SampledTexture_Init",
                /*localDispatchSize=*/{kSrcDim, kSrcDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep1() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    uint2 c = sk_LocalInvocationID.xy;
                    uint checkerBoardColor = (c.x + (c.y % 2)) % 2;
                    textureWrite(dst, c, half4(checkerBoardColor, 0, 0, 1));
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            return {{kSrcDim, kSrcDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step1;

    class TestComputeStep2 : public ComputeStep {
    public:
        TestComputeStep2() : ComputeStep(
                /*name=*/"Test_SampledTexture_Sample",
                /*localDispatchSize=*/{kDstDim, kDstDim, 1},
                /*resources=*/{
                    // Declare the storage texture before the sampled texture. This tests that
                    // binding index assignment works consistently across all backends when a
                    // sampler-less texture and a texture+sampler pair are intermixed and sampler
                    // bindings aren't necessarily contiguous when the ranges are distinct.
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/1,
                        /*sksl=*/"dst",
                    },
                    {
                        /*type=*/ResourceType::kSampledTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"src",
                    }
                }) {}
        ~TestComputeStep2() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    // Normalize the 4x4 invocation indices and sample the source texture using
                    // that.
                    uint2 dstCoord = sk_LocalInvocationID.xy;
                    const float2 dstSizeInv = float2(0.25, 0.25);
                    float2 unormCoord = float2(dstCoord) * dstSizeInv;

                    // Use explicit LOD, as quad derivatives are not available to a compute shader.
                    half4 color = sampleLod(src, unormCoord, 0);
                    textureWrite(dst, dstCoord, color);
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0 || index == 1);
            return {{kDstDim, kDstDim}, kRGBA_8888_SkColorType};
        }

        SamplerDesc calculateSamplerParameters(int index, const ResourceDesc&) const override {
            SkASSERT(index == 1);
            // Use the repeat tile mode to sample an infinite checkerboard.
            constexpr SkTileMode kTileModes[2] = {SkTileMode::kRepeat, SkTileMode::kRepeat};
            return {SkFilterMode::kLinear, kTileModes};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step2;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step1);
    builder.appendStep(&step2);

    sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
    if (!dst) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDstDim, kDstDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDstDim; ++x) {
        for (uint32_t y = 0; y < kDstDim; ++y) {
            SkColor4f color = pixels.getColor4f(x, y);
            REPORTER_ASSERT(reporter, color.fR > 0.49 && color.fR < 0.51,
                            "At position {%u, %u}, "
                            "expected red channel in range [0.49, 0.51], "
                            "found {%.3f}",
                            x, y, color.fR);
        }
    }
}

// TODO(b/260622403): The shader tested here is identical to
// `resources/sksl/compute/AtomicsOperations.compute`. It would be nice to be able to exercise SkSL
// features like this as part of SkSLTest.cpp instead of as a graphite test.
// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_AtomicOperationsTest,
                                              reporter,
                                              context,
                                              testContext) {
    // This fails on Dawn D3D11, b/315834710
    if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
        return;
    }

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    constexpr uint32_t kWorkgroupCount = 32;
    constexpr uint32_t kWorkgroupSize = 256;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestAtomicOperations",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*slot=*/0,
                        /*sksl=*/"ssbo { atomicUint globalCounter; }",
                    }
                }) {}
        ~TestComputeStep() override = default;

        // A kernel that increments a global (device memory) counter across multiple workgroups.
        // Each workgroup maintains its own independent tally in a workgroup-shared counter which
        // is then added to the global count.
        //
        // This exercises atomic store/load/add and coherent reads and writes over memory in
        // storage and workgroup address spaces.
        std::string computeSkSL() const override {
            return R"(
                workgroup atomicUint localCounter;

                void main() {
                    // Initialize the local counter.
                    if (sk_LocalInvocationID.x == 0) {
                        atomicStore(localCounter, 0);
                    }

                    // Synchronize the threads in the workgroup so they all see the initial value.
                    workgroupBarrier();

                    // All threads increment the counter.
                    atomicAdd(localCounter, 1);

                    // Synchronize the threads again to ensure they have all executed the increment
                    // and the following load reads the same value across all threads in the
                    // workgroup.
                    workgroupBarrier();

                    // Add the workgroup-only tally to the global counter.
                    if (sk_LocalInvocationID.x == 0) {
                        atomicAdd(globalCounter, atomicLoad(localCounter));
                    }
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            SkASSERT(r.fSlot == 0);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return sizeof(uint32_t);
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(kWorkgroupCount, 1, 1);
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            SkASSERT(resourceIndex == 0);
            *static_cast<uint32_t*>(buffer) = 0;
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step);

    BindBufferInfo info = builder.getSharedBufferResource(0);
    if (!info) {
        ERRORF(reporter, "shared resource at slot 0 is missing");
        return;
    }

    // Record the compute pass task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
    const uint32_t result = static_cast<const uint32_t*>(
            map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
    REPORTER_ASSERT(reporter,
                    result == kExpectedCount,
                    "expected '%u', found '%u'",
                    kExpectedCount,
                    result);
}

// TODO(b/260622403): The shader tested here is identical to
// `resources/sksl/compute/AtomicsOperationsOverArrayAndStruct.compute`. It would be nice to be able
// to exercise SkSL features like this as part of SkSLTest.cpp instead of as a graphite test.
// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_AtomicOperationsOverArrayAndStructTest,
                                              reporter,
                                              context,
                                              testContext) {
    // This fails on Dawn D3D11, b/315834710
    if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
        return;
    }

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    constexpr uint32_t kWorkgroupCount = 32;
    constexpr uint32_t kWorkgroupSize = 256;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestAtomicOperationsOverArrayAndStruct",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*slot=*/0,
                        /*sksl=*/"ssbo {\n"
                                 "    atomicUint globalCountsFirstHalf;\n"
                                 "    atomicUint globalCountsSecondHalf;\n"
                                 "}\n"
                    }
                }) {}
        ~TestComputeStep() override = default;

        // Construct a kernel that increments two global (device memory) counters across multiple
        // workgroups. Each workgroup maintains its own independent tallies in workgroup-shared
        // counters which are then added to the global counts.
        //
        // This exercises atomic store/load/add and coherent reads and writes over memory in
        // storage and workgroup address spaces.
        std::string computeSkSL() const override {
            return R"(
                const uint WORKGROUP_SIZE = 256;

                workgroup atomicUint localCounts[2];

                void main() {
                    // Initialize the local counts.
                    if (sk_LocalInvocationID.x == 0) {
                        atomicStore(localCounts[0], 0);
                        atomicStore(localCounts[1], 0);
                    }

                    // Synchronize the threads in the workgroup so they all see the initial value.
                    workgroupBarrier();

                    // Each thread increments one of the local counters based on its invocation
                    // index.
                    uint idx = sk_LocalInvocationID.x < (WORKGROUP_SIZE / 2) ? 0 : 1;
                    atomicAdd(localCounts[idx], 1);

                    // Synchronize the threads again to ensure they have all executed the
                    // increments and the following load reads the same value across all threads
                    // in the workgroup.
                    workgroupBarrier();

                    // Add the workgroup-only tally to the global counter.
                    if (sk_LocalInvocationID.x == 0) {
                        atomicAdd(globalCountsFirstHalf, atomicLoad(localCounts[0]));
                        atomicAdd(globalCountsSecondHalf, atomicLoad(localCounts[1]));
                    }
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            SkASSERT(r.fSlot == 0);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return 2 * sizeof(uint32_t);
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(kWorkgroupCount, 1, 1);
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            SkASSERT(resourceIndex == 0);
            uint32_t* data = static_cast<uint32_t*>(buffer);
            data[0] = 0;
            data[1] = 0;
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step);

    BindBufferInfo info = builder.getSharedBufferResource(0);
    if (!info) {
        ERRORF(reporter, "shared resource at slot 0 is missing");
        return;
    }

    // Record the compute pass task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize / 2;

    const uint32_t* ssboData = static_cast<const uint32_t*>(
            map_buffer(context, testContext, buffer.get(), info.fOffset));
    const uint32_t firstHalfCount = ssboData[0];
    const uint32_t secondHalfCount = ssboData[1];
    REPORTER_ASSERT(reporter,
                    firstHalfCount == kExpectedCount,
                    "expected '%u', found '%u'",
                    kExpectedCount,
                    firstHalfCount);
    REPORTER_ASSERT(reporter,
                    secondHalfCount == kExpectedCount,
                    "expected '%u', found '%u'",
                    kExpectedCount,
                    secondHalfCount);
}

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearedBuffer,
                                              reporter,
                                              context,
                                              testContext) {
    constexpr uint32_t kProblemSize = 512;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // The ComputeStep requests an unmapped buffer that is zero-initialized. It writes the output
    // to a mapped buffer which the test verifies.
    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestClearedBuffer",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Zero-initialized input buffer:
                    {
                        // TODO(b/299979165): Declare this binding as read-only.
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kClear,
                        /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,  // shared to allow us to access it from the
                                                     // Builder
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/0,
                        /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
                    }
                }) {}
        ~TestComputeStep() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            return sizeof(uint32_t) * kProblemSize;
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            // Should receive this call only for the mapped buffer.
            SkASSERT(resourceIndex == 1);
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

1824 DispatchGroup::Builder builder(recorder.get());
1825 if (!builder.appendStep(&step)) {
1826 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
1827 return;
1828 }
1829
1830 // The output buffer should have been placed in the right output slot.
1831 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
1832 if (!outputInfo) {
1833 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
1834 return;
1835 }
1836
1837 // Record the compute task
1838 ComputeTask::DispatchGroupList groups;
1839 groups.push_back(builder.finalize());
1840 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1841
1842 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1843 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
1844
1845 // Submit the work and wait for it to complete.
1846 std::unique_ptr<Recording> recording = recorder->snap();
1847 if (!recording) {
1848 ERRORF(reporter, "Failed to make recording");
1849 return;
1850 }
1851
1852 InsertRecordingInfo insertInfo;
1853 insertInfo.fRecording = recording.get();
1854 context->insertRecording(insertInfo);
1855 testContext->syncedSubmit(context);
1856
1857 // Verify the contents of the output buffer.
1858 uint32_t* outData = static_cast<uint32_t*>(
1859 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
1860 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1861 for (unsigned int i = 0; i < kProblemSize; ++i) {
1862 const uint32_t found = outData[i];
1863 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1864 }
1865}
1866
1867DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearOrdering,
1868 reporter,
1869 context,
1870 testContext) {
1871 // Initiate two independent DispatchGroups operating on the same buffer. The first group
1872 // writes garbage to the buffer; the second group clears it and copies its contents to an
1873 // output buffer. This test validates that the reads, writes, and clear occur in the expected order.
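// Editorial sketch of the expected command order once recorded:
//
//   dispatch(FillWithGarbage)   // buffer := 0xFE... via the first group
//   clear(buffer)               // buffer := 0, from ClearBuffer::kYes below
//   dispatch(CopyBuffer)        // output := buffer, via the second group
//
// The all-zeros verification at the end only passes if the clear lands
// between the two dispatches.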
1874 constexpr uint32_t kWorkgroupSize = 64;
1875
1876 // Initialize buffer with non-zero data.
1877 class FillWithGarbage : public ComputeStep {
1878 public:
1879 FillWithGarbage() : ComputeStep(
1880 /*name=*/"FillWithGarbage",
1881 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1882 /*resources=*/{
1883 {
1884 /*type=*/ResourceType::kStorageBuffer,
1885 /*flow=*/DataFlow::kShared,
1886 /*policy=*/ResourcePolicy::kNone,
1887 /*slot=*/0,
1888 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1889 }
1890 }) {}
1891 ~FillWithGarbage() override = default;
1892
1893 std::string computeSkSL() const override {
1894 return R"(
1895 void main() {
1896 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
1897 }
1898 )";
1899 }
1900 } garbageStep;
1901
1902 // Second stage just copies the data to a destination buffer. This is only to verify that this
1903 // stage, issued in a separate DispatchGroup, observes the clear.
1904 class CopyBuffer : public ComputeStep {
1905 public:
1906 CopyBuffer() : ComputeStep(
1907 /*name=*/"CopyBuffer",
1908 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1909 /*resources=*/{
1910 {
1911 /*type=*/ResourceType::kStorageBuffer,
1912 /*flow=*/DataFlow::kShared,
1913 /*policy=*/ResourcePolicy::kNone,
1914 /*slot=*/0,
1915 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
1916 },
1917 {
1918 /*type=*/ResourceType::kStorageBuffer,
1919 /*flow=*/DataFlow::kShared,
1920 /*policy=*/ResourcePolicy::kNone,
1921 /*slot=*/1,
1922 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1923 }
1924 }) {}
1925 ~CopyBuffer() override = default;
1926
1927 std::string computeSkSL() const override {
1928 return R"(
1929 void main() {
1930 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1931 }
1932 )";
1933 }
1934 } copyStep;
1935
1936 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1937 DispatchGroup::Builder builder(recorder.get());
1938
1939 constexpr size_t kElementCount = 4 * kWorkgroupSize;
1940 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
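// (Editorial note: 64 invocations x 4 lanes per uint4 = 256 uint32 elements,
// so each buffer is 1024 bytes.)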
1941 auto input = recorder->priv().drawBufferManager()->getStorage(kBufferSize);
1942 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
1943
1944 ComputeTask::DispatchGroupList groups;
1945
1946 // First group.
1947 builder.assignSharedBuffer({input, kBufferSize}, 0);
1948 builder.appendStep(&garbageStep, {{1, 1, 1}});
1949 groups.push_back(builder.finalize());
1950
1951 // Second group.
1952 builder.reset();
1953 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
1954 builder.assignSharedBuffer({output, kBufferSize}, 1);
1955 builder.appendStep(&copyStep, {{1, 1, 1}});
1956 groups.push_back(builder.finalize());
1957
1958 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1959 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1960 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
1961
1962 // Submit the work and wait for it to complete.
1963 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
1964 if (!recording) {
1965 ERRORF(reporter, "Failed to make recording");
1966 return;
1967 }
1968
1969 // Verify the contents of the output buffer.
1970 uint32_t* outData = static_cast<uint32_t*>(
1971 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
1972 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1973 for (unsigned int i = 0; i < kElementCount; ++i) {
1974 const uint32_t found = outData[i];
1975 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1976 }
1977}
1978
1979DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearOrderingScratchBuffers,
1980 reporter,
1981 context,
1982 testContext) {
1983 // This test is the same as the ClearOrdering test, except the two stages access the shared
1984 // buffer through a recycled ScratchBuffer. This primarily tests ScratchBuffer reuse.
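// Editorial sketch of the reuse being tested: the first group's ScratchBuffer
// returns to the pool before the second group allocates one of the same size,
// so both groups plausibly receive the same underlying GPU buffer; the
// ClearBuffer::kYes assignment in the second group is then what guarantees
// the copy stage reads zeros rather than recycled garbage.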
1985 constexpr uint32_t kWorkgroupSize = 64;
1986
1987 // Initialize buffer with non-zero data.
1988 class FillWithGarbage : public ComputeStep {
1989 public:
1990 FillWithGarbage() : ComputeStep(
1991 /*name=*/"FillWithGarbage",
1992 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1993 /*resources=*/{
1994 {
1995 /*type=*/ResourceType::kStorageBuffer,
1996 /*flow=*/DataFlow::kShared,
1997 /*policy=*/ResourcePolicy::kNone,
1998 /*slot=*/0,
1999 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
2000 }
2001 }) {}
2002 ~FillWithGarbage() override = default;
2003
2004 std::string computeSkSL() const override {
2005 return R"(
2006 void main() {
2007 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
2008 }
2009 )";
2010 }
2011 } garbageStep;
2012
2013 // Second stage just copies the data to a destination buffer. This is only to verify that this
2014 // stage (issued in a separate DispatchGroup) observes the clear.
2015 class CopyBuffer : public ComputeStep {
2016 public:
2017 CopyBuffer() : ComputeStep(
2018 /*name=*/"CopyBuffer",
2019 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2020 /*resources=*/{
2021 {
2022 /*type=*/ResourceType::kStorageBuffer,
2023 /*flow=*/DataFlow::kShared,
2024 /*policy=*/ResourcePolicy::kNone,
2025 /*slot=*/0,
2026 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
2027 },
2028 {
2029 /*type=*/ResourceType::kStorageBuffer,
2030 /*flow=*/DataFlow::kShared,
2031 /*policy=*/ResourcePolicy::kNone,
2032 /*slot=*/1,
2033 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
2034 }
2035 }) {}
2036 ~CopyBuffer() override = default;
2037
2038 std::string computeSkSL() const override {
2039 return R"(
2040 void main() {
2041 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
2042 }
2043 )";
2044 }
2045 } copyStep;
2046
2047 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2048 DispatchGroup::Builder builder(recorder.get());
2049
2050 constexpr size_t kElementCount = 4 * kWorkgroupSize;
2051 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
2052 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
2053
2054 ComputeTask::DispatchGroupList groups;
2055
2056 // First group.
2057 {
2058 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2059 auto input = scratch.suballocate(kBufferSize);
2060 builder.assignSharedBuffer({input, kBufferSize}, 0);
2061
2062 // `scratch` returns to the scratch buffer pool when it goes out of scope
2063 }
2064 builder.appendStep(&garbageStep, {{1, 1, 1}});
2065 groups.push_back(builder.finalize());
2066
2067 // Second group.
2068 builder.reset();
2069 {
2070 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2071 auto input = scratch.suballocate(kBufferSize);
2072 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
2073 }
2074 builder.assignSharedBuffer({output, kBufferSize}, 1);
2075 builder.appendStep(&copyStep, {{1, 1, 1}});
2076 groups.push_back(builder.finalize());
2077
2078 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2079 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2080 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
2081
2082 // Submit the work and wait for it to complete.
2083 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
2084 if (!recording) {
2085 ERRORF(reporter, "Failed to make recording");
2086 return;
2087 }
2088
2089 // Verify the contents of the output buffer.
2090 uint32_t* outData = static_cast<uint32_t*>(
2091 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
2092 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
2093 for (unsigned int i = 0; i < kElementCount; ++i) {
2094 const uint32_t found = outData[i];
2095 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
2096 }
2097}
2098
2099DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_IndirectDispatch,
2100 reporter,
2101 context,
2102 testContext) {
2103 // This fails on Dawn D3D11, b/315834710
2104 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2105 return;
2106 }
2107
2108 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2109
2110 constexpr uint32_t kWorkgroupCount = 32;
2111 constexpr uint32_t kWorkgroupSize = 64;
2112
2113 // `IndirectStep` populates a buffer with the global workgroup count for `CountStep`.
2114 // `CountStep` is recorded using `DispatchGroup::appendStepIndirect()` and its workgroups get
2115 // dispatched according to the values computed by `IndirectStep` on the GPU.
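// Editorial sketch of the indirect argument layout assumed by this test; the
// struct is illustrative only (it mirrors Metal's
// MTLDispatchThreadgroupsIndirectArguments and the arguments consumed by
// WebGPU's dispatchWorkgroupsIndirect) and is not declared anywhere in this
// file:
//
//   struct IndirectDispatchArgs {
//       uint32_t workgroupCountX;  // IndirectStep writes kWorkgroupCount
//       uint32_t workgroupCountY;  // IndirectStep writes 1
//       uint32_t workgroupCountZ;  // IndirectStep writes 1
//   };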
2116 class IndirectStep : public ComputeStep {
2117 public:
2118 IndirectStep()
2119 : ComputeStep(
2120 /*name=*/"TestIndirectDispatch_IndirectStep",
2121 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2122 /*resources=*/
2123 {{
2124 /*type=*/ResourceType::kIndirectBuffer,
2125 /*flow=*/DataFlow::kShared,
2126 /*policy=*/ResourcePolicy::kClear,
2127 /*slot=*/0,
2128 // TODO(armansito): Ideally the SSBO would have a single member of
2129 // struct type `IndirectDispatchArgs`. SkSL modules don't support
2130 // struct declarations, so this is currently not possible.
2131 /*sksl=*/"ssbo { uint indirect[]; }",
2132 }}) {}
2133 ~IndirectStep() override = default;
2134
2135 // Kernel that writes a workgroup count of `kWorkgroupCount` into the argument buffer
2136 // consumed by the indirect dispatch.
2137 std::string computeSkSL() const override {
2138 return R"(
2139 // This needs to match `kWorkgroupCount` declared above.
2140 const uint kWorkgroupCount = 32;
2141
2142 void main() {
2143 if (sk_LocalInvocationID.x == 0) {
2144 indirect[0] = kWorkgroupCount;
2145 indirect[1] = 1;
2146 indirect[2] = 1;
2147 }
2148 }
2149 )";
2150 }
2151
2152 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2153 SkASSERT(index == 0);
2154 SkASSERT(r.fSlot == 0);
2155 SkASSERT(r.fFlow == DataFlow::kShared);
2156 return kIndirectDispatchArgumentSize;
2157 }
2158
2159 WorkgroupSize calculateGlobalDispatchSize() const override {
2160 return WorkgroupSize(1, 1, 1);
2161 }
2162 } indirectStep;
2163
2164 class CountStep : public ComputeStep {
2165 public:
2166 CountStep()
2167 : ComputeStep(
2168 /*name=*/"TestIndirectDispatch_CountStep",
2169 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2170 /*resources=*/
2171 {{
2172 /*type=*/ResourceType::kStorageBuffer,
2173 /*flow=*/DataFlow::kShared,
2174 /*policy=*/ResourcePolicy::kMapped,
2175 /*slot=*/1,
2176 /*sksl=*/"ssbo { atomicUint globalCounter; }",
2177 }}) {}
2178 ~CountStep() override = default;
2179
2180 std::string computeSkSL() const override {
2181 return R"(
2182 workgroup atomicUint localCounter;
2183
2184 void main() {
2185 // Initialize the local counter.
2186 if (sk_LocalInvocationID.x == 0) {
2187 atomicStore(localCounter, 0);
2188 }
2189
2190 // Synchronize the threads in the workgroup so they all see the initial value.
2191 workgroupBarrier();
2192
2193 // All threads increment the counter.
2194 atomicAdd(localCounter, 1);
2195
2196 // Synchronize the threads again to ensure they have all executed the increment
2197 // and the following load reads the same value across all threads in the
2198 // workgroup.
2199 workgroupBarrier();
2200
2201 // Add the workgroup-only tally to the global counter.
2202 if (sk_LocalInvocationID.x == 0) {
2203 atomicAdd(globalCounter, atomicLoad(localCounter));
2204 }
2205 }
2206 )";
2207 }
2208
2209 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2210 SkASSERT(index == 0);
2211 SkASSERT(r.fSlot == 1);
2212 SkASSERT(r.fFlow == DataFlow::kShared);
2213 return sizeof(uint32_t);
2214 }
2215
2216 void prepareStorageBuffer(int resourceIndex,
2217 const ResourceDesc& r,
2218 void* buffer,
2219 size_t bufferSize) const override {
2220 SkASSERT(resourceIndex == 0);
2221 *static_cast<uint32_t*>(buffer) = 0;
2222 }
2223 } countStep;
2224
2225 DispatchGroup::Builder builder(recorder.get());
2226 builder.appendStep(&indirectStep);
2227 BindBufferInfo indirectBufferInfo = builder.getSharedBufferResource(0);
2228 if (!indirectBufferInfo) {
2229 ERRORF(reporter, "Shared resource at slot 0 is missing");
2230 return;
2231 }
2232 builder.appendStepIndirect(&countStep, {indirectBufferInfo, kIndirectDispatchArgumentSize});
2233
2234 BindBufferInfo info = builder.getSharedBufferResource(1);
2235 if (!info) {
2236 ERRORF(reporter, "Shared resource at slot 1 is missing");
2237 return;
2238 }
2239
2240 // Record the compute pass task.
2241 ComputeTask::DispatchGroupList groups;
2242 groups.push_back(builder.finalize());
2243 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2244
2245 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2246 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2247
2248 // Submit the work and wait for it to complete.
2249 std::unique_ptr<Recording> recording = recorder->snap();
2250 if (!recording) {
2251 ERRORF(reporter, "Failed to make recording");
2252 return;
2253 }
2254
2255 InsertRecordingInfo insertInfo;
2256 insertInfo.fRecording = recording.get();
2257 context->insertRecording(insertInfo);
2258 testContext->syncedSubmit(context);
2259
2260 // Verify the contents of the output buffer.
2261 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
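// (Editorial note: 32 workgroups x 64 invocations per workgroup = 2048.)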
2262 const uint32_t result = static_cast<const uint32_t*>(
2263 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2264 REPORTER_ASSERT(reporter,
2265 result == kExpectedCount,
2266 "expected '%u', found '%u'",
2267 kExpectedCount,
2268 result);
2269}
2270
2271DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_NativeShaderSourceMetal,
2272 reporter,
2273 context,
2274 testContext) {
2275 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2276
2277 constexpr uint32_t kWorkgroupCount = 32;
2278 constexpr uint32_t kWorkgroupSize = 1024;
2279
2280 class TestComputeStep : public ComputeStep {
2281 public:
2282 TestComputeStep() : ComputeStep(
2283 /*name=*/"TestAtomicOperationsMetal",
2284 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2285 /*resources=*/{
2286 {
2287 /*type=*/ResourceType::kStorageBuffer,
2288 /*flow=*/DataFlow::kShared,
2289 /*policy=*/ResourcePolicy::kMapped,
2290 /*slot=*/0,
2291 }
2292 },
2293 /*workgroupBuffers=*/{},
2294 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2295 ~TestComputeStep() override = default;
2296
2297 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2298 SkASSERT(format == NativeShaderFormat::kMSL);
2299 static constexpr std::string_view kSource = R"(
2300 #include <metal_stdlib>
2301
2302 using namespace metal;
2303
2304 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2305 device atomic_uint& globalCounter [[buffer(0)]]) {
2306 threadgroup atomic_uint localCounter;
2307
2308 // Initialize the local counter.
2309 if (localId.x == 0u) {
2310 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2311 }
2312
2313 // Synchronize the threads in the workgroup so they all see the initial value.
2314 threadgroup_barrier(mem_flags::mem_threadgroup);
2315
2316 // All threads increment the counter.
2317 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2318
2319 // Synchronize the threads again to ensure they have all executed the increment
2320 // and the following load reads the same value across all threads in the
2321 // workgroup.
2322 threadgroup_barrier(mem_flags::mem_threadgroup);
2323
2324 // Add the workgroup-only tally to the global counter.
2325 if (localId.x == 0u) {
2326 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2327 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2328 }
2329 }
2330 )";
2331 return {kSource, "atomicCount"};
2332 }
2333
2334 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2335 SkASSERT(index == 0);
2336 SkASSERT(r.fSlot == 0);
2337 SkASSERT(r.fFlow == DataFlow::kShared);
2338 return sizeof(uint32_t);
2339 }
2340
2341 WorkgroupSize calculateGlobalDispatchSize() const override {
2342 return WorkgroupSize(kWorkgroupCount, 1, 1);
2343 }
2344
2345 void prepareStorageBuffer(int resourceIndex,
2346 const ResourceDesc& r,
2347 void* buffer,
2348 size_t bufferSize) const override {
2349 SkASSERT(resourceIndex == 0);
2350 *static_cast<uint32_t*>(buffer) = 0;
2351 }
2352 } step;
2353
2354 DispatchGroup::Builder builder(recorder.get());
2355 builder.appendStep(&step);
2356
2357 BindBufferInfo info = builder.getSharedBufferResource(0);
2358 if (!info) {
2359 ERRORF(reporter, "shared resource at slot 0 is missing");
2360 return;
2361 }
2362
2363 // Record the compute pass task.
2364 ComputeTask::DispatchGroupList groups;
2365 groups.push_back(builder.finalize());
2366 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2367
2368 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2369 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2370
2371 // Submit the work and wait for it to complete.
2372 std::unique_ptr<Recording> recording = recorder->snap();
2373 if (!recording) {
2374 ERRORF(reporter, "Failed to make recording");
2375 return;
2376 }
2377
2378 InsertRecordingInfo insertInfo;
2379 insertInfo.fRecording = recording.get();
2380 context->insertRecording(insertInfo);
2381 testContext->syncedSubmit(context);
2382
2383 // Verify the contents of the output buffer.
2384 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2385 const uint32_t result = static_cast<const uint32_t*>(
2386 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2387 REPORTER_ASSERT(reporter,
2388 result == kExpectedCount,
2389 "expected '%u', found '%u'",
2390 kExpectedCount,
2391 result);
2392}
2393
2394DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_WorkgroupBufferDescMetal,
2395 reporter,
2396 context,
2397 testContext) {
2398 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2399
2400 constexpr uint32_t kWorkgroupCount = 32;
2401 constexpr uint32_t kWorkgroupSize = 1024;
2402
2403 class TestComputeStep : public ComputeStep {
2404 public:
2405 TestComputeStep() : ComputeStep(
2406 /*name=*/"TestAtomicOperationsMetal",
2407 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2408 /*resources=*/{
2409 {
2410 /*type=*/ResourceType::kStorageBuffer,
2411 /*flow=*/DataFlow::kShared,
2412 /*policy=*/ResourcePolicy::kMapped,
2413 /*slot=*/0,
2414 }
2415 },
2416 /*workgroupBuffers=*/{
2417 {
2418 /*size=*/sizeof(uint32_t),
2419 /*index=*/0u,
2420 }
2421 },
2422 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2423 ~TestComputeStep() override = default;
2424
2425 // This is the same MSL kernel as in Compute_NativeShaderSourceMetal, except `localCounter`
2426 // is an entry-point parameter instead of a local variable. This forces the workgroup
2427 // binding to be encoded explicitly in the command encoder.
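// Editorial sketch (metal-cpp style; the encoder variable is assumed, not
// code from this test) of what the explicit encoding amounts to: a
// [[threadgroup(n)]] argument has no storage until the encoder is told its
// size, roughly
//
//   computeEncoder->setThreadgroupMemoryLength(sizeof(uint32_t), /*index=*/0u);
//
// and the WorkgroupBufferDesc entry passed to ComputeStep above is what is
// expected to drive that call.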
2428 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2429 SkASSERT(format == NativeShaderFormat::kMSL);
2430 static constexpr std::string_view kSource = R"(
2431 #include <metal_stdlib>
2432
2433 using namespace metal;
2434
2435 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2436 device atomic_uint& globalCounter [[buffer(0)]],
2437 threadgroup atomic_uint& localCounter [[threadgroup(0)]]) {
2438 // Initialize the local counter.
2439 if (localId.x == 0u) {
2440 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2441 }
2442
2443 // Synchronize the threads in the workgroup so they all see the initial value.
2444 threadgroup_barrier(mem_flags::mem_threadgroup);
2445
2446 // All threads increment the counter.
2447 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2448
2449 // Synchronize the threads again to ensure they have all executed the increment
2450 // and the following load reads the same value across all threads in the
2451 // workgroup.
2452 threadgroup_barrier(mem_flags::mem_threadgroup);
2453
2454 // Add the workgroup-only tally to the global counter.
2455 if (localId.x == 0u) {
2456 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2457 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2458 }
2459 }
2460 )";
2461 return {kSource, "atomicCount"};
2462 }
2463
2464 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2465 SkASSERT(index == 0);
2466 SkASSERT(r.fSlot == 0);
2467 SkASSERT(r.fFlow == DataFlow::kShared);
2468 return sizeof(uint32_t);
2469 }
2470
2471 WorkgroupSize calculateGlobalDispatchSize() const override {
2472 return WorkgroupSize(kWorkgroupCount, 1, 1);
2473 }
2474
2475 void prepareStorageBuffer(int resourceIndex,
2476 const ResourceDesc& r,
2477 void* buffer,
2478 size_t bufferSize) const override {
2479 SkASSERT(resourceIndex == 0);
2480 *static_cast<uint32_t*>(buffer) = 0;
2481 }
2482 } step;
2483
2484 DispatchGroup::Builder builder(recorder.get());
2485 builder.appendStep(&step);
2486
2487 BindBufferInfo info = builder.getSharedBufferResource(0);
2488 if (!info) {
2489 ERRORF(reporter, "shared resource at slot 0 is missing");
2490 return;
2491 }
2492
2493 // Record the compute pass task.
2494 ComputeTask::DispatchGroupList groups;
2495 groups.push_back(builder.finalize());
2496 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2497
2498 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2499 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2500
2501 // Submit the work and wait for it to complete.
2502 std::unique_ptr<Recording> recording = recorder->snap();
2503 if (!recording) {
2504 ERRORF(reporter, "Failed to make recording");
2505 return;
2506 }
2507
2508 InsertRecordingInfo insertInfo;
2509 insertInfo.fRecording = recording.get();
2510 context->insertRecording(insertInfo);
2511 testContext->syncedSubmit(context);
2512
2513 // Verify the contents of the output buffer.
2514 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2515 const uint32_t result = static_cast<const uint32_t*>(
2516 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2517 REPORTER_ASSERT(reporter,
2518 result == kExpectedCount,
2519 "expected '%u', found '%u'",
2520 kExpectedCount,
2521 result);
2522}
2523
2524DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT(Compute_NativeShaderSourceWGSL, reporter, context, testContext) {
2525 // This fails on Dawn D3D11, b/315834710
2526 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2527 return;
2528 }
2529
2530 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2531
2532 constexpr uint32_t kWorkgroupCount = 32;
2533 constexpr uint32_t kWorkgroupSize = 256; // The WebGPU default workgroup size limit is 256
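// (Editorial note: 256 is WebGPU's default maxComputeInvocationsPerWorkgroup
// limit; larger workgroups would require requesting higher device limits.)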
2534
2535 class TestComputeStep : public ComputeStep {
2536 public:
2537 TestComputeStep() : ComputeStep(
2538 /*name=*/"TestAtomicOperationsWGSL",
2539 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2540 /*resources=*/{
2541 {
2542 /*type=*/ResourceType::kStorageBuffer,
2543 /*flow=*/DataFlow::kShared,
2544 /*policy=*/ResourcePolicy::kMapped,
2545 /*slot=*/0,
2546 }
2547 },
2548 /*workgroupBuffers=*/{},
2549 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2550 ~TestComputeStep() override = default;
2551
2552 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2553 SkASSERT(format == NativeShaderFormat::kWGSL);
2554 static constexpr std::string_view kSource = R"(
2555 @group(0) @binding(0) var<storage, read_write> globalCounter: atomic<u32>;
2556
2557 var<workgroup> localCounter: atomic<u32>;
2558
2559 @compute @workgroup_size(256)
2560 fn atomicCount(@builtin(local_invocation_id) localId: vec3u) {
2561 // Initialize the local counter.
2562 if localId.x == 0u {
2563 atomicStore(&localCounter, 0u);
2564 }
2565
2566 // Synchronize the threads in the workgroup so they all see the initial value.
2567 workgroupBarrier();
2568
2569 // All threads increment the counter.
2570 atomicAdd(&localCounter, 1u);
2571
2572 // Synchronize the threads again to ensure they have all executed the increment
2573 // and the following load reads the same value across all threads in the
2574 // workgroup.
2575 workgroupBarrier();
2576
2577 // Add the workgroup-only tally to the global counter.
2578 if localId.x == 0u {
2579 let tally = atomicLoad(&localCounter);
2580 atomicAdd(&globalCounter, tally);
2581 }
2582 }
2583 )";
2584 return {kSource, "atomicCount"};
2585 }
2586
2587 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2588 SkASSERT(index == 0);
2589 SkASSERT(r.fSlot == 0);
2590 SkASSERT(r.fFlow == DataFlow::kShared);
2591 return sizeof(uint32_t);
2592 }
2593
2594 WorkgroupSize calculateGlobalDispatchSize() const override {
2595 return WorkgroupSize(kWorkgroupCount, 1, 1);
2596 }
2597
2598 void prepareStorageBuffer(int resourceIndex,
2599 const ResourceDesc& r,
2600 void* buffer,
2601 size_t bufferSize) const override {
2602 SkASSERT(resourceIndex == 0);
2603 *static_cast<uint32_t*>(buffer) = 0;
2604 }
2605 } step;
2606
2607 DispatchGroup::Builder builder(recorder.get());
2608 builder.appendStep(&step);
2609
2610 BindBufferInfo info = builder.getSharedBufferResource(0);
2611 if (!info) {
2612 ERRORF(reporter, "shared resource at slot 0 is missing");
2613 return;
2614 }
2615
2616 // Record the compute pass task.
2617 ComputeTask::DispatchGroupList groups;
2618 groups.push_back(builder.finalize());
2619 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2620
2621 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2622 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2623
2624 // Submit the work and wait for it to complete.
2625 std::unique_ptr<Recording> recording = recorder->snap();
2626 if (!recording) {
2627 ERRORF(reporter, "Failed to make recording");
2628 return;
2629 }
2630
2631 InsertRecordingInfo insertInfo;
2632 insertInfo.fRecording = recording.get();
2633 context->insertRecording(insertInfo);
2634 testContext->syncedSubmit(context);
2635
2636 // Verify the contents of the output buffer.
2637 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2638 const uint32_t result = static_cast<const uint32_t*>(
2639 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2640 REPORTER_ASSERT(reporter,
2641 result == kExpectedCount,
2642 "expected '%u', found '%u'",
2643 kExpectedCount,
2644 result);
2645}