Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
Macros | Functions
ComputeTest.cpp File Reference
#include "tests/Test.h"
#include "include/core/SkBitmap.h"
#include "include/gpu/graphite/Context.h"
#include "include/gpu/graphite/Recorder.h"
#include "include/gpu/graphite/Recording.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/ComputePipelineDesc.h"
#include "src/gpu/graphite/ComputeTypes.h"
#include "src/gpu/graphite/ContextPriv.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/compute/ComputeStep.h"
#include "src/gpu/graphite/compute/DispatchGroup.h"
#include "src/gpu/graphite/task/ComputeTask.h"
#include "src/gpu/graphite/task/CopyTask.h"
#include "src/gpu/graphite/task/SynchronizeToCpuTask.h"
#include "src/gpu/graphite/task/UploadTask.h"
#include "tools/graphite/GraphiteTestContext.h"

Go to the source code of this file.

Macros

#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS( name, reporter, graphite_context, test_context)
 

Functions

 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_SingleDispatchTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_DispatchGroupTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_UniformBufferTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ExternallyAssignedBuffer, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_StorageTexture, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_StorageTextureReadAndWrite, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ReadOnlyStorageBuffer, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_StorageTextureMultipleComputeSteps, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_SampledTexture, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_AtomicOperationsTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_AtomicOperationsOverArrayAndStructTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ClearedBuffer, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ClearOrdering, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ClearOrderingScratchBuffers, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_IndirectDispatch, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT (Compute_NativeShaderSourceMetal, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT (Compute_WorkgroupBufferDescMetal, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT (Compute_NativeShaderSourceWGSL, reporter, context, testContext)
 

Macro Definition Documentation

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS

#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (   name,
  reporter,
  graphite_context,
  test_context 
)
Value:
is_dawn_or_metal_context_type, \
graphite_context, \
test_context, \
reporter
#define DEF_GRAPHITE_TEST_FOR_CONTEXTS(name, context_filter, reporter, graphite_ctx, test_ctx, ctsEnforcement)
Definition Test.h:368
const char * name
Definition fuchsia.cc:50

Definition at line 100 of file ComputeTest.cpp.

113 {
114 constexpr uint32_t kProblemSize = 512;
115 constexpr float kFactor = 4.f;
116
117 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
118 // processes 1 vector at a time.
119 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
120
121 std::unique_ptr<Recorder> recorder = context->makeRecorder();
122
123 class TestComputeStep : public ComputeStep {
124 public:
125 // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
126 // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
127 TestComputeStep() : ComputeStep(
128 /*name=*/"TestArrayMultiply",
129 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
130 /*resources=*/{
131 // Input buffer:
132 {
133 // TODO(b/299979165): Declare this binding as read-only.
134 /*type=*/ResourceType::kStorageBuffer,
135 /*flow=*/DataFlow::kPrivate,
136 /*policy=*/ResourcePolicy::kMapped,
137 /*sksl=*/"inputBlock {\n"
138 " float factor;\n"
139 " layout(offset=16) float4 in_data[];\n"
140 "}",
141 },
142 // Output buffer:
143 {
144 /*type=*/ResourceType::kStorageBuffer,
145 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
146 // Builder
147 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
148 /*slot=*/0,
149 /*sksl=*/"outputBlock { float4 out_data[]; }",
150 }
151 }) {}
152 ~TestComputeStep() override = default;
153
154 // A kernel that multiplies a large array of floats by a supplied factor.
155 std::string computeSkSL() const override {
156 return R"(
157 void main() {
158 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
159 }
160 )";
161 }
162
163 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
164 if (index == 0) {
165 SkASSERT(r.fFlow == DataFlow::kPrivate);
166 return sizeof(float) * (kProblemSize + 4);
167 }
168 SkASSERT(index == 1);
169 SkASSERT(r.fSlot == 0);
170 SkASSERT(r.fFlow == DataFlow::kShared);
171 return sizeof(float) * kProblemSize;
172 }
173
174 void prepareStorageBuffer(int resourceIndex,
175 const ResourceDesc& r,
176 void* buffer,
177 size_t bufferSize) const override {
178 // Only initialize the input buffer.
179 if (resourceIndex != 0) {
180 return;
181 }
182 SkASSERT(r.fFlow == DataFlow::kPrivate);
183
184 size_t dataCount = sizeof(float) * (kProblemSize + 4);
185 SkASSERT(bufferSize == dataCount);
186 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
187 inData[0] = kFactor;
188 for (unsigned int i = 0; i < kProblemSize; ++i) {
189 inData[i + 4] = i + 1;
190 }
191 }
192
193 WorkgroupSize calculateGlobalDispatchSize() const override {
194 return WorkgroupSize(1, 1, 1);
195 }
196 } step;
197
198 DispatchGroup::Builder builder(recorder.get());
199 if (!builder.appendStep(&step)) {
200 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
201 return;
202 }
203
204 // The output buffer should have been placed in the right output slot.
205 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
206 if (!outputInfo) {
207 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
208 return;
209 }
210
211 // Record the compute task
212 std::vector<std::unique_ptr<DispatchGroup>> groups;
213 groups.push_back(builder.finalize());
214 recorder->priv().add(ComputeTask::Make(std::move(groups)));
215
216 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
217 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
218
219 // Submit the work and wait for it to complete.
220 std::unique_ptr<Recording> recording = recorder->snap();
221 if (!recording) {
222 ERRORF(reporter, "Failed to make recording");
223 return;
224 }
225
226 InsertRecordingInfo insertInfo;
227 insertInfo.fRecording = recording.get();
228 context->insertRecording(insertInfo);
229 testContext->syncedSubmit(context);
230
231 // Verify the contents of the output buffer.
232 float* outData = static_cast<float*>(
233 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
234 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
235 for (unsigned int i = 0; i < kProblemSize; ++i) {
236 const float expected = (i + 1) * kFactor;
237 const float found = outData[i];
238 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
239 }
240}
241
242// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
243// compute programs.
244DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_DispatchGroupTest,
245 reporter,
246 context,
247 testContext) {
248 // TODO(b/315834710): This fails on Dawn D3D11
249 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
250 return;
251 }
252
253 constexpr uint32_t kProblemSize = 512;
254 constexpr float kFactor1 = 4.f;
255 constexpr float kFactor2 = 3.f;
256
257 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
258 // processes 1 vector at a time.
259 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
260
261 std::unique_ptr<Recorder> recorder = context->makeRecorder();
262
263 // Define two steps that perform two multiplication passes over the same input.
264
265 class TestComputeStep1 : public ComputeStep {
266 public:
267 // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
268 // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
269 TestComputeStep1() : ComputeStep(
270 /*name=*/"TestArrayMultiplyFirstPass",
271 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
272 /*resources=*/{
273 // Input buffer:
274 {
275 // TODO(b/299979165): Declare this binding as read-only.
276 /*type=*/ResourceType::kStorageBuffer,
277 /*flow=*/DataFlow::kPrivate,
278 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
279 /*sksl=*/"inputBlock {\n"
280 " float factor;\n"
281 " layout(offset=16) float4 in_data[];\n"
282 "}",
283 },
284 // Output buffers:
285 {
286 /*type=*/ResourceType::kStorageBuffer,
287 /*flow=*/DataFlow::kShared,
288 /*policy=*/ResourcePolicy::kNone, // GPU-only, read by second step
289 /*slot=*/0,
290 /*sksl=*/"outputBlock1 { float4 forward_data[]; }",
291 },
292 {
293 /*type=*/ResourceType::kStorageBuffer,
294 /*flow=*/DataFlow::kShared,
295 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
296 /*slot=*/1,
297 /*sksl=*/"outputBlock2 { float2 extra_data; }",
298 }
299 }) {}
300 ~TestComputeStep1() override = default;
301
302 // A kernel that multiplies a large array of floats by a supplied factor.
303 std::string computeSkSL() const override {
304 return R"(
305 void main() {
306 uint idx = sk_GlobalInvocationID.x;
307 forward_data[idx] = in_data[idx] * factor;
308 if (idx == 0) {
309 extra_data.x = factor;
310 extra_data.y = 2 * factor;
311 }
312 }
313 )";
314 }
315
316 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
317 if (index == 0) {
318 SkASSERT(r.fFlow == DataFlow::kPrivate);
319 return sizeof(float) * (kProblemSize + 4);
320 }
321 if (index == 1) {
322 SkASSERT(r.fFlow == DataFlow::kShared);
323 SkASSERT(r.fSlot == 0);
324 return sizeof(float) * kProblemSize;
325 }
326
327 SkASSERT(index == 2);
328 SkASSERT(r.fSlot == 1);
329 SkASSERT(r.fFlow == DataFlow::kShared);
330 return 2 * sizeof(float);
331 }
332
333 void prepareStorageBuffer(int resourceIndex,
334 const ResourceDesc& r,
335 void* buffer,
336 size_t bufferSize) const override {
337 if (resourceIndex != 0) {
338 return;
339 }
340
341 size_t dataCount = sizeof(float) * (kProblemSize + 4);
342 SkASSERT(bufferSize == dataCount);
343 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
344 inData[0] = kFactor1;
345 for (unsigned int i = 0; i < kProblemSize; ++i) {
346 inData[i + 4] = i + 1;
347 }
348 }
349
350 WorkgroupSize calculateGlobalDispatchSize() const override {
351 return WorkgroupSize(1, 1, 1);
352 }
353 } step1;
354
355 class TestComputeStep2 : public ComputeStep {
356 public:
357 TestComputeStep2() : ComputeStep(
358 /*name=*/"TestArrayMultiplySecondPass",
359 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
360 /*resources=*/{
361 // Input buffer:
362 {
363 /*type=*/ResourceType::kStorageBuffer,
364 /*flow=*/DataFlow::kShared,
365 /*policy=*/ResourcePolicy::kNone, // GPU-only
366 /*slot=*/0, // this is the output from the first step
367 /*sksl=*/"inputBlock { float4 in_data[]; }",
368 },
369 {
370 /*type=*/ResourceType::kStorageBuffer,
371 /*flow=*/DataFlow::kPrivate,
372 /*policy=*/ResourcePolicy::kMapped,
373 /*sksl=*/"factorBlock { float factor; }"
374 },
375 // Output buffer:
376 {
377 /*type=*/ResourceType::kStorageBuffer,
378 /*flow=*/DataFlow::kShared,
379 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
380 /*slot=*/2,
381 /*sksl=*/"outputBlock { float4 out_data[]; }",
382 }
383 }) {}
384 ~TestComputeStep2() override = default;
385
386 // A kernel that multiplies a large array of floats by a supplied factor.
387 std::string computeSkSL() const override {
388 return R"(
389 void main() {
390 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
391 }
392 )";
393 }
394
395 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
396 SkASSERT(index != 0);
397 if (index == 1) {
398 SkASSERT(r.fFlow == DataFlow::kPrivate);
399 return sizeof(float) * 4;
400 }
401 SkASSERT(index == 2);
402 SkASSERT(r.fSlot == 2);
403 SkASSERT(r.fFlow == DataFlow::kShared);
404 return sizeof(float) * kProblemSize;
405 }
406
407 void prepareStorageBuffer(int resourceIndex,
408 const ResourceDesc& r,
409 void* buffer,
410 size_t bufferSize) const override {
411 if (resourceIndex != 1) {
412 return;
413 }
414 SkASSERT(r.fFlow == DataFlow::kPrivate);
415 *static_cast<float*>(buffer) = kFactor2;
416 }
417
418 WorkgroupSize calculateGlobalDispatchSize() const override {
419 return WorkgroupSize(1, 1, 1);
420 }
421 } step2;
422
423 DispatchGroup::Builder builder(recorder.get());
424 builder.appendStep(&step1);
425 builder.appendStep(&step2);
426
427 // Slots 0, 1, and 2 should all contain shared buffers. Slot 1 contains the extra output buffer
428 // from step 1 while slot 2 contains the result of the second multiplication pass from step 2.
429 // Slot 0 is not mappable.
430 REPORTER_ASSERT(reporter,
431 std::holds_alternative<BufferView>(builder.outputTable().fSharedSlots[0]),
432 "shared resource at slot 0 is missing");
433 BindBufferInfo outputInfo = builder.getSharedBufferResource(2);
434 if (!outputInfo) {
435 ERRORF(reporter, "Failed to allocate an output buffer at slot 2");
436 return;
437 }
438
439 // Extra output buffer from step 1 (corresponding to 'outputBlock2')
440 BindBufferInfo extraOutputInfo = builder.getSharedBufferResource(1);
441 if (!extraOutputInfo) {
442 ERRORF(reporter, "shared resource at slot 1 is missing");
443 return;
444 }
445
446 // Record the compute task
447 std::vector<std::unique_ptr<DispatchGroup>> groups;
448 groups.push_back(builder.finalize());
449 recorder->priv().add(ComputeTask::Make(std::move(groups)));
450
451 // Ensure the output buffers get synchronized to the CPU once the GPU submission has finished.
452 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
453 auto extraOutputBuffer = sync_buffer_to_cpu(recorder.get(), extraOutputInfo.fBuffer);
454
455 // Submit the work and wait for it to complete.
456 std::unique_ptr<Recording> recording = recorder->snap();
457 if (!recording) {
458 ERRORF(reporter, "Failed to make recording");
459 return;
460 }
461
462 InsertRecordingInfo insertInfo;
463 insertInfo.fRecording = recording.get();
464 context->insertRecording(insertInfo);
465 testContext->syncedSubmit(context);
466
467 // Verify the contents of the output buffer from step 2
468 float* outData = static_cast<float*>(
469 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
470 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
471 for (unsigned int i = 0; i < kProblemSize; ++i) {
472 const float expected = (i + 1) * kFactor1 * kFactor2;
473 const float found = outData[i];
474 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
475 }
476
477 // Verify the contents of the extra output buffer from step 1
478 float* extraOutData = static_cast<float*>(
479 map_buffer(context, testContext, extraOutputBuffer.get(), extraOutputInfo.fOffset));
480 SkASSERT(extraOutputBuffer->isMapped() && extraOutData != nullptr);
481 REPORTER_ASSERT(reporter,
482 kFactor1 == extraOutData[0],
483 "expected '%f', found '%f'",
484 kFactor1,
485 extraOutData[0]);
486 REPORTER_ASSERT(reporter,
487 2 * kFactor1 == extraOutData[1],
488 "expected '%f', found '%f'",
489 2 * kFactor1,
490 extraOutData[1]);
491}
492
493// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
494// compute programs.
495DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_UniformBufferTest,
496 reporter,
497 context,
498 testContext) {
499 // TODO(b/315834710): This fails on Dawn D3D11
500 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
501 return;
502 }
503
504 constexpr uint32_t kProblemSize = 512;
505 constexpr float kFactor = 4.f;
506
507 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
508 // processes 1 vector at a time.
509 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
510
511 std::unique_ptr<Recorder> recorder = context->makeRecorder();
512
513 class TestComputeStep : public ComputeStep {
514 public:
515 TestComputeStep() : ComputeStep(
516 /*name=*/"TestArrayMultiply",
517 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
518 /*resources=*/{
519 // Uniform buffer:
520 {
521 /*type=*/ResourceType::kUniformBuffer,
522 /*flow=*/DataFlow::kPrivate,
523 /*policy=*/ResourcePolicy::kMapped,
524 /*sksl=*/"uniformBlock { float factor; }"
525 },
526 // Input buffer:
527 {
528 /*type=*/ResourceType::kStorageBuffer,
529 /*flow=*/DataFlow::kPrivate,
530 /*policy=*/ResourcePolicy::kMapped,
531 /*sksl=*/"inputBlock { float4 in_data[]; }",
532 },
533 // Output buffer:
534 {
535 /*type=*/ResourceType::kStorageBuffer,
536 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
537 // Builder
538 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
539 /*slot=*/0,
540 /*sksl=*/"outputBlock { float4 out_data[]; }",
541 }
542 }) {}
543 ~TestComputeStep() override = default;
544
545 // A kernel that multiplies a large array of floats by a supplied factor.
546 std::string computeSkSL() const override {
547 return R"(
548 void main() {
549 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
550 }
551 )";
552 }
553
554 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
555 if (index == 0) {
556 SkASSERT(r.fFlow == DataFlow::kPrivate);
557 return sizeof(float);
558 }
559 if (index == 1) {
560 SkASSERT(r.fFlow == DataFlow::kPrivate);
561 return sizeof(float) * kProblemSize;
562 }
563 SkASSERT(index == 2);
564 SkASSERT(r.fSlot == 0);
565 SkASSERT(r.fFlow == DataFlow::kShared);
566 return sizeof(float) * kProblemSize;
567 }
568
569 void prepareStorageBuffer(int resourceIndex,
570 const ResourceDesc& r,
571 void* buffer,
572 size_t bufferSize) const override {
573 // Only initialize the input storage buffer.
574 if (resourceIndex != 1) {
575 return;
576 }
577 SkASSERT(r.fFlow == DataFlow::kPrivate);
578 size_t dataCount = sizeof(float) * kProblemSize;
579 SkASSERT(bufferSize == dataCount);
580 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
581 for (unsigned int i = 0; i < kProblemSize; ++i) {
582 inData[i] = i + 1;
583 }
584 }
585
586 void prepareUniformBuffer(int resourceIndex,
587 const ResourceDesc&,
588 UniformManager* mgr) const override {
589 SkASSERT(resourceIndex == 0);
590 SkDEBUGCODE(
591 const Uniform uniforms[] = {{"factor", SkSLType::kFloat}};
592 mgr->setExpectedUniforms(uniforms);
593 )
594 mgr->write(kFactor);
595 }
596
597 WorkgroupSize calculateGlobalDispatchSize() const override {
598 return WorkgroupSize(1, 1, 1);
599 }
600 } step;
601
602 DispatchGroup::Builder builder(recorder.get());
603 if (!builder.appendStep(&step)) {
604 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
605 return;
606 }
607
608 // The output buffer should have been placed in the right output slot.
609 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
610 if (!outputInfo) {
611 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
612 return;
613 }
614
615 // Record the compute task
616 std::vector<std::unique_ptr<DispatchGroup>> groups;
617 groups.push_back(builder.finalize());
618 recorder->priv().add(ComputeTask::Make(std::move(groups)));
619
620 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
621 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
622
623 // Submit the work and wait for it to complete.
624 std::unique_ptr<Recording> recording = recorder->snap();
625 if (!recording) {
626 ERRORF(reporter, "Failed to make recording");
627 return;
628 }
629
630 InsertRecordingInfo insertInfo;
631 insertInfo.fRecording = recording.get();
632 context->insertRecording(insertInfo);
633 testContext->syncedSubmit(context);
634
635 // Verify the contents of the output buffer.
636 float* outData = static_cast<float*>(
637 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
638 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
639 for (unsigned int i = 0; i < kProblemSize; ++i) {
640 const float expected = (i + 1) * kFactor;
641 const float found = outData[i];
642 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
643 }
644}
645
646// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
647// compute programs.
648DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ExternallyAssignedBuffer,
649 reporter,
650 context,
651 testContext) {
652 constexpr uint32_t kProblemSize = 512;
653 constexpr float kFactor = 4.f;
654
655 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
656 // processes 1 vector at a time.
657 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
658
659 std::unique_ptr<Recorder> recorder = context->makeRecorder();
660
661 class TestComputeStep : public ComputeStep {
662 public:
663 TestComputeStep() : ComputeStep(
664 /*name=*/"ExternallyAssignedBuffer",
665 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
666 /*resources=*/{
667 // Input buffer:
668 {
669 /*type=*/ResourceType::kStorageBuffer,
670 /*flow=*/DataFlow::kPrivate,
671 /*policy=*/ResourcePolicy::kMapped,
672 /*sksl=*/"inputBlock {\n"
673 " float factor;\n"
674 " layout(offset = 16) float4 in_data[];\n"
675 "}\n",
676 },
677 // Output buffer:
678 {
679 /*type=*/ResourceType::kStorageBuffer,
680 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
681 // Builder
682 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
683 /*slot=*/0,
684 /*sksl=*/"outputBlock { float4 out_data[]; }",
685 }
686 }) {}
687 ~TestComputeStep() override = default;
688
689 // A kernel that multiplies a large array of floats by a supplied factor.
690 std::string computeSkSL() const override {
691 return R"(
692 void main() {
693 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
694 }
695 )";
696 }
697
698 size_t calculateBufferSize(int resourceIndex, const ResourceDesc& r) const override {
699 SkASSERT(resourceIndex == 0);
700 SkASSERT(r.fFlow == DataFlow::kPrivate);
701 return sizeof(float) * (kProblemSize + 4);
702 }
703
704 void prepareStorageBuffer(int resourceIndex,
705 const ResourceDesc& r,
706 void* buffer,
707 size_t bufferSize) const override {
708 SkASSERT(resourceIndex == 0);
709 SkASSERT(r.fFlow == DataFlow::kPrivate);
710
711 size_t dataCount = sizeof(float) * (kProblemSize + 4);
712 SkASSERT(bufferSize == dataCount);
713 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
714 inData[0] = kFactor;
715 for (unsigned int i = 0; i < kProblemSize; ++i) {
716 inData[i + 4] = i + 1;
717 }
718 }
719 } step;
720
721 // We allocate a buffer and directly assign it to the DispatchGroup::Builder. The ComputeStep
722 // will not participate in the creation of this buffer.
723 auto [_, outputInfo] =
724 recorder->priv().drawBufferManager()->getStoragePointer(sizeof(float) * kProblemSize);
725 REPORTER_ASSERT(reporter, outputInfo, "Failed to allocate output buffer");
726
727 DispatchGroup::Builder builder(recorder.get());
728 builder.assignSharedBuffer({outputInfo, sizeof(float) * kProblemSize}, 0);
729
730 // Initialize the step with a pre-determined global size
731 if (!builder.appendStep(&step, {WorkgroupSize(1, 1, 1)})) {
732 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
733 return;
734 }
735
736 // Record the compute task
737 std::vector<std::unique_ptr<DispatchGroup>> groups;
738 groups.push_back(builder.finalize());
739 recorder->priv().add(ComputeTask::Make(std::move(groups)));
740
741 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
742 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
743
744 // Submit the work and wait for it to complete.
745 std::unique_ptr<Recording> recording = recorder->snap();
746 if (!recording) {
747 ERRORF(reporter, "Failed to make recording");
748 return;
749 }
750
751 InsertRecordingInfo insertInfo;
752 insertInfo.fRecording = recording.get();
753 context->insertRecording(insertInfo);
754 testContext->syncedSubmit(context);
755
756 // Verify the contents of the output buffer.
757 float* outData = static_cast<float*>(
758 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
759 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
760 for (unsigned int i = 0; i < kProblemSize; ++i) {
761 const float expected = (i + 1) * kFactor;
762 const float found = outData[i];
763 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
764 }
765}
766
767// Tests the storage texture binding for a compute dispatch that writes the same color to every
768// pixel of a storage texture.
769 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTexture,
770 reporter,
771 context,
772 testContext) {
773 std::unique_ptr<Recorder> recorder = context->makeRecorder();
774
775 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
776 // size.
777 constexpr uint32_t kDim = 16;
778
779 class TestComputeStep : public ComputeStep {
780 public:
781 TestComputeStep() : ComputeStep(
782 /*name=*/"TestStorageTexture",
783 /*localDispatchSize=*/{kDim, kDim, 1},
784 /*resources=*/{
785 {
786 /*type=*/ResourceType::kWriteOnlyStorageTexture,
787 /*flow=*/DataFlow::kShared,
788 /*policy=*/ResourcePolicy::kNone,
789 /*slot=*/0,
790 /*sksl=*/"dst",
791 }
792 }) {}
793 ~TestComputeStep() override = default;
794
795 std::string computeSkSL() const override {
796 return R"(
797 void main() {
798 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
799 }
800 )";
801 }
802
803 std::tuple<SkISize, SkColorType> calculateTextureParameters(
804 int index, const ResourceDesc& r) const override {
805 return {{kDim, kDim}, kRGBA_8888_SkColorType};
806 }
807
808 WorkgroupSize calculateGlobalDispatchSize() const override {
809 return WorkgroupSize(1, 1, 1);
810 }
811 } step;
812
813 DispatchGroup::Builder builder(recorder.get());
814 if (!builder.appendStep(&step)) {
815 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
816 return;
817 }
818
819 sk_sp<TextureProxy> texture = builder.getSharedTextureResource(0);
820 if (!texture) {
821 ERRORF(reporter, "Shared resource at slot 0 is missing");
822 return;
823 }
824
825 // Record the compute task
826 std::vector<std::unique_ptr<DispatchGroup>> groups;
827 groups.push_back(builder.finalize());
828 recorder->priv().add(ComputeTask::Make(std::move(groups)));
829
830 // Submit the work and wait for it to complete.
831 std::unique_ptr<Recording> recording = recorder->snap();
832 if (!recording) {
833 ERRORF(reporter, "Failed to make recording");
834 return;
835 }
836
837 InsertRecordingInfo insertInfo;
838 insertInfo.fRecording = recording.get();
839 context->insertRecording(insertInfo);
840 testContext->syncedSubmit(context);
841
842 SkBitmap bitmap;
843 SkImageInfo imgInfo =
844 SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
845 bitmap.allocPixels(imgInfo);
846
847 SkPixmap pixels;
848 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
849 REPORTER_ASSERT(reporter, peekPixelsSuccess);
850
851 bool readPixelsSuccess = context->priv().readPixels(pixels, texture.get(), imgInfo, 0, 0);
852 REPORTER_ASSERT(reporter, readPixelsSuccess);
853
854 for (uint32_t x = 0; x < kDim; ++x) {
855 for (uint32_t y = 0; y < kDim; ++y) {
856 SkColor4f expected = SkColor4f{0.0f, 1.0f, 0.0f, 1.0f};
857 SkColor4f color = pixels.getColor4f(x, y);
858 REPORTER_ASSERT(reporter, expected == color,
859 "At position {%u, %u}, "
860 "expected {%.1f, %.1f, %.1f, %.1f}, "
861 "found {%.1f, %.1f, %.1f, %.1f}",
862 x, y,
863 expected.fR, expected.fG, expected.fB, expected.fA,
864 color.fR, color.fG, color.fB, color.fA);
865 }
866 }
867}
868
869// Tests the readonly texture binding for a compute dispatch that random-access reads from a
870// CPU-populated texture and copies it to a storage texture.
871DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTextureReadAndWrite,
872 reporter,
873 context,
874 testContext) {
875 std::unique_ptr<Recorder> recorder = context->makeRecorder();
876
877 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
878 // size.
879 constexpr uint32_t kDim = 16;
880
881 class TestComputeStep : public ComputeStep {
882 public:
883 TestComputeStep() : ComputeStep(
884 /*name=*/"TestStorageTextureReadAndWrite",
885 /*localDispatchSize=*/{kDim, kDim, 1},
886 /*resources=*/{
887 {
888 /*type=*/ResourceType::kReadOnlyTexture,
889 /*flow=*/DataFlow::kShared,
890 /*policy=*/ResourcePolicy::kNone,
891 /*slot=*/0,
892 /*sksl=*/"src",
893 },
894 {
895 /*type=*/ResourceType::kWriteOnlyStorageTexture,
896 /*flow=*/DataFlow::kShared,
897 /*policy=*/ResourcePolicy::kNone,
898 /*slot=*/1,
899 /*sksl=*/"dst",
900 }
901 }) {}
902 ~TestComputeStep() override = default;
903
904 std::string computeSkSL() const override {
905 return R"(
906 void main() {
907 half4 color = textureRead(src, sk_LocalInvocationID.xy);
908 textureWrite(dst, sk_LocalInvocationID.xy, color);
909 }
910 )";
911 }
912
913 std::tuple<SkISize, SkColorType> calculateTextureParameters(
914 int index, const ResourceDesc& r) const override {
915 SkASSERT(index == 1);
916 return {{kDim, kDim}, kRGBA_8888_SkColorType};
917 }
918
919 WorkgroupSize calculateGlobalDispatchSize() const override {
920 return WorkgroupSize(1, 1, 1);
921 }
922 } step;
923
924 // Create and populate an input texture.
925 SkBitmap srcBitmap;
926 SkImageInfo srcInfo =
927 SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
928 srcBitmap.allocPixels(srcInfo);
929 SkPixmap srcPixels;
930 bool srcPeekPixelsSuccess = srcBitmap.peekPixels(&srcPixels);
931 REPORTER_ASSERT(reporter, srcPeekPixelsSuccess);
932 for (uint32_t x = 0; x < kDim; ++x) {
933 for (uint32_t y = 0; y < kDim; ++y) {
934 *srcPixels.writable_addr32(x, y) =
935 SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0);
936 }
937 }
938
939 auto texInfo = context->priv().caps()->getDefaultSampledTextureInfo(kRGBA_8888_SkColorType,
940 skgpu::Mipmapped::kNo,
941 skgpu::Protected::kNo,
942 skgpu::Renderable::kNo);
943 sk_sp<TextureProxy> srcProxy = TextureProxy::Make(context->priv().caps(),
944 recorder->priv().resourceProvider(),
945 {kDim, kDim},
946 texInfo,
948 MipLevel mipLevel;
949 mipLevel.fPixels = srcPixels.addr();
950 mipLevel.fRowBytes = srcPixels.rowBytes();
952 srcProxy,
953 srcPixels.info().colorInfo(),
954 srcPixels.info().colorInfo(),
955 {mipLevel},
956 SkIRect::MakeWH(kDim, kDim),
957 std::make_unique<ImageUploadContext>());
958 if (!upload.isValid()) {
959 ERRORF(reporter, "Could not create UploadInstance");
960 return;
961 }
962 recorder->priv().add(UploadTask::Make(std::move(upload)));
963
964 DispatchGroup::Builder builder(recorder.get());
965
966 // Assign the input texture to slot 0. This corresponds to the ComputeStep's "src" texture
967 // binding.
968 builder.assignSharedTexture(std::move(srcProxy), 0);
969
970 if (!builder.appendStep(&step)) {
971 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
972 return;
973 }
974
975 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
976 if (!dst) {
977 ERRORF(reporter, "shared resource at slot 1 is missing");
978 return;
979 }
980
981 // Record the compute task
982 std::vector<std::unique_ptr<DispatchGroup>> groups;
983 groups.push_back(builder.finalize());
984 recorder->priv().add(ComputeTask::Make(std::move(groups)));
985
986 // Submit the work and wait for it to complete.
987 std::unique_ptr<Recording> recording = recorder->snap();
988 if (!recording) {
989 ERRORF(reporter, "Failed to make recording");
990 return;
991 }
992
993 InsertRecordingInfo insertInfo;
994 insertInfo.fRecording = recording.get();
995 context->insertRecording(insertInfo);
996 testContext->syncedSubmit(context);
997
998 SkBitmap bitmap;
999 SkImageInfo imgInfo =
1000 SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
1001 bitmap.allocPixels(imgInfo);
1002
1003 SkPixmap pixels;
1004 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1005 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1006
1007 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1008 REPORTER_ASSERT(reporter, readPixelsSuccess);
1009
1010 for (uint32_t x = 0; x < kDim; ++x) {
1011 for (uint32_t y = 0; y < kDim; ++y) {
1013 SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
1014 SkColor4f color = pixels.getColor4f(x, y);
1015 REPORTER_ASSERT(reporter, expected == color,
1016 "At position {%u, %u}, "
1017 "expected {%.1f, %.1f, %.1f, %.1f}, "
1018 "found {%.1f, %.1f, %.1f, %.1f}",
1019 x, y,
1020 expected.fR, expected.fG, expected.fB, expected.fA,
1021 color.fR, color.fG, color.fB, color.fA);
1022 }
1023 }
1024}
1025
1026DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ReadOnlyStorageBuffer,
1027 reporter,
1028 context,
1029 testContext) {
1030 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1031
1032 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
1033 // size.
1034 constexpr uint32_t kDim = 16;
1035
1036 class TestComputeStep : public ComputeStep {
1037 public:
1038 TestComputeStep() : ComputeStep(
1039 /*name=*/"TestReadOnlyStorageBuffer",
1040 /*localDispatchSize=*/{kDim, kDim, 1},
1041 /*resources=*/{
1042 {
1043 /*type=*/ResourceType::kReadOnlyStorageBuffer,
1044 /*flow=*/DataFlow::kShared,
1045 /*policy=*/ResourcePolicy::kMapped,
1046 /*slot=*/0,
1047 /*sksl=*/"src { uint in_data[]; }",
1048 },
1049 {
1050 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1051 /*flow=*/DataFlow::kShared,
1052 /*policy=*/ResourcePolicy::kNone,
1053 /*slot=*/1,
1054 /*sksl=*/"dst",
1055 }
1056 }) {}
1057 ~TestComputeStep() override = default;
1058
1059 std::string computeSkSL() const override {
1060 return R"(
1061 void main() {
1062 uint ix = sk_LocalInvocationID.y * 16 + sk_LocalInvocationID.x;
1063 uint value = in_data[ix];
1064 half4 splat = half4(
1065 half(value & 0xFF),
1066 half((value >> 8) & 0xFF),
1067 half((value >> 16) & 0xFF),
1068 half((value >> 24) & 0xFF)
1069 );
1070 textureWrite(dst, sk_LocalInvocationID.xy, splat / 255.0);
1071 }
1072 )";
1073 }
1074
1075 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1076 SkASSERT(index == 0);
1077 return kDim * kDim * sizeof(uint32_t);
1078 }
1079
1080 void prepareStorageBuffer(int index,
1081 const ResourceDesc&,
1082 void* buffer,
1083 size_t bufferSize) const override {
1084 SkASSERT(index == 0);
1085 SkASSERT(bufferSize == kDim * kDim * sizeof(uint32_t));
1086
1087 uint32_t* inputs = reinterpret_cast<uint32_t*>(buffer);
1088 for (uint32_t y = 0; y < kDim; ++y) {
1089 for (uint32_t x = 0; x < kDim; ++x) {
1090 uint32_t value =
1091 ((x * 256 / kDim) & 0xFF) | ((y * 256 / kDim) & 0xFF) << 8 | 255 << 24;
1092 *(inputs++) = value;
1093 }
1094 }
1095 }
1096
1097 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1098 int index, const ResourceDesc& r) const override {
1099 SkASSERT(index == 1);
1100 return {{kDim, kDim}, kRGBA_8888_SkColorType};
1101 }
1102
1104 return WorkgroupSize(1, 1, 1);
1105 }
1106 } step;
1107
1108 DispatchGroup::Builder builder(recorder.get());
1109 if (!builder.appendStep(&step)) {
1110 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
1111 return;
1112 }
1113
1114 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
1115 if (!dst) {
1116 ERRORF(reporter, "shared resource at slot 1 is missing");
1117 return;
1118 }
1119
1120 // Record the compute task
1122 groups.push_back(builder.finalize());
1123 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1124
1125 // Submit the work and wait for it to complete.
1126 std::unique_ptr<Recording> recording = recorder->snap();
1127 if (!recording) {
1128 ERRORF(reporter, "Failed to make recording");
1129 return;
1130 }
1131
1132 InsertRecordingInfo insertInfo;
1133 insertInfo.fRecording = recording.get();
1134 context->insertRecording(insertInfo);
1135 testContext->syncedSubmit(context);
1136
1138 SkImageInfo imgInfo =
1140 bitmap.allocPixels(imgInfo);
1141
1142 SkPixmap pixels;
1143 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1144 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1145
1146 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1147 REPORTER_ASSERT(reporter, readPixelsSuccess);
1148
1149 for (uint32_t x = 0; x < kDim; ++x) {
1150 for (uint32_t y = 0; y < kDim; ++y) {
1151 SkColor4f expected =
1152 SkColor4f::FromColor(SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
1153 SkColor4f color = pixels.getColor4f(x, y);
1154 bool pass = true;
1155 for (int i = 0; i < 4; i++) {
1156 pass &= color[i] == expected[i];
1157 }
1159 "At position {%u, %u}, "
1160 "expected {%.1f, %.1f, %.1f, %.1f}, "
1161 "found {%.1f, %.1f, %.1f, %.1f}",
1162 x, y,
1163 expected.fR, expected.fG, expected.fB, expected.fA,
1164 color.fR, color.fG, color.fB, color.fA);
1165 }
1166 }
1167}
1168
1169// Tests that a texture written by one compute step can be sampled by a subsequent step.
1170DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTextureMultipleComputeSteps,
1171 reporter,
1172 context,
1173 testContext) {
1174 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1175
1176 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
1177 // size.
1178 constexpr uint32_t kDim = 16;
1179
1180 // Writes to a texture in slot 0.
1181 class TestComputeStep1 : public ComputeStep {
1182 public:
1183 TestComputeStep1() : ComputeStep(
1184 /*name=*/"TestStorageTexturesFirstPass",
1185 /*localDispatchSize=*/{kDim, kDim, 1},
1186 /*resources=*/{
1187 {
1188 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1189 /*flow=*/DataFlow::kShared,
1190 /*policy=*/ResourcePolicy::kNone,
1191 /*slot=*/0,
1192 /*sksl=*/"dst",
1193 }
1194 }) {}
1195 ~TestComputeStep1() override = default;
1196
1197 std::string computeSkSL() const override {
1198 return R"(
1199 void main() {
1200 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
1201 }
1202 )";
1203 }
1204
1205 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1206 int index, const ResourceDesc& r) const override {
1207 SkASSERT(index == 0);
1208 return {{kDim, kDim}, kRGBA_8888_SkColorType};
1209 }
1210
1212 return WorkgroupSize(1, 1, 1);
1213 }
1214 } step1;
1215
1216 // Reads from the texture in slot 0 and writes it to another texture in slot 1.
1217 class TestComputeStep2 : public ComputeStep {
1218 public:
1219 TestComputeStep2() : ComputeStep(
1220 /*name=*/"TestStorageTexturesSecondPass",
1221 /*localDispatchSize=*/{kDim, kDim, 1},
1222 /*resources=*/{
1223 {
1224 /*type=*/ResourceType::kReadOnlyTexture,
1225 /*flow=*/DataFlow::kShared,
1226 /*policy=*/ResourcePolicy::kNone,
1227 /*slot=*/0,
1228 /*sksl=*/"src",
1229 },
1230 {
1231 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1232 /*flow=*/DataFlow::kShared,
1233 /*policy=*/ResourcePolicy::kNone,
1234 /*slot=*/1,
1235 /*sksl=*/"dst",
1236 }
1237 }) {}
1238 ~TestComputeStep2() override = default;
1239
1240 std::string computeSkSL() const override {
1241 return R"(
1242 void main() {
1243 half4 color = textureRead(src, sk_LocalInvocationID.xy);
1244 textureWrite(dst, sk_LocalInvocationID.xy, color);
1245 }
1246 )";
1247 }
1248
1249 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1250 int index, const ResourceDesc& r) const override {
1251 SkASSERT(index == 1);
1252 return {{kDim, kDim}, kRGBA_8888_SkColorType};
1253 }
1254
1256 return WorkgroupSize(1, 1, 1);
1257 }
1258 } step2;
1259
1260 DispatchGroup::Builder builder(recorder.get());
1261 builder.appendStep(&step1);
1262 builder.appendStep(&step2);
1263
1264 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
1265 if (!dst) {
1266 ERRORF(reporter, "shared resource at slot 1 is missing");
1267 return;
1268 }
1269
1270 // Record the compute task
1272 groups.push_back(builder.finalize());
1273 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1274
1275 // Submit the work and wait for it to complete.
1276 std::unique_ptr<Recording> recording = recorder->snap();
1277 if (!recording) {
1278 ERRORF(reporter, "Failed to make recording");
1279 return;
1280 }
1281
1282 InsertRecordingInfo insertInfo;
1283 insertInfo.fRecording = recording.get();
1284 context->insertRecording(insertInfo);
1285 testContext->syncedSubmit(context);
1286
1288 SkImageInfo imgInfo =
1290 bitmap.allocPixels(imgInfo);
1291
1292 SkPixmap pixels;
1293 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1294 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1295
1296 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1297 REPORTER_ASSERT(reporter, readPixelsSuccess);
1298
1299 for (uint32_t x = 0; x < kDim; ++x) {
1300 for (uint32_t y = 0; y < kDim; ++y) {
1302 SkColor4f color = pixels.getColor4f(x, y);
1303 REPORTER_ASSERT(reporter, expected == color,
1304 "At position {%u, %u}, "
1305 "expected {%.1f, %.1f, %.1f, %.1f}, "
1306 "found {%.1f, %.1f, %.1f, %.1f}",
1307 x, y,
1308 expected.fR, expected.fG, expected.fB, expected.fA,
1309 color.fR, color.fG, color.fB, color.fA);
1310 }
1311 }
1312}
1313
1314// Tests that a texture can be sampled by a compute step using a sampler.
1315// TODO(armansito): Once the previous TODO is done, add additional tests that exercise mixed use of
1316// texture, buffer, and sampler bindings.
1318 reporter,
1319 context,
1320 testContext) {
1321 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1322
1323 // The first ComputeStep initializes a 16x16 texture with a checkerboard pattern of alternating
1324 // red and black pixels. The second ComputeStep downsamples this texture into a 4x4 using
1325 // bilinear filtering at pixel borders, intentionally averaging the values of each 4x4 tile in
1326 // the source texture, and writes the result to the destination texture.
1327 constexpr uint32_t kSrcDim = 16;
1328 constexpr uint32_t kDstDim = 4;
1329
1330 class TestComputeStep1 : public ComputeStep {
1331 public:
1332 TestComputeStep1() : ComputeStep(
1333 /*name=*/"Test_SampledTexture_Init",
1334 /*localDispatchSize=*/{kSrcDim, kSrcDim, 1},
1335 /*resources=*/{
1336 {
1337 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1338 /*flow=*/DataFlow::kShared,
1339 /*policy=*/ResourcePolicy::kNone,
1340 /*slot=*/0,
1341 /*sksl=*/"dst",
1342 }
1343 }) {}
1344 ~TestComputeStep1() override = default;
1345
1346 std::string computeSkSL() const override {
1347 return R"(
1348 void main() {
1349 uint2 c = sk_LocalInvocationID.xy;
1350 uint checkerBoardColor = (c.x + (c.y % 2)) % 2;
1351 textureWrite(dst, c, half4(checkerBoardColor, 0, 0, 1));
1352 }
1353 )";
1354 }
1355
1356 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1357 int index, const ResourceDesc& r) const override {
1358 SkASSERT(index == 0);
1359 return {{kSrcDim, kSrcDim}, kRGBA_8888_SkColorType};
1360 }
1361
1363 return WorkgroupSize(1, 1, 1);
1364 }
1365 } step1;
1366
1367 class TestComputeStep2 : public ComputeStep {
1368 public:
1369 TestComputeStep2() : ComputeStep(
1370 /*name=*/"Test_SampledTexture_Sample",
1371 /*localDispatchSize=*/{kDstDim, kDstDim, 1},
1372 /*resources=*/{
1373 // Declare the storage texture before the sampled texture. This tests that
1374 // binding index assignment works consistently across all backends when a
1375 // sampler-less texture and a texture+sampler pair are intermixed and sampler
1376 // bindings aren't necessarily contiguous when the ranges are distinct.
1377 {
1378 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1379 /*flow=*/DataFlow::kShared,
1380 /*policy=*/ResourcePolicy::kNone,
1381 /*slot=*/1,
1382 /*sksl=*/"dst",
1383 },
1384 {
1385 /*type=*/ResourceType::kSampledTexture,
1386 /*flow=*/DataFlow::kShared,
1387 /*policy=*/ResourcePolicy::kNone,
1388 /*slot=*/0,
1389 /*sksl=*/"src",
1390 }
1391 }) {}
1392 ~TestComputeStep2() override = default;
1393
1394 std::string computeSkSL() const override {
1395 return R"(
1396 void main() {
1397 // Normalize the 4x4 invocation indices and sample the source texture using
1398 // that.
1399 uint2 dstCoord = sk_LocalInvocationID.xy;
1400 const float2 dstSizeInv = float2(0.25, 0.25);
1401 float2 unormCoord = float2(dstCoord) * dstSizeInv;
1402
1403 // Use explicit LOD, as quad derivatives are not available to a compute shader.
1404 half4 color = sampleLod(src, unormCoord, 0);
1405 textureWrite(dst, dstCoord, color);
1406 }
1407 )";
1408 }
1409
1410 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1411 int index, const ResourceDesc& r) const override {
1412 SkASSERT(index == 0 || index == 1);
1413 return {{kDstDim, kDstDim}, kRGBA_8888_SkColorType};
1414 }
1415
1416 SamplerDesc calculateSamplerParameters(int index, const ResourceDesc&) const override {
1417 SkASSERT(index == 1);
1418 // Use the repeat tile mode to sample an infinite checkerboard.
1419 constexpr SkTileMode kTileModes[2] = {SkTileMode::kRepeat, SkTileMode::kRepeat};
1420 return {SkFilterMode::kLinear, kTileModes};
1421 }
1422
1424 return WorkgroupSize(1, 1, 1);
1425 }
1426 } step2;
1427
1428 DispatchGroup::Builder builder(recorder.get());
1429 builder.appendStep(&step1);
1430 builder.appendStep(&step2);
1431
1432 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
1433 if (!dst) {
1434 ERRORF(reporter, "shared resource at slot 1 is missing");
1435 return;
1436 }
1437
1438 // Record the compute task
1440 groups.push_back(builder.finalize());
1441 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1442
1443 // Submit the work and wait for it to complete.
1444 std::unique_ptr<Recording> recording = recorder->snap();
1445 if (!recording) {
1446 ERRORF(reporter, "Failed to make recording");
1447 return;
1448 }
1449
1450 InsertRecordingInfo insertInfo;
1451 insertInfo.fRecording = recording.get();
1452 context->insertRecording(insertInfo);
1453 testContext->syncedSubmit(context);
1454
1456 SkImageInfo imgInfo =
1458 bitmap.allocPixels(imgInfo);
1459
1460 SkPixmap pixels;
1461 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1462 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1463
1464 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1465 REPORTER_ASSERT(reporter, readPixelsSuccess);
1466
1467 for (uint32_t x = 0; x < kDstDim; ++x) {
1468 for (uint32_t y = 0; y < kDstDim; ++y) {
1469 SkColor4f color = pixels.getColor4f(x, y);
1470 REPORTER_ASSERT(reporter, color.fR > 0.49 && color.fR < 0.51,
1471 "At position {%u, %u}, "
1472 "expected red channel in range [0.49, 0.51], "
1473 "found {%.3f}",
1474 x, y, color.fR);
1475 }
1476 }
1477}
1478
1479// TODO(b/260622403): The shader tested here is identical to
1480// `resources/sksl/compute/AtomicsOperations.compute`. It would be nice to be able to exercise SkSL
1481// features like this as part of SkSLTest.cpp instead of as a graphite test.
1482// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
1483// compute programs.
1484DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_AtomicOperationsTest,
1485 reporter,
1486 context,
1487 testContext) {
1488 // This fails on Dawn D3D11, b/315834710
1489 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
1490 return;
1491 }
1492
1493 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1494
1495 constexpr uint32_t kWorkgroupCount = 32;
1496 constexpr uint32_t kWorkgroupSize = 256;
1497
1498 class TestComputeStep : public ComputeStep {
1499 public:
1500 TestComputeStep() : ComputeStep(
1501 /*name=*/"TestAtomicOperations",
1502 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1503 /*resources=*/{
1504 {
1505 /*type=*/ResourceType::kStorageBuffer,
1506 /*flow=*/DataFlow::kShared,
1507 /*policy=*/ResourcePolicy::kMapped,
1508 /*slot=*/0,
1509 /*sksl=*/"ssbo { atomicUint globalCounter; }",
1510 }
1511 }) {}
1512 ~TestComputeStep() override = default;
1513
1514 // A kernel that increments a global (device memory) counter across multiple workgroups.
1515 // Each workgroup maintains its own independent tally in a workgroup-shared counter which
1516 // is then added to the global count.
1517 //
1518 // This exercises atomic store/load/add and coherent reads and writes over memory in storage
1519 // and workgroup address spaces.
1520 std::string computeSkSL() const override {
1521 return R"(
1522 workgroup atomicUint localCounter;
1523
1524 void main() {
1525 // Initialize the local counter.
1526 if (sk_LocalInvocationID.x == 0) {
1527 atomicStore(localCounter, 0);
1528 }
1529
1530 // Synchronize the threads in the workgroup so they all see the initial value.
1531 workgroupBarrier();
1532
1533 // All threads increment the counter.
1534 atomicAdd(localCounter, 1);
1535
1536 // Synchronize the threads again to ensure they have all executed the increment
1537 // and the following load reads the same value across all threads in the
1538 // workgroup.
1539 workgroupBarrier();
1540
1541 // Add the workgroup-only tally to the global counter.
1542 if (sk_LocalInvocationID.x == 0) {
1543 atomicAdd(globalCounter, atomicLoad(localCounter));
1544 }
1545 }
1546 )";
1547 }
1548
1549 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1550 SkASSERT(index == 0);
1551 SkASSERT(r.fSlot == 0);
1552 SkASSERT(r.fFlow == DataFlow::kShared);
1553 return sizeof(uint32_t);
1554 }
1555
1557 return WorkgroupSize(kWorkgroupCount, 1, 1);
1558 }
1559
1560 void prepareStorageBuffer(int resourceIndex,
1561 const ResourceDesc& r,
1562 void* buffer,
1563 size_t bufferSize) const override {
1564 SkASSERT(resourceIndex == 0);
1565 *static_cast<uint32_t*>(buffer) = 0;
1566 }
1567 } step;
1568
1569 DispatchGroup::Builder builder(recorder.get());
1570 builder.appendStep(&step);
1571
1572 BindBufferInfo info = builder.getSharedBufferResource(0);
1573 if (!info) {
1574 ERRORF(reporter, "shared resource at slot 0 is missing");
1575 return;
1576 }
1577
1578 // Record the compute pass task.
1580 groups.push_back(builder.finalize());
1581 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1582
1583 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1584 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
1585
1586 // Submit the work and wait for it to complete.
1587 std::unique_ptr<Recording> recording = recorder->snap();
1588 if (!recording) {
1589 ERRORF(reporter, "Failed to make recording");
1590 return;
1591 }
1592
1593 InsertRecordingInfo insertInfo;
1594 insertInfo.fRecording = recording.get();
1595 context->insertRecording(insertInfo);
1596 testContext->syncedSubmit(context);
1597
1598 // Verify the contents of the output buffer.
1599 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
1600 const uint32_t result = static_cast<const uint32_t*>(
1601 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
1603 result == kExpectedCount,
1604 "expected '%u', found '%u'",
1605 kExpectedCount,
1606 result);
1607}
1608
1609// TODO(b/260622403): The shader tested here is identical to
1610// `resources/sksl/compute/AtomicsOperationsOverArrayAndStruct.compute`. It would be nice to be able
1611// to exercise SkSL features like this as part of SkSLTest.cpp instead of as a graphite test.
1612// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
1613// compute programs.
1614DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_AtomicOperationsOverArrayAndStructTest,
1615 reporter,
1616 context,
1617 testContext) {
1618 // This fails on Dawn D3D11, b/315834710
1619 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
1620 return;
1621 }
1622
1623 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1624
1625 constexpr uint32_t kWorkgroupCount = 32;
1626 constexpr uint32_t kWorkgroupSize = 256;
1627
1628 class TestComputeStep : public ComputeStep {
1629 public:
1630 TestComputeStep() : ComputeStep(
1631 /*name=*/"TestAtomicOperationsOverArrayAndStruct",
1632 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1633 /*resources=*/{
1634 {
1635 /*type=*/ResourceType::kStorageBuffer,
1636 /*flow=*/DataFlow::kShared,
1637 /*policy=*/ResourcePolicy::kMapped,
1638 /*slot=*/0,
1639 /*sksl=*/"ssbo {\n"
1640 " atomicUint globalCountsFirstHalf;\n"
1641 " atomicUint globalCountsSecondHalf;\n"
1642 "}\n"
1643 }
1644 }) {}
1645 ~TestComputeStep() override = default;
1646
1647 // Construct a kernel that increments a two global (device memory) counters across multiple
1648 // workgroups. Each workgroup maintains its own independent tallies in workgroup-shared
1649 // counters which are then added to the global counts.
1650 //
1651 // This exercises atomic store/load/add and coherent reads and writes over memory in storage
1652 // and workgroup address spaces.
1653 std::string computeSkSL() const override {
1654 return R"(
1655 const uint WORKGROUP_SIZE = 256;
1656
1657 workgroup atomicUint localCounts[2];
1658
1659 void main() {
1660 // Initialize the local counts.
1661 if (sk_LocalInvocationID.x == 0) {
1662 atomicStore(localCounts[0], 0);
1663 atomicStore(localCounts[1], 0);
1664 }
1665
1666 // Synchronize the threads in the workgroup so they all see the initial value.
1667 workgroupBarrier();
1668
1669 // Each thread increments one of the local counters based on its invocation
1670 // index.
1671 uint idx = sk_LocalInvocationID.x < (WORKGROUP_SIZE / 2) ? 0 : 1;
1672 atomicAdd(localCounts[idx], 1);
1673
1674 // Synchronize the threads again to ensure they have all executed the increments
1675 // and the following load reads the same value across all threads in the
1676 // workgroup.
1677 workgroupBarrier();
1678
1679 // Add the workgroup-only tally to the global counter.
1680 if (sk_LocalInvocationID.x == 0) {
1681 atomicAdd(globalCountsFirstHalf, atomicLoad(localCounts[0]));
1682 atomicAdd(globalCountsSecondHalf, atomicLoad(localCounts[1]));
1683 }
1684 }
1685 )";
1686 }
1687
1688 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1689 SkASSERT(index == 0);
1690 SkASSERT(r.fSlot == 0);
1691 SkASSERT(r.fFlow == DataFlow::kShared);
1692 return 2 * sizeof(uint32_t);
1693 }
1694
1696 return WorkgroupSize(kWorkgroupCount, 1, 1);
1697 }
1698
1699 void prepareStorageBuffer(int resourceIndex,
1700 const ResourceDesc& r,
1701 void* buffer,
1702 size_t bufferSize) const override {
1703 SkASSERT(resourceIndex == 0);
1704 uint32_t* data = static_cast<uint32_t*>(buffer);
1705 data[0] = 0;
1706 data[1] = 0;
1707 }
1708 } step;
1709
1710 DispatchGroup::Builder builder(recorder.get());
1711 builder.appendStep(&step);
1712
1713 BindBufferInfo info = builder.getSharedBufferResource(0);
1714 if (!info) {
1715 ERRORF(reporter, "shared resource at slot 0 is missing");
1716 return;
1717 }
1718
1719 // Record the compute pass task.
1721 groups.push_back(builder.finalize());
1722 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1723
1724 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1725 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
1726
1727 // Submit the work and wait for it to complete.
1728 std::unique_ptr<Recording> recording = recorder->snap();
1729 if (!recording) {
1730 ERRORF(reporter, "Failed to make recording");
1731 return;
1732 }
1733
1734 InsertRecordingInfo insertInfo;
1735 insertInfo.fRecording = recording.get();
1736 context->insertRecording(insertInfo);
1737 testContext->syncedSubmit(context);
1738
1739 // Verify the contents of the output buffer.
1740 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize / 2;
1741
1742 const uint32_t* ssboData = static_cast<const uint32_t*>(
1743 map_buffer(context, testContext, buffer.get(), info.fOffset));
1744 const uint32_t firstHalfCount = ssboData[0];
1745 const uint32_t secondHalfCount = ssboData[1];
1747 firstHalfCount == kExpectedCount,
1748 "expected '%u', found '%u'",
1749 kExpectedCount,
1750 firstHalfCount);
1752 secondHalfCount == kExpectedCount,
1753 "expected '%u', found '%u'",
1754 kExpectedCount,
1755 secondHalfCount);
1756}
1757
1759 reporter,
1760 context,
1761 testContext) {
1762 constexpr uint32_t kProblemSize = 512;
1763
1764 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
1765 // processes 1 vector at a time.
1766 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
1767
1768 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1769
1770 // The ComputeStep requests an unmapped buffer that is zero-initialized. It writes the output to
1771 // a mapped buffer which test verifies.
1772 class TestComputeStep : public ComputeStep {
1773 public:
1774 TestComputeStep() : ComputeStep(
1775 /*name=*/"TestClearedBuffer",
1776 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1777 /*resources=*/{
1778 // Zero initialized input buffer
1779 {
1780 // TODO(b/299979165): Declare this binding as read-only.
1781 /*type=*/ResourceType::kStorageBuffer,
1782 /*flow=*/DataFlow::kPrivate,
1783 /*policy=*/ResourcePolicy::kClear,
1784 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
1785 },
1786 // Output buffer:
1787 {
1788 /*type=*/ResourceType::kStorageBuffer,
1789 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
1790 // Builder
1791 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
1792 /*slot=*/0,
1793 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1794 }
1795 }) {}
1796 ~TestComputeStep() override = default;
1797
1798 std::string computeSkSL() const override {
1799 return R"(
1800 void main() {
1801 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1802 }
1803 )";
1804 }
1805
1806 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1807 return sizeof(uint32_t) * kProblemSize;
1808 }
1809
1810 void prepareStorageBuffer(int resourceIndex,
1811 const ResourceDesc& r,
1812 void* buffer,
1813 size_t bufferSize) const override {
1814 // Should receive this call only for the mapped buffer.
1815 SkASSERT(resourceIndex == 1);
1816 }
1817
1819 return WorkgroupSize(1, 1, 1);
1820 }
1821 } step;
1822
1823 DispatchGroup::Builder builder(recorder.get());
1824 if (!builder.appendStep(&step)) {
1825 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
1826 return;
1827 }
1828
1829 // The output buffer should have been placed in the right output slot.
1830 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
1831 if (!outputInfo) {
1832 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
1833 return;
1834 }
1835
1836 // Record the compute task
1838 groups.push_back(builder.finalize());
1839 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1840
1841 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1842 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
1843
1844 // Submit the work and wait for it to complete.
1845 std::unique_ptr<Recording> recording = recorder->snap();
1846 if (!recording) {
1847 ERRORF(reporter, "Failed to make recording");
1848 return;
1849 }
1850
1851 InsertRecordingInfo insertInfo;
1852 insertInfo.fRecording = recording.get();
1853 context->insertRecording(insertInfo);
1854 testContext->syncedSubmit(context);
1855
1856 // Verify the contents of the output buffer.
1857 uint32_t* outData = static_cast<uint32_t*>(
1858 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
1859 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1860 for (unsigned int i = 0; i < kProblemSize; ++i) {
1861 const uint32_t found = outData[i];
1862 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1863 }
1864}
1865
1867 reporter,
1868 context,
1869 testContext) {
1870 // Initiate two independent DispatchGroups operating on the same buffer. The first group
1871 // writes garbage to the buffer and the second group copies the contents to an output buffer.
1872 // This test validates that the reads, writes, and clear occur in the expected order.
1873 constexpr uint32_t kWorkgroupSize = 64;
1874
1875 // Initialize buffer with non-zero data.
1876 class FillWithGarbage : public ComputeStep {
1877 public:
1878 FillWithGarbage() : ComputeStep(
1879 /*name=*/"FillWithGarbage",
1880 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1881 /*resources=*/{
1882 {
1883 /*type=*/ResourceType::kStorageBuffer,
1884 /*flow=*/DataFlow::kShared,
1885 /*policy=*/ResourcePolicy::kNone,
1886 /*slot=*/0,
1887 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1888 }
1889 }) {}
1890 ~FillWithGarbage() override = default;
1891
1892 std::string computeSkSL() const override {
1893 return R"(
1894 void main() {
1895 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
1896 }
1897 )";
1898 }
1899 } garbageStep;
1900
1901 // Second stage just copies the data to a destination buffer. This is only to verify that this
1902 // stage, issued in a separate DispatchGroup, observes the clear.
1903 class CopyBuffer : public ComputeStep {
1904 public:
1905 CopyBuffer() : ComputeStep(
1906 /*name=*/"CopyBuffer",
1907 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1908 /*resources=*/{
1909 {
1910 /*type=*/ResourceType::kStorageBuffer,
1911 /*flow=*/DataFlow::kShared,
1912 /*policy=*/ResourcePolicy::kNone,
1913 /*slot=*/0,
1914 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
1915 },
1916 {
1917 /*type=*/ResourceType::kStorageBuffer,
1918 /*flow=*/DataFlow::kShared,
1919 /*policy=*/ResourcePolicy::kNone,
1920 /*slot=*/1,
1921 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1922 }
1923 }) {}
1924 ~CopyBuffer() override = default;
1925
1926 std::string computeSkSL() const override {
1927 return R"(
1928 void main() {
1929 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1930 }
1931 )";
1932 }
1933 } copyStep;
1934
1935 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1936 DispatchGroup::Builder builder(recorder.get());
1937
1938 constexpr size_t kElementCount = 4 * kWorkgroupSize;
1939 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
1940 auto input = recorder->priv().drawBufferManager()->getStorage(kBufferSize);
1941 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
1942
1944
1945 // First group.
1946 builder.assignSharedBuffer({input, kBufferSize}, 0);
1947 builder.appendStep(&garbageStep, {{1, 1, 1}});
1948 groups.push_back(builder.finalize());
1949
1950 // Second group.
1951 builder.reset();
1952 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
1953 builder.assignSharedBuffer({output, kBufferSize}, 1);
1954 builder.appendStep(&copyStep, {{1, 1, 1}});
1955 groups.push_back(builder.finalize());
1956
1957 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1958 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1959 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
1960
1961 // Submit the work and wait for it to complete.
1962 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
1963 if (!recording) {
1964 ERRORF(reporter, "Failed to make recording");
1965 return;
1966 }
1967
1968 // Verify the contents of the output buffer.
1969 uint32_t* outData = static_cast<uint32_t*>(
1970 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
1971 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1972 for (unsigned int i = 0; i < kElementCount; ++i) {
1973 const uint32_t found = outData[i];
1974 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1975 }
1976}
1977
1978DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearOrderingScratchBuffers,
1979 reporter,
1980 context,
1981 testContext) {
1982 // This test is the same as the ClearOrdering test but the two stages write to a recycled
1983 // ScratchBuffer. This is primarily to test ScratchBuffer reuse.
1984 constexpr uint32_t kWorkgroupSize = 64;
1985
1986 // Initialize buffer with non-zero data.
1987 class FillWithGarbage : public ComputeStep {
1988 public:
1989 FillWithGarbage() : ComputeStep(
1990 /*name=*/"FillWithGarbage",
1991 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1992 /*resources=*/{
1993 {
1994 /*type=*/ResourceType::kStorageBuffer,
1995 /*flow=*/DataFlow::kShared,
1996 /*policy=*/ResourcePolicy::kNone,
1997 /*slot=*/0,
1998 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1999 }
2000 }) {}
2001 ~FillWithGarbage() override = default;
2002
2003 std::string computeSkSL() const override {
2004 return R"(
2005 void main() {
2006 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
2007 }
2008 )";
2009 }
2010 } garbageStep;
2011
2012 // Second stage just copies the data to a destination buffer. This is only to verify that this
2013 // stage (issued in a separate DispatchGroup) sees the changes.
2014 class CopyBuffer : public ComputeStep {
2015 public:
2016 CopyBuffer() : ComputeStep(
2017 /*name=*/"CopyBuffer",
2018 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2019 /*resources=*/{
2020 {
2021 /*type=*/ResourceType::kStorageBuffer,
2022 /*flow=*/DataFlow::kShared,
2023 /*policy=*/ResourcePolicy::kNone,
2024 /*slot=*/0,
2025 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
2026 },
2027 {
2028 /*type=*/ResourceType::kStorageBuffer,
2029 /*flow=*/DataFlow::kShared,
2030 /*policy=*/ResourcePolicy::kNone,
2031 /*slot=*/1,
2032 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
2033 }
2034 }) {}
2035 ~CopyBuffer() override = default;
2036
2037 std::string computeSkSL() const override {
2038 return R"(
2039 void main() {
2040 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
2041 }
2042 )";
2043 }
2044 } copyStep;
2045
2046 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2047 DispatchGroup::Builder builder(recorder.get());
2048
2049 constexpr size_t kElementCount = 4 * kWorkgroupSize;
2050 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
2051 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
2052
2054
2055 // First group.
2056 {
2057 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2058 auto input = scratch.suballocate(kBufferSize);
2059 builder.assignSharedBuffer({input, kBufferSize}, 0);
2060
2061 // `scratch` returns to the scratch buffer pool when it goes out of scope
2062 }
2063 builder.appendStep(&garbageStep, {{1, 1, 1}});
2064 groups.push_back(builder.finalize());
2065
2066 // Second group.
2067 builder.reset();
2068 {
2069 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2070 auto input = scratch.suballocate(kBufferSize);
2071 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
2072 }
2073 builder.assignSharedBuffer({output, kBufferSize}, 1);
2074 builder.appendStep(&copyStep, {{1, 1, 1}});
2075 groups.push_back(builder.finalize());
2076
2077 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2078 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2079 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
2080
2081 // Submit the work and wait for it to complete.
2082 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
2083 if (!recording) {
2084 ERRORF(reporter, "Failed to make recording");
2085 return;
2086 }
2087
2088 // Verify the contents of the output buffer.
2089 uint32_t* outData = static_cast<uint32_t*>(
2090 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
2091 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
2092 for (unsigned int i = 0; i < kElementCount; ++i) {
2093 const uint32_t found = outData[i];
2094 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
2095 }
2096}
2097
2098DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_IndirectDispatch,
2099 reporter,
2100 context,
2101 testContext) {
2102 // This fails on Dawn D3D11, b/315834710
2103 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2104 return;
2105 }
2106
2107 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2108
2109 constexpr uint32_t kWorkgroupCount = 32;
2110 constexpr uint32_t kWorkgroupSize = 64;
2111
2112 // `IndirectStep` populates a buffer with the global workgroup count for `CountStep`.
2113 // `CountStep` is recorded using `DispatchGroup::appendStepIndirect()` and its workgroups get
2114 // dispatched according to the values computed by `IndirectStep` on the GPU.
2115 class IndirectStep : public ComputeStep {
2116 public:
2117 IndirectStep()
2118 : ComputeStep(
2119 /*name=*/"TestIndirectDispatch_IndirectStep",
2120 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2121 /*resources=*/
2122 {{
2123 /*type=*/ResourceType::kIndirectBuffer,
2124 /*flow=*/DataFlow::kShared,
2125 /*policy=*/ResourcePolicy::kClear,
2126 /*slot=*/0,
2127 // TODO(armansito): Ideally the SSBO would have a single member of
2128 // type `IndirectDispatchArgs` struct type. SkSL modules don't
2129 // support struct declarations so this is currently not possible.
2130 /*sksl=*/"ssbo { uint indirect[]; }",
2131 }}) {}
2132 ~IndirectStep() override = default;
2133
2134 // Kernel that specifies a workgroup size of `kWorkgroupCount` to be used by the indirect
2135 // dispatch.
2136 std::string computeSkSL() const override {
2137 return R"(
2138 // This needs to match `kWorkgroupCount` declared above.
2139 const uint kWorkgroupCount = 32;
2140
2141 void main() {
2142 if (sk_LocalInvocationID.x == 0) {
2143 indirect[0] = kWorkgroupCount;
2144 indirect[1] = 1;
2145 indirect[2] = 1;
2146 }
2147 }
2148 )";
2149 }
2150
2151 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2152 SkASSERT(index == 0);
2153 SkASSERT(r.fSlot == 0);
2154 SkASSERT(r.fFlow == DataFlow::kShared);
2156 }
2157
2159 return WorkgroupSize(1, 1, 1);
2160 }
2161 } indirectStep;
2162
2163 class CountStep : public ComputeStep {
2164 public:
2165 CountStep()
2166 : ComputeStep(
2167 /*name=*/"TestIndirectDispatch_CountStep",
2168 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2169 /*resources=*/
2170 {{
2171 /*type=*/ResourceType::kStorageBuffer,
2172 /*flow=*/DataFlow::kShared,
2173 /*policy=*/ResourcePolicy::kMapped,
2174 /*slot=*/1,
2175 /*sksl=*/"ssbo { atomicUint globalCounter; }",
2176 }}) {}
2177 ~CountStep() override = default;
2178
2179 std::string computeSkSL() const override {
2180 return R"(
2181 workgroup atomicUint localCounter;
2182
2183 void main() {
2184 // Initialize the local counter.
2185 if (sk_LocalInvocationID.x == 0) {
2186 atomicStore(localCounter, 0);
2187 }
2188
2189 // Synchronize the threads in the workgroup so they all see the initial value.
2190 workgroupBarrier();
2191
2192 // All threads increment the counter.
2193 atomicAdd(localCounter, 1);
2194
2195 // Synchronize the threads again to ensure they have all executed the increment
2196 // and the following load reads the same value across all threads in the
2197 // workgroup.
2198 workgroupBarrier();
2199
2200 // Add the workgroup-only tally to the global counter.
2201 if (sk_LocalInvocationID.x == 0) {
2202 atomicAdd(globalCounter, atomicLoad(localCounter));
2203 }
2204 }
2205 )";
2206 }
2207
2208 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2209 SkASSERT(index == 0);
2210 SkASSERT(r.fSlot == 1);
2211 SkASSERT(r.fFlow == DataFlow::kShared);
2212 return sizeof(uint32_t);
2213 }
2214
2215 void prepareStorageBuffer(int resourceIndex,
2216 const ResourceDesc& r,
2217 void* buffer,
2218 size_t bufferSize) const override {
2219 SkASSERT(resourceIndex == 0);
2220 *static_cast<uint32_t*>(buffer) = 0;
2221 }
2222 } countStep;
2223
2224 DispatchGroup::Builder builder(recorder.get());
2225 builder.appendStep(&indirectStep);
2226 BindBufferInfo indirectBufferInfo = builder.getSharedBufferResource(0);
2227 if (!indirectBufferInfo) {
2228 ERRORF(reporter, "Shared resource at slot 0 is missing");
2229 return;
2230 }
2231 builder.appendStepIndirect(&countStep, {indirectBufferInfo, kIndirectDispatchArgumentSize});
2232
2233 BindBufferInfo info = builder.getSharedBufferResource(1);
2234 if (!info) {
2235 ERRORF(reporter, "Shared resource at slot 1 is missing");
2236 return;
2237 }
2238
2239 // Record the compute pass task.
2241 groups.push_back(builder.finalize());
2242 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2243
2244 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2245 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2246
2247 // Submit the work and wait for it to complete.
2248 std::unique_ptr<Recording> recording = recorder->snap();
2249 if (!recording) {
2250 ERRORF(reporter, "Failed to make recording");
2251 return;
2252 }
2253
2254 InsertRecordingInfo insertInfo;
2255 insertInfo.fRecording = recording.get();
2256 context->insertRecording(insertInfo);
2257 testContext->syncedSubmit(context);
2258
2259 // Verify the contents of the output buffer.
2260 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2261 const uint32_t result = static_cast<const uint32_t*>(
2262 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2264 result == kExpectedCount,
2265 "expected '%u', found '%u'",
2266 kExpectedCount,
2267 result);
2268}
2269
2270DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_NativeShaderSourceMetal,
2271 reporter,
2272 context,
2273 testContext) {
2274 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2275
2276 constexpr uint32_t kWorkgroupCount = 32;
2277 constexpr uint32_t kWorkgroupSize = 1024;
2278
2279 class TestComputeStep : public ComputeStep {
2280 public:
2281 TestComputeStep() : ComputeStep(
2282 /*name=*/"TestAtomicOperationsMetal",
2283 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2284 /*resources=*/{
2285 {
2286 /*type=*/ResourceType::kStorageBuffer,
2287 /*flow=*/DataFlow::kShared,
2288 /*policy=*/ResourcePolicy::kMapped,
2289 /*slot=*/0,
2290 }
2291 },
2292 /*workgroupBuffers=*/{},
2293 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2294 ~TestComputeStep() override = default;
2295
2296 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2297 SkASSERT(format == NativeShaderFormat::kMSL);
2298 static constexpr std::string_view kSource = R"(
2299 #include <metal_stdlib>
2300
2301 using namespace metal;
2302
2303 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2304 device atomic_uint& globalCounter [[buffer(0)]]) {
2305 threadgroup atomic_uint localCounter;
2306
2307 // Initialize the local counter.
2308 if (localId.x == 0u) {
2309 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2310 }
2311
2312 // Synchronize the threads in the workgroup so they all see the initial value.
2313 threadgroup_barrier(mem_flags::mem_threadgroup);
2314
2315 // All threads increment the counter.
2316 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2317
2318 // Synchronize the threads again to ensure they have all executed the increment
2319 // and the following load reads the same value across all threads in the
2320 // workgroup.
2321 threadgroup_barrier(mem_flags::mem_threadgroup);
2322
2323 // Add the workgroup-only tally to the global counter.
2324 if (localId.x == 0u) {
2325 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2326 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2327 }
2328 }
2329 )";
2330 return {kSource, "atomicCount"};
2331 }
2332
2333 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2334 SkASSERT(index == 0);
2335 SkASSERT(r.fSlot == 0);
2336 SkASSERT(r.fFlow == DataFlow::kShared);
2337 return sizeof(uint32_t);
2338 }
2339
2341 return WorkgroupSize(kWorkgroupCount, 1, 1);
2342 }
2343
2344 void prepareStorageBuffer(int resourceIndex,
2345 const ResourceDesc& r,
2346 void* buffer,
2347 size_t bufferSize) const override {
2348 SkASSERT(resourceIndex == 0);
2349 *static_cast<uint32_t*>(buffer) = 0;
2350 }
2351 } step;
2352
2353 DispatchGroup::Builder builder(recorder.get());
2354 builder.appendStep(&step);
2355
2356 BindBufferInfo info = builder.getSharedBufferResource(0);
2357 if (!info) {
2358 ERRORF(reporter, "shared resource at slot 0 is missing");
2359 return;
2360 }
2361
2362 // Record the compute pass task.
2364 groups.push_back(builder.finalize());
2365 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2366
2367 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2368 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2369
2370 // Submit the work and wait for it to complete.
2371 std::unique_ptr<Recording> recording = recorder->snap();
2372 if (!recording) {
2373 ERRORF(reporter, "Failed to make recording");
2374 return;
2375 }
2376
2377 InsertRecordingInfo insertInfo;
2378 insertInfo.fRecording = recording.get();
2379 context->insertRecording(insertInfo);
2380 testContext->syncedSubmit(context);
2381
2382 // Verify the contents of the output buffer.
2383 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2384 const uint32_t result = static_cast<const uint32_t*>(
2385 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2387 result == kExpectedCount,
2388 "expected '%u', found '%u'",
2389 kExpectedCount,
2390 result);
2391}
2392
2393DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_WorkgroupBufferDescMetal,
2394 reporter,
2395 context,
2396 testContext) {
2397 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2398
2399 constexpr uint32_t kWorkgroupCount = 32;
2400 constexpr uint32_t kWorkgroupSize = 1024;
2401
2402 class TestComputeStep : public ComputeStep {
2403 public:
2404 TestComputeStep() : ComputeStep(
2405 /*name=*/"TestAtomicOperationsMetal",
2406 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2407 /*resources=*/{
2408 {
2409 /*type=*/ResourceType::kStorageBuffer,
2410 /*flow=*/DataFlow::kShared,
2411 /*policy=*/ResourcePolicy::kMapped,
2412 /*slot=*/0,
2413 }
2414 },
2415 /*workgroupBuffers=*/{
2416 {
2417 /*size=*/sizeof(uint32_t),
2418 /*index=*/0u,
2419 }
2420 },
2421 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2422 ~TestComputeStep() override = default;
2423
2424 // This is the same MSL kernel as in Compute_NativeShaderSourceMetal, except `localCounter`
2425 // is an entry-point parameter instead of a local variable. This forces the workgroup
2426 // binding to be encoded explicitly in the command encoder.
2427 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2428 SkASSERT(format == NativeShaderFormat::kMSL);
2429 static constexpr std::string_view kSource = R"(
2430 #include <metal_stdlib>
2431
2432 using namespace metal;
2433
2434 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2435 device atomic_uint& globalCounter [[buffer(0)]],
2436 threadgroup atomic_uint& localCounter [[threadgroup(0)]]) {
2437 // Initialize the local counter.
2438 if (localId.x == 0u) {
2439 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2440 }
2441
2442 // Synchronize the threads in the workgroup so they all see the initial value.
2443 threadgroup_barrier(mem_flags::mem_threadgroup);
2444
2445 // All threads increment the counter.
2446 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2447
2448 // Synchronize the threads again to ensure they have all executed the increment
2449 // and the following load reads the same value across all threads in the
2450 // workgroup.
2451 threadgroup_barrier(mem_flags::mem_threadgroup);
2452
2453 // Add the workgroup-only tally to the global counter.
2454 if (localId.x == 0u) {
2455 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2456 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2457 }
2458 }
2459 )";
2460 return {kSource, "atomicCount"};
2461 }
2462
2463 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2464 SkASSERT(index == 0);
2465 SkASSERT(r.fSlot == 0);
2466 SkASSERT(r.fFlow == DataFlow::kShared);
2467 return sizeof(uint32_t);
2468 }
2469
2471 return WorkgroupSize(kWorkgroupCount, 1, 1);
2472 }
2473
2474 void prepareStorageBuffer(int resourceIndex,
2475 const ResourceDesc& r,
2476 void* buffer,
2477 size_t bufferSize) const override {
2478 SkASSERT(resourceIndex == 0);
2479 *static_cast<uint32_t*>(buffer) = 0;
2480 }
2481 } step;
2482
2483 DispatchGroup::Builder builder(recorder.get());
2484 builder.appendStep(&step);
2485
2486 BindBufferInfo info = builder.getSharedBufferResource(0);
2487 if (!info) {
2488 ERRORF(reporter, "shared resource at slot 0 is missing");
2489 return;
2490 }
2491
2492 // Record the compute pass task.
2494 groups.push_back(builder.finalize());
2495 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2496
2497 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2498 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2499
2500 // Submit the work and wait for it to complete.
2501 std::unique_ptr<Recording> recording = recorder->snap();
2502 if (!recording) {
2503 ERRORF(reporter, "Failed to make recording");
2504 return;
2505 }
2506
2507 InsertRecordingInfo insertInfo;
2508 insertInfo.fRecording = recording.get();
2509 context->insertRecording(insertInfo);
2510 testContext->syncedSubmit(context);
2511
2512 // Verify the contents of the output buffer.
2513 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2514 const uint32_t result = static_cast<const uint32_t*>(
2515 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2517 result == kExpectedCount,
2518 "expected '%u', found '%u'",
2519 kExpectedCount,
2520 result);
2521}
2522
2523DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT(Compute_NativeShaderSourceWGSL, reporter, context, testContext) {
2524 // This fails on Dawn D3D11, b/315834710
2525 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2526 return;
2527 }
2528
2529 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2530
2531 constexpr uint32_t kWorkgroupCount = 32;
2532 constexpr uint32_t kWorkgroupSize = 256; // The WebGPU default workgroup size limit is 256
2533
2534 class TestComputeStep : public ComputeStep {
2535 public:
2536 TestComputeStep() : ComputeStep(
2537 /*name=*/"TestAtomicOperationsWGSL",
2538 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2539 /*resources=*/{
2540 {
2541 /*type=*/ResourceType::kStorageBuffer,
2542 /*flow=*/DataFlow::kShared,
2543 /*policy=*/ResourcePolicy::kMapped,
2544 /*slot=*/0,
2545 }
2546 },
2547 /*workgroupBuffers=*/{},
2548 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2549 ~TestComputeStep() override = default;
2550
2551 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2552 SkASSERT(format == NativeShaderFormat::kWGSL);
2553 static constexpr std::string_view kSource = R"(
2554 @group(0) @binding(0) var<storage, read_write> globalCounter: atomic<u32>;
2555
2556 var<workgroup> localCounter: atomic<u32>;
2557
2558 @compute @workgroup_size(256)
2559 fn atomicCount(@builtin(local_invocation_id) localId: vec3u) {
2560 // Initialize the local counter.
2561 if localId.x == 0u {
2562 atomicStore(&localCounter, 0u);
2563 }
2564
2565 // Synchronize the threads in the workgroup so they all see the initial value.
2566 workgroupBarrier();
2567
2568 // All threads increment the counter.
2569 atomicAdd(&localCounter, 1u);
2570
2571 // Synchronize the threads again to ensure they have all executed the increment
2572 // and the following load reads the same value across all threads in the
2573 // workgroup.
2574 workgroupBarrier();
2575
2576 // Add the workgroup-only tally to the global counter.
2577 if localId.x == 0u {
2578 let tally = atomicLoad(&localCounter);
2579 atomicAdd(&globalCounter, tally);
2580 }
2581 }
2582 )";
2583 return {kSource, "atomicCount"};
2584 }
2585
2586 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2587 SkASSERT(index == 0);
2588 SkASSERT(r.fSlot == 0);
2589 SkASSERT(r.fFlow == DataFlow::kShared);
2590 return sizeof(uint32_t);
2591 }
2592
2594 return WorkgroupSize(kWorkgroupCount, 1, 1);
2595 }
2596
2597 void prepareStorageBuffer(int resourceIndex,
2598 const ResourceDesc& r,
2599 void* buffer,
2600 size_t bufferSize) const override {
2601 SkASSERT(resourceIndex == 0);
2602 *static_cast<uint32_t*>(buffer) = 0;
2603 }
2604 } step;
2605
2606 DispatchGroup::Builder builder(recorder.get());
2607 builder.appendStep(&step);
2608
2609 BindBufferInfo info = builder.getSharedBufferResource(0);
2610 if (!info) {
2611 ERRORF(reporter, "shared resource at slot 0 is missing");
2612 return;
2613 }
2614
2615 // Record the compute pass task.
2617 groups.push_back(builder.finalize());
2618 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2619
2620 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2621 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2622
2623 // Submit the work and wait for it to complete.
2624 std::unique_ptr<Recording> recording = recorder->snap();
2625 if (!recording) {
2626 ERRORF(reporter, "Failed to make recording");
2627 return;
2628 }
2629
2630 InsertRecordingInfo insertInfo;
2631 insertInfo.fRecording = recording.get();
2632 context->insertRecording(insertInfo);
2633 testContext->syncedSubmit(context);
2634
2635 // Verify the contents of the output buffer.
2636 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2637 const uint32_t result = static_cast<const uint32_t*>(
2638 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2640 result == kExpectedCount,
2641 "expected '%u', found '%u'",
2642 kExpectedCount,
2643 result);
2644}
static int step(int x, SkScalar min, SkScalar max)
Definition BlurTest.cpp:215
#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS( name, reporter, graphite_context, test_context)
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
Definition DM.cpp:213
SkColor4f color
kUnpremul_SkAlphaType
#define SkASSERT(cond)
Definition SkAssert.h:116
@ kRGBA_8888_SkColorType
pixel with 8 bits for red, green, blue, alpha; in 32-bit word
Definition SkColorType.h:24
static constexpr SkColor SkColorSetARGB(U8CPU a, U8CPU r, U8CPU g, U8CPU b)
Definition SkColor.h:49
constexpr SkColor SK_ColorGREEN
Definition SkColor.h:131
#define SkDEBUGCODE(...)
Definition SkDebug.h:23
static const size_t kBufferSize
Definition SkString.cpp:27
SkTileMode
Definition SkTileMode.h:13
#define DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT(name, reporter, graphite_context, test_context)
Definition Test.h:389
#define DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(name, reporter, graphite_context, test_context)
Definition Test.h:385
#define REPORTER_ASSERT(r, cond,...)
Definition Test.h:286
#define ERRORF(r,...)
Definition Test.h:293
void allocPixels(const SkImageInfo &info, size_t rowBytes)
Definition SkBitmap.cpp:258
bool peekPixels(SkPixmap *pixmap) const
Definition SkBitmap.cpp:635
size_t rowBytes() const
Definition SkPixmap.h:145
const SkImageInfo & info() const
Definition SkPixmap.h:135
SkColor4f getColor4f(int x, int y) const
Definition SkPixmap.cpp:388
uint32_t * writable_addr32(int x, int y) const
Definition SkPixmap.h:537
const void * addr() const
Definition SkPixmap.h:153
virtual SamplerDesc calculateSamplerParameters(int resourceIndex, const ResourceDesc &) const
virtual std::string computeSkSL() const
virtual WorkgroupSize calculateGlobalDispatchSize() const
virtual void prepareStorageBuffer(int resourceIndex, const ResourceDesc &resource, void *buffer, size_t bufferSize) const
virtual std::tuple< SkISize, SkColorType > calculateTextureParameters(int resourceIndex, const ResourceDesc &) const
virtual NativeShaderSource nativeShaderSource(NativeShaderFormat) const
virtual void prepareUniformBuffer(int resourceIndex, const ResourceDesc &, UniformManager *) const
virtual size_t calculateBufferSize(int resourceIndex, const ResourceDesc &) const
static sk_sp< ComputeTask > Make(DispatchGroupList dispatchGroups)
static sk_sp< TextureProxy > Make(const Caps *, ResourceProvider *, SkISize dimensions, const TextureInfo &, skgpu::Budgeted)
static UploadInstance Make(Recorder *, sk_sp< TextureProxy > targetProxy, const SkColorInfo &srcColorInfo, const SkColorInfo &dstColorInfo, SkSpan< const MipLevel > levels, const SkIRect &dstRect, std::unique_ptr< ConditionalUploadContext >)
static sk_sp< UploadTask > Make(UploadList *)
static const uint8_t buffer[]
uint8_t value
GAsyncResult * result
uint32_t uint32_t * format
FlTexture * texture
double y
double x
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot data
Definition switches.h:41
dst
Definition cp.py:12
constexpr size_t kIndirectDispatchArgumentSize
static constexpr SkIRect MakeWH(int32_t w, int32_t h)
Definition SkRect.h:56
const SkColorInfo & colorInfo() const
static SkImageInfo Make(int width, int height, SkColorType ct, SkAlphaType at)
static SkRGBA4f FromBytes_RGBA(uint32_t color)
static SkRGBA4f FromColor(SkColor color)

Function Documentation

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [1/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_AtomicOperationsOverArrayAndStructTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1615 of file ComputeTest.cpp.

1618 {
1619 // This fails on Dawn D3D11, b/315834710
1620 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
1621 return;
1622 }
1623
1624 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1625
1626 constexpr uint32_t kWorkgroupCount = 32;
1627 constexpr uint32_t kWorkgroupSize = 256;
1628
1629 class TestComputeStep : public ComputeStep {
1630 public:
1631 TestComputeStep() : ComputeStep(
1632 /*name=*/"TestAtomicOperationsOverArrayAndStruct",
1633 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1634 /*resources=*/{
1635 {
1636 /*type=*/ResourceType::kStorageBuffer,
1637 /*flow=*/DataFlow::kShared,
1638 /*policy=*/ResourcePolicy::kMapped,
1639 /*slot=*/0,
1640 /*sksl=*/"ssbo {\n"
1641 " atomicUint globalCountsFirstHalf;\n"
1642 " atomicUint globalCountsSecondHalf;\n"
1643 "}\n"
1644 }
1645 }) {}
1646 ~TestComputeStep() override = default;
1647
1648 // Construct a kernel that increments a two global (device memory) counters across multiple
1649 // workgroups. Each workgroup maintains its own independent tallies in workgroup-shared
1650 // counters which are then added to the global counts.
1651 //
1652 // This exercises atomic store/load/add and coherent reads and writes over memory in storage
1653 // and workgroup address spaces.
1654 std::string computeSkSL() const override {
1655 return R"(
1656 const uint WORKGROUP_SIZE = 256;
1657
1658 workgroup atomicUint localCounts[2];
1659
1660 void main() {
1661 // Initialize the local counts.
1662 if (sk_LocalInvocationID.x == 0) {
1663 atomicStore(localCounts[0], 0);
1664 atomicStore(localCounts[1], 0);
1665 }
1666
1667 // Synchronize the threads in the workgroup so they all see the initial value.
1668 workgroupBarrier();
1669
1670 // Each thread increments one of the local counters based on its invocation
1671 // index.
1672 uint idx = sk_LocalInvocationID.x < (WORKGROUP_SIZE / 2) ? 0 : 1;
1673 atomicAdd(localCounts[idx], 1);
1674
1675 // Synchronize the threads again to ensure they have all executed the increments
1676 // and the following load reads the same value across all threads in the
1677 // workgroup.
1678 workgroupBarrier();
1679
1680 // Add the workgroup-only tally to the global counter.
1681 if (sk_LocalInvocationID.x == 0) {
1682 atomicAdd(globalCountsFirstHalf, atomicLoad(localCounts[0]));
1683 atomicAdd(globalCountsSecondHalf, atomicLoad(localCounts[1]));
1684 }
1685 }
1686 )";
1687 }
1688
1689 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1690 SkASSERT(index == 0);
1691 SkASSERT(r.fSlot == 0);
1692 SkASSERT(r.fFlow == DataFlow::kShared);
1693 return 2 * sizeof(uint32_t);
1694 }
1695
1697 return WorkgroupSize(kWorkgroupCount, 1, 1);
1698 }
1699
1700 void prepareStorageBuffer(int resourceIndex,
1701 const ResourceDesc& r,
1702 void* buffer,
1703 size_t bufferSize) const override {
1704 SkASSERT(resourceIndex == 0);
1705 uint32_t* data = static_cast<uint32_t*>(buffer);
1706 data[0] = 0;
1707 data[1] = 0;
1708 }
1709 } step;
1710
1711 DispatchGroup::Builder builder(recorder.get());
1712 builder.appendStep(&step);
1713
1714 BindBufferInfo info = builder.getSharedBufferResource(0);
1715 if (!info) {
1716 ERRORF(reporter, "shared resource at slot 0 is missing");
1717 return;
1718 }
1719
1720 // Record the compute pass task.
1722 groups.push_back(builder.finalize());
1723 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1724
1725 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1726 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
1727
1728 // Submit the work and wait for it to complete.
1729 std::unique_ptr<Recording> recording = recorder->snap();
1730 if (!recording) {
1731 ERRORF(reporter, "Failed to make recording");
1732 return;
1733 }
1734
1735 InsertRecordingInfo insertInfo;
1736 insertInfo.fRecording = recording.get();
1737 context->insertRecording(insertInfo);
1738 testContext->syncedSubmit(context);
1739
1740 // Verify the contents of the output buffer.
1741 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize / 2;
1742
1743 const uint32_t* ssboData = static_cast<const uint32_t*>(
1744 map_buffer(context, testContext, buffer.get(), info.fOffset));
1745 const uint32_t firstHalfCount = ssboData[0];
1746 const uint32_t secondHalfCount = ssboData[1];
1748 firstHalfCount == kExpectedCount,
1749 "expected '%u', found '%u'",
1750 kExpectedCount,
1751 firstHalfCount);
1753 secondHalfCount == kExpectedCount,
1754 "expected '%u', found '%u'",
1755 kExpectedCount,
1756 secondHalfCount);
1757}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [2/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_AtomicOperationsTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1485 of file ComputeTest.cpp.

1488 {
1489 // This fails on Dawn D3D11, b/315834710
1490 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
1491 return;
1492 }
1493
1494 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1495
1496 constexpr uint32_t kWorkgroupCount = 32;
1497 constexpr uint32_t kWorkgroupSize = 256;
1498
1499 class TestComputeStep : public ComputeStep {
1500 public:
1501 TestComputeStep() : ComputeStep(
1502 /*name=*/"TestAtomicOperations",
1503 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1504 /*resources=*/{
1505 {
1506 /*type=*/ResourceType::kStorageBuffer,
1507 /*flow=*/DataFlow::kShared,
1508 /*policy=*/ResourcePolicy::kMapped,
1509 /*slot=*/0,
1510 /*sksl=*/"ssbo { atomicUint globalCounter; }",
1511 }
1512 }) {}
1513 ~TestComputeStep() override = default;
1514
1515 // A kernel that increments a global (device memory) counter across multiple workgroups.
1516 // Each workgroup maintains its own independent tally in a workgroup-shared counter which
1517 // is then added to the global count.
1518 //
1519 // This exercises atomic store/load/add and coherent reads and writes over memory in storage
1520 // and workgroup address spaces.
1521 std::string computeSkSL() const override {
1522 return R"(
1523 workgroup atomicUint localCounter;
1524
1525 void main() {
1526 // Initialize the local counter.
1527 if (sk_LocalInvocationID.x == 0) {
1528 atomicStore(localCounter, 0);
1529 }
1530
1531 // Synchronize the threads in the workgroup so they all see the initial value.
1532 workgroupBarrier();
1533
1534 // All threads increment the counter.
1535 atomicAdd(localCounter, 1);
1536
1537 // Synchronize the threads again to ensure they have all executed the increment
1538 // and the following load reads the same value across all threads in the
1539 // workgroup.
1540 workgroupBarrier();
1541
1542 // Add the workgroup-only tally to the global counter.
1543 if (sk_LocalInvocationID.x == 0) {
1544 atomicAdd(globalCounter, atomicLoad(localCounter));
1545 }
1546 }
1547 )";
1548 }
1549
1550 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1551 SkASSERT(index == 0);
1552 SkASSERT(r.fSlot == 0);
1553 SkASSERT(r.fFlow == DataFlow::kShared);
1554 return sizeof(uint32_t);
1555 }
1556
1557 WorkgroupSize calculateGlobalDispatchSize() const override {
1558 return WorkgroupSize(kWorkgroupCount, 1, 1);
1559 }
1560
1561 void prepareStorageBuffer(int resourceIndex,
1562 const ResourceDesc& r,
1563 void* buffer,
1564 size_t bufferSize) const override {
1565 SkASSERT(resourceIndex == 0);
1566 *static_cast<uint32_t*>(buffer) = 0;
1567 }
1568 } step;
1569
1570 DispatchGroup::Builder builder(recorder.get());
1571 builder.appendStep(&step);
1572
1573 BindBufferInfo info = builder.getSharedBufferResource(0);
1574 if (!info) {
1575 ERRORF(reporter, "shared resource at slot 0 is missing");
1576 return;
1577 }
1578
1579 // Record the compute pass task.
1580 ComputeTask::DispatchGroupList groups;
1581 groups.push_back(builder.finalize());
1582 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1583
1584 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1585 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
1586
1587 // Submit the work and wait for it to complete.
1588 std::unique_ptr<Recording> recording = recorder->snap();
1589 if (!recording) {
1590 ERRORF(reporter, "Failed to make recording");
1591 return;
1592 }
1593
1594 InsertRecordingInfo insertInfo;
1595 insertInfo.fRecording = recording.get();
1596 context->insertRecording(insertInfo);
1597 testContext->syncedSubmit(context);
1598
1599 // Verify the contents of the output buffer.
1600 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
1601 const uint32_t result = static_cast<const uint32_t*>(
1602 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
1603 REPORTER_ASSERT(reporter,
1604 result == kExpectedCount,
1605 "expected '%u', found '%u'",
1606 kExpectedCount,
1607 result);
1608}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [3/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ClearedBuffer  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1759 of file ComputeTest.cpp.

1762 {
1763 constexpr uint32_t kProblemSize = 512;
1764
1765 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
1766 // processes 1 vector at a time.
1767 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
1768
1769 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1770
1771 // The ComputeStep requests an unmapped buffer that is zero-initialized. It writes the output to
1772 // a mapped buffer which test verifies.
1773 class TestComputeStep : public ComputeStep {
1774 public:
1775 TestComputeStep() : ComputeStep(
1776 /*name=*/"TestClearedBuffer",
1777 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1778 /*resources=*/{
1779 // Zero initialized input buffer
1780 {
1781 // TODO(b/299979165): Declare this binding as read-only.
1782 /*type=*/ResourceType::kStorageBuffer,
1783 /*flow=*/DataFlow::kPrivate,
1784 /*policy=*/ResourcePolicy::kClear,
1785 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
1786 },
1787 // Output buffer:
1788 {
1789 /*type=*/ResourceType::kStorageBuffer,
1790 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
1791 // Builder
1792 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
1793 /*slot=*/0,
1794 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1795 }
1796 }) {}
1797 ~TestComputeStep() override = default;
1798
1799 std::string computeSkSL() const override {
1800 return R"(
1801 void main() {
1802 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1803 }
1804 )";
1805 }
1806
1807 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1808 return sizeof(uint32_t) * kProblemSize;
1809 }
1810
1811 void prepareStorageBuffer(int resourceIndex,
1812 const ResourceDesc& r,
1813 void* buffer,
1814 size_t bufferSize) const override {
1815 // Should receive this call only for the mapped buffer.
1816 SkASSERT(resourceIndex == 1);
1817 }
1818
1819 WorkgroupSize calculateGlobalDispatchSize() const override {
1820 return WorkgroupSize(1, 1, 1);
1821 }
1822 } step;
1823
1824 DispatchGroup::Builder builder(recorder.get());
1825 if (!builder.appendStep(&step)) {
1826 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
1827 return;
1828 }
1829
1830 // The output buffer should have been placed in the right output slot.
1831 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
1832 if (!outputInfo) {
1833 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
1834 return;
1835 }
1836
1837 // Record the compute task
1838 ComputeTask::DispatchGroupList groups;
1839 groups.push_back(builder.finalize());
1840 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1841
1842 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1843 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
1844
1845 // Submit the work and wait for it to complete.
1846 std::unique_ptr<Recording> recording = recorder->snap();
1847 if (!recording) {
1848 ERRORF(reporter, "Failed to make recording");
1849 return;
1850 }
1851
1852 InsertRecordingInfo insertInfo;
1853 insertInfo.fRecording = recording.get();
1854 context->insertRecording(insertInfo);
1855 testContext->syncedSubmit(context);
1856
1857 // Verify the contents of the output buffer.
1858 uint32_t* outData = static_cast<uint32_t*>(
1859 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
1860 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1861 for (unsigned int i = 0; i < kProblemSize; ++i) {
1862 const uint32_t found = outData[i];
1863 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1864 }
1865}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [4/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ClearOrdering  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1867 of file ComputeTest.cpp.

1870 {
1871 // Initiate two independent DispatchGroups operating on the same buffer. The first group
1872 // writes garbage to the buffer and the second group copies the contents to an output buffer.
1873 // This test validates that the reads, writes, and clear occur in the expected order.
1874 constexpr uint32_t kWorkgroupSize = 64;
1875
1876 // Initialize buffer with non-zero data.
1877 class FillWithGarbage : public ComputeStep {
1878 public:
1879 FillWithGarbage() : ComputeStep(
1880 /*name=*/"FillWithGarbage",
1881 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1882 /*resources=*/{
1883 {
1884 /*type=*/ResourceType::kStorageBuffer,
1885 /*flow=*/DataFlow::kShared,
1886 /*policy=*/ResourcePolicy::kNone,
1887 /*slot=*/0,
1888 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1889 }
1890 }) {}
1891 ~FillWithGarbage() override = default;
1892
1893 std::string computeSkSL() const override {
1894 return R"(
1895 void main() {
1896 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
1897 }
1898 )";
1899 }
1900 } garbageStep;
1901
1902 // Second stage just copies the data to a destination buffer. This is only to verify that this
1903 // stage, issued in a separate DispatchGroup, observes the clear.
1904 class CopyBuffer : public ComputeStep {
1905 public:
1906 CopyBuffer() : ComputeStep(
1907 /*name=*/"CopyBuffer",
1908 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1909 /*resources=*/{
1910 {
1911 /*type=*/ResourceType::kStorageBuffer,
1912 /*flow=*/DataFlow::kShared,
1913 /*policy=*/ResourcePolicy::kNone,
1914 /*slot=*/0,
1915 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
1916 },
1917 {
1918 /*type=*/ResourceType::kStorageBuffer,
1919 /*flow=*/DataFlow::kShared,
1920 /*policy=*/ResourcePolicy::kNone,
1921 /*slot=*/1,
1922 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1923 }
1924 }) {}
1925 ~CopyBuffer() override = default;
1926
1927 std::string computeSkSL() const override {
1928 return R"(
1929 void main() {
1930 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1931 }
1932 )";
1933 }
1934 } copyStep;
1935
1936 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1937 DispatchGroup::Builder builder(recorder.get());
1938
1939 constexpr size_t kElementCount = 4 * kWorkgroupSize;
1940 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
1941 auto input = recorder->priv().drawBufferManager()->getStorage(kBufferSize);
1942 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
1943
1944 ComputeTask::DispatchGroupList groups;
1945
1946 // First group.
1947 builder.assignSharedBuffer({input, kBufferSize}, 0);
1948 builder.appendStep(&garbageStep, {{1, 1, 1}});
1949 groups.push_back(builder.finalize());
1950
1951 // Second group.
1952 builder.reset();
1953 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
1954 builder.assignSharedBuffer({output, kBufferSize}, 1);
1955 builder.appendStep(&copyStep, {{1, 1, 1}});
1956 groups.push_back(builder.finalize());
1957
1958 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1959 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1960 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
1961
1962 // Submit the work and wait for it to complete.
1963 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
1964 if (!recording) {
1965 ERRORF(reporter, "Failed to make recording");
1966 return;
1967 }
1968
1969 // Verify the contents of the output buffer.
1970 uint32_t* outData = static_cast<uint32_t*>(
1971 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
1972 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1973 for (unsigned int i = 0; i < kElementCount; ++i) {
1974 const uint32_t found = outData[i];
1975 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1976 }
1977}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [5/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ClearOrderingScratchBuffers  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1979 of file ComputeTest.cpp.

1982 {
1983 // This test is the same as the ClearOrdering test but the two stages write to a recycled
1984 // ScratchBuffer. This is primarily to test ScratchBuffer reuse.
1985 constexpr uint32_t kWorkgroupSize = 64;
1986
1987 // Initialize buffer with non-zero data.
1988 class FillWithGarbage : public ComputeStep {
1989 public:
1990 FillWithGarbage() : ComputeStep(
1991 /*name=*/"FillWithGarbage",
1992 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1993 /*resources=*/{
1994 {
1995 /*type=*/ResourceType::kStorageBuffer,
1996 /*flow=*/DataFlow::kShared,
1997 /*policy=*/ResourcePolicy::kNone,
1998 /*slot=*/0,
1999 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
2000 }
2001 }) {}
2002 ~FillWithGarbage() override = default;
2003
2004 std::string computeSkSL() const override {
2005 return R"(
2006 void main() {
2007 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
2008 }
2009 )";
2010 }
2011 } garbageStep;
2012
2013 // Second stage just copies the data to a destination buffer. This is only to verify that this
2014 // stage (issued in a separate DispatchGroup) sees the changes.
2015 class CopyBuffer : public ComputeStep {
2016 public:
2017 CopyBuffer() : ComputeStep(
2018 /*name=*/"CopyBuffer",
2019 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2020 /*resources=*/{
2021 {
2022 /*type=*/ResourceType::kStorageBuffer,
2023 /*flow=*/DataFlow::kShared,
2024 /*policy=*/ResourcePolicy::kNone,
2025 /*slot=*/0,
2026 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
2027 },
2028 {
2029 /*type=*/ResourceType::kStorageBuffer,
2030 /*flow=*/DataFlow::kShared,
2031 /*policy=*/ResourcePolicy::kNone,
2032 /*slot=*/1,
2033 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
2034 }
2035 }) {}
2036 ~CopyBuffer() override = default;
2037
2038 std::string computeSkSL() const override {
2039 return R"(
2040 void main() {
2041 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
2042 }
2043 )";
2044 }
2045 } copyStep;
2046
2047 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2048 DispatchGroup::Builder builder(recorder.get());
2049
2050 constexpr size_t kElementCount = 4 * kWorkgroupSize;
2051 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
2052 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
2053
2054 ComputeTask::DispatchGroupList groups;
2055
2056 // First group.
2057 {
2058 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2059 auto input = scratch.suballocate(kBufferSize);
2060 builder.assignSharedBuffer({input, kBufferSize}, 0);
2061
2062 // `scratch` returns to the scratch buffer pool when it goes out of scope
2063 }
2064 builder.appendStep(&garbageStep, {{1, 1, 1}});
2065 groups.push_back(builder.finalize());
2066
2067 // Second group.
2068 builder.reset();
2069 {
2070 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2071 auto input = scratch.suballocate(kBufferSize);
2072 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
2073 }
2074 builder.assignSharedBuffer({output, kBufferSize}, 1);
2075 builder.appendStep(&copyStep, {{1, 1, 1}});
2076 groups.push_back(builder.finalize());
2077
2078 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2079 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2080 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
2081
2082 // Submit the work and wait for it to complete.
2083 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
2084 if (!recording) {
2085 ERRORF(reporter, "Failed to make recording");
2086 return;
2087 }
2088
2089 // Verify the contents of the output buffer.
2090 uint32_t* outData = static_cast<uint32_t*>(
2091 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
2092 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
2093 for (unsigned int i = 0; i < kElementCount; ++i) {
2094 const uint32_t found = outData[i];
2095 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
2096 }
2097}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [6/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_DispatchGroupTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 245 of file ComputeTest.cpp.

248 {
249 // TODO(b/315834710): This fails on Dawn D3D11
250 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
251 return;
252 }
253
254 constexpr uint32_t kProblemSize = 512;
255 constexpr float kFactor1 = 4.f;
256 constexpr float kFactor2 = 3.f;
257
258 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
259 // processes 1 vector at a time.
260 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
261
262 std::unique_ptr<Recorder> recorder = context->makeRecorder();
263
264 // Define two steps that perform two multiplication passes over the same input.
265
266 class TestComputeStep1 : public ComputeStep {
267 public:
268 // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
269 // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
270 TestComputeStep1() : ComputeStep(
271 /*name=*/"TestArrayMultiplyFirstPass",
272 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
273 /*resources=*/{
274 // Input buffer:
275 {
276 // TODO(b/299979165): Declare this binding as read-only.
277 /*type=*/ResourceType::kStorageBuffer,
278 /*flow=*/DataFlow::kPrivate,
279 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
280 /*sksl=*/"inputBlock {\n"
281 " float factor;\n"
282 " layout(offset=16) float4 in_data[];\n"
283 "}",
284 },
285 // Output buffers:
286 {
287 /*type=*/ResourceType::kStorageBuffer,
288 /*flow=*/DataFlow::kShared,
289 /*policy=*/ResourcePolicy::kNone, // GPU-only, read by second step
290 /*slot=*/0,
291 /*sksl=*/"outputBlock1 { float4 forward_data[]; }",
292 },
293 {
294 /*type=*/ResourceType::kStorageBuffer,
295 /*flow=*/DataFlow::kShared,
296 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
297 /*slot=*/1,
298 /*sksl=*/"outputBlock2 { float2 extra_data; }",
299 }
300 }) {}
301 ~TestComputeStep1() override = default;
302
303 // A kernel that multiplies a large array of floats by a supplied factor.
304 std::string computeSkSL() const override {
305 return R"(
306 void main() {
307 uint idx = sk_GlobalInvocationID.x;
308 forward_data[idx] = in_data[idx] * factor;
309 if (idx == 0) {
310 extra_data.x = factor;
311 extra_data.y = 2 * factor;
312 }
313 }
314 )";
315 }
316
317 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
318 if (index == 0) {
319 SkASSERT(r.fFlow == DataFlow::kPrivate);
320 return sizeof(float) * (kProblemSize + 4);
321 }
322 if (index == 1) {
323 SkASSERT(r.fFlow == DataFlow::kShared);
324 SkASSERT(r.fSlot == 0);
325 return sizeof(float) * kProblemSize;
326 }
327
328 SkASSERT(index == 2);
329 SkASSERT(r.fSlot == 1);
330 SkASSERT(r.fFlow == DataFlow::kShared);
331 return 2 * sizeof(float);
332 }
333
334 void prepareStorageBuffer(int resourceIndex,
335 const ResourceDesc& r,
336 void* buffer,
337 size_t bufferSize) const override {
338 if (resourceIndex != 0) {
339 return;
340 }
341
342 size_t dataCount = sizeof(float) * (kProblemSize + 4);
343 SkASSERT(bufferSize == dataCount);
344 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
345 inData[0] = kFactor1;
346 for (unsigned int i = 0; i < kProblemSize; ++i) {
347 inData[i + 4] = i + 1;
348 }
349 }
350
351 WorkgroupSize calculateGlobalDispatchSize() const override {
352 return WorkgroupSize(1, 1, 1);
353 }
354 } step1;
355
356 class TestComputeStep2 : public ComputeStep {
357 public:
358 TestComputeStep2() : ComputeStep(
359 /*name=*/"TestArrayMultiplySecondPass",
360 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
361 /*resources=*/{
362 // Input buffer:
363 {
364 /*type=*/ResourceType::kStorageBuffer,
365 /*flow=*/DataFlow::kShared,
366 /*policy=*/ResourcePolicy::kNone, // GPU-only
367 /*slot=*/0, // this is the output from the first step
368 /*sksl=*/"inputBlock { float4 in_data[]; }",
369 },
370 {
371 /*type=*/ResourceType::kStorageBuffer,
372 /*flow=*/DataFlow::kPrivate,
373 /*policy=*/ResourcePolicy::kMapped,
374 /*sksl=*/"factorBlock { float factor; }"
375 },
376 // Output buffer:
377 {
378 /*type=*/ResourceType::kStorageBuffer,
379 /*flow=*/DataFlow::kShared,
380 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
381 /*slot=*/2,
382 /*sksl=*/"outputBlock { float4 out_data[]; }",
383 }
384 }) {}
385 ~TestComputeStep2() override = default;
386
387 // A kernel that multiplies a large array of floats by a supplied factor.
388 std::string computeSkSL() const override {
389 return R"(
390 void main() {
391 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
392 }
393 )";
394 }
395
396 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
397 SkASSERT(index != 0);
398 if (index == 1) {
399 SkASSERT(r.fFlow == DataFlow::kPrivate);
400 return sizeof(float) * 4;
401 }
402 SkASSERT(index == 2);
403 SkASSERT(r.fSlot == 2);
404 SkASSERT(r.fFlow == DataFlow::kShared);
405 return sizeof(float) * kProblemSize;
406 }
407
408 void prepareStorageBuffer(int resourceIndex,
409 const ResourceDesc& r,
410 void* buffer,
411 size_t bufferSize) const override {
412 if (resourceIndex != 1) {
413 return;
414 }
415 SkASSERT(r.fFlow == DataFlow::kPrivate);
416 *static_cast<float*>(buffer) = kFactor2;
417 }
418
419 WorkgroupSize calculateGlobalDispatchSize() const override {
420 return WorkgroupSize(1, 1, 1);
421 }
422 } step2;
423
424 DispatchGroup::Builder builder(recorder.get());
425 builder.appendStep(&step1);
426 builder.appendStep(&step2);
427
428 // Slots 0, 1, and 2 should all contain shared buffers. Slot 1 contains the extra output buffer
429 // from step 1 while slot 2 contains the result of the second multiplication pass from step 1.
430 // Slot 0 is not mappable.
431 REPORTER_ASSERT(reporter,
432 std::holds_alternative<BufferView>(builder.outputTable().fSharedSlots[0]),
433 "shared resource at slot 0 is missing");
434 BindBufferInfo outputInfo = builder.getSharedBufferResource(2);
435 if (!outputInfo) {
436 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
437 return;
438 }
439
440 // Extra output buffer from step 1 (corresponding to 'outputBlock2')
441 BindBufferInfo extraOutputInfo = builder.getSharedBufferResource(1);
442 if (!extraOutputInfo) {
443 ERRORF(reporter, "shared resource at slot 1 is missing");
444 return;
445 }
446
447 // Record the compute task
448 ComputeTask::DispatchGroupList groups;
449 groups.push_back(builder.finalize());
450 recorder->priv().add(ComputeTask::Make(std::move(groups)));
451
452 // Ensure the output buffers get synchronized to the CPU once the GPU submission has finished.
453 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
454 auto extraOutputBuffer = sync_buffer_to_cpu(recorder.get(), extraOutputInfo.fBuffer);
455
456 // Submit the work and wait for it to complete.
457 std::unique_ptr<Recording> recording = recorder->snap();
458 if (!recording) {
459 ERRORF(reporter, "Failed to make recording");
460 return;
461 }
462
463 InsertRecordingInfo insertInfo;
464 insertInfo.fRecording = recording.get();
465 context->insertRecording(insertInfo);
466 testContext->syncedSubmit(context);
467
468 // Verify the contents of the output buffer from step 2
469 float* outData = static_cast<float*>(
470 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
471 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
472 for (unsigned int i = 0; i < kProblemSize; ++i) {
473 const float expected = (i + 1) * kFactor1 * kFactor2;
474 const float found = outData[i];
475 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
476 }
477
478 // Verify the contents of the extra output buffer from step 1
479 float* extraOutData = static_cast<float*>(
480 map_buffer(context, testContext, extraOutputBuffer.get(), extraOutputInfo.fOffset));
481 SkASSERT(extraOutputBuffer->isMapped() && extraOutData != nullptr);
482 REPORTER_ASSERT(reporter,
483 kFactor1 == extraOutData[0],
484 "expected '%f', found '%f'",
485 kFactor1,
486 extraOutData[0]);
487 REPORTER_ASSERT(reporter,
488 2 * kFactor1 == extraOutData[1],
489 "expected '%f', found '%f'",
490 2 * kFactor2,
491 extraOutData[1]);
492}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [7/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ExternallyAssignedBuffer  ,
reporter  ,
context  ,
testContext   
)

Definition at line 649 of file ComputeTest.cpp.

652 {
653 constexpr uint32_t kProblemSize = 512;
654 constexpr float kFactor = 4.f;
655
656 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
657 // processes 1 vector at a time.
658 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
659
660 std::unique_ptr<Recorder> recorder = context->makeRecorder();
661
662 class TestComputeStep : public ComputeStep {
663 public:
664 TestComputeStep() : ComputeStep(
665 /*name=*/"ExternallyAssignedBuffer",
666 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
667 /*resources=*/{
668 // Input buffer:
669 {
670 /*type=*/ResourceType::kStorageBuffer,
671 /*flow=*/DataFlow::kPrivate,
672 /*policy=*/ResourcePolicy::kMapped,
673 /*sksl=*/"inputBlock {\n"
674 " float factor;\n"
675 " layout(offset = 16) float4 in_data[];\n"
676 "}\n",
677 },
678 // Output buffer:
679 {
680 /*type=*/ResourceType::kStorageBuffer,
681 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
682 // Builder
683 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
684 /*slot=*/0,
685 /*sksl=*/"outputBlock { float4 out_data[]; }",
686 }
687 }) {}
688 ~TestComputeStep() override = default;
689
690 // A kernel that multiplies a large array of floats by a supplied factor.
691 std::string computeSkSL() const override {
692 return R"(
693 void main() {
694 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
695 }
696 )";
697 }
698
699 size_t calculateBufferSize(int resourceIndex, const ResourceDesc& r) const override {
700 SkASSERT(resourceIndex == 0);
701 SkASSERT(r.fFlow == DataFlow::kPrivate);
702 return sizeof(float) * (kProblemSize + 4);
703 }
704
705 void prepareStorageBuffer(int resourceIndex,
706 const ResourceDesc& r,
707 void* buffer,
708 size_t bufferSize) const override {
709 SkASSERT(resourceIndex == 0);
710 SkASSERT(r.fFlow == DataFlow::kPrivate);
711
712 size_t dataCount = sizeof(float) * (kProblemSize + 4);
713 SkASSERT(bufferSize == dataCount);
714 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
715 inData[0] = kFactor;
716 for (unsigned int i = 0; i < kProblemSize; ++i) {
717 inData[i + 4] = i + 1;
718 }
719 }
720 } step;
721
722 // We allocate a buffer and directly assign it to the DispatchGroup::Builder. The ComputeStep
723 // will not participate in the creation of this buffer.
724 auto [_, outputInfo] =
725 recorder->priv().drawBufferManager()->getStoragePointer(sizeof(float) * kProblemSize);
726 REPORTER_ASSERT(reporter, outputInfo, "Failed to allocate output buffer");
727
728 DispatchGroup::Builder builder(recorder.get());
729 builder.assignSharedBuffer({outputInfo, sizeof(float) * kProblemSize}, 0);
730
731 // Initialize the step with a pre-determined global size
732 if (!builder.appendStep(&step, {WorkgroupSize(1, 1, 1)})) {
733 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
734 return;
735 }
736
737 // Record the compute task
738 ComputeTask::DispatchGroupList groups;
739 groups.push_back(builder.finalize());
740 recorder->priv().add(ComputeTask::Make(std::move(groups)));
741
742 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
743 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
744
745 // Submit the work and wait for it to complete.
746 std::unique_ptr<Recording> recording = recorder->snap();
747 if (!recording) {
748 ERRORF(reporter, "Failed to make recording");
749 return;
750 }
751
752 InsertRecordingInfo insertInfo;
753 insertInfo.fRecording = recording.get();
754 context->insertRecording(insertInfo);
755 testContext->syncedSubmit(context);
756
757 // Verify the contents of the output buffer.
758 float* outData = static_cast<float*>(
759 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
760 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
761 for (unsigned int i = 0; i < kProblemSize; ++i) {
762 const float expected = (i + 1) * kFactor;
763 const float found = outData[i];
764 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
765 }
766}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [8/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_IndirectDispatch  ,
reporter  ,
context  ,
testContext   
)

Definition at line 2099 of file ComputeTest.cpp.

2102 {
2103 // This fails on Dawn D3D11, b/315834710
2104 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2105 return;
2106 }
2107
2108 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2109
2110 constexpr uint32_t kWorkgroupCount = 32;
2111 constexpr uint32_t kWorkgroupSize = 64;
2112
2113 // `IndirectStep` populates a buffer with the global workgroup count for `CountStep`.
2114 // `CountStep` is recorded using `DispatchGroup::appendStepIndirect()` and its workgroups get
2115 // dispatched according to the values computed by `IndirectStep` on the GPU.
2116 class IndirectStep : public ComputeStep {
2117 public:
2118 IndirectStep()
2119 : ComputeStep(
2120 /*name=*/"TestIndirectDispatch_IndirectStep",
2121 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2122 /*resources=*/
2123 {{
2124 /*type=*/ResourceType::kIndirectBuffer,
2125 /*flow=*/DataFlow::kShared,
2126 /*policy=*/ResourcePolicy::kClear,
2127 /*slot=*/0,
2128 // TODO(armansito): Ideally the SSBO would have a single member of
2129 // type `IndirectDispatchArgs` struct type. SkSL modules don't
2130 // support struct declarations so this is currently not possible.
2131 /*sksl=*/"ssbo { uint indirect[]; }",
2132 }}) {}
2133 ~IndirectStep() override = default;
2134
2135 // Kernel that specifies a workgroup size of `kWorkgroupCount` to be used by the indirect
2136 // dispatch.
2137 std::string computeSkSL() const override {
2138 return R"(
2139 // This needs to match `kWorkgroupCount` declared above.
2140 const uint kWorkgroupCount = 32;
2141
2142 void main() {
2143 if (sk_LocalInvocationID.x == 0) {
2144 indirect[0] = kWorkgroupCount;
2145 indirect[1] = 1;
2146 indirect[2] = 1;
2147 }
2148 }
2149 )";
2150 }
2151
2152 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2153 SkASSERT(index == 0);
2154 SkASSERT(r.fSlot == 0);
2155 SkASSERT(r.fFlow == DataFlow::kShared);
2156 return kIndirectDispatchArgumentSize;
2157 }
2158
2159 WorkgroupSize calculateGlobalDispatchSize() const override {
2160 return WorkgroupSize(1, 1, 1);
2161 }
2162 } indirectStep;
2163
2164 class CountStep : public ComputeStep {
2165 public:
2166 CountStep()
2167 : ComputeStep(
2168 /*name=*/"TestIndirectDispatch_CountStep",
2169 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2170 /*resources=*/
2171 {{
2172 /*type=*/ResourceType::kStorageBuffer,
2173 /*flow=*/DataFlow::kShared,
2174 /*policy=*/ResourcePolicy::kMapped,
2175 /*slot=*/1,
2176 /*sksl=*/"ssbo { atomicUint globalCounter; }",
2177 }}) {}
2178 ~CountStep() override = default;
2179
2180 std::string computeSkSL() const override {
2181 return R"(
2182 workgroup atomicUint localCounter;
2183
2184 void main() {
2185 // Initialize the local counter.
2186 if (sk_LocalInvocationID.x == 0) {
2187 atomicStore(localCounter, 0);
2188 }
2189
2190 // Synchronize the threads in the workgroup so they all see the initial value.
2191 workgroupBarrier();
2192
2193 // All threads increment the counter.
2194 atomicAdd(localCounter, 1);
2195
2196 // Synchronize the threads again to ensure they have all executed the increment
2197 // and the following load reads the same value across all threads in the
2198 // workgroup.
2199 workgroupBarrier();
2200
2201 // Add the workgroup-only tally to the global counter.
2202 if (sk_LocalInvocationID.x == 0) {
2203 atomicAdd(globalCounter, atomicLoad(localCounter));
2204 }
2205 }
2206 )";
2207 }
2208
2209 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2210 SkASSERT(index == 0);
2211 SkASSERT(r.fSlot == 1);
2212 SkASSERT(r.fFlow == DataFlow::kShared);
2213 return sizeof(uint32_t);
2214 }
2215
2216 void prepareStorageBuffer(int resourceIndex,
2217 const ResourceDesc& r,
2218 void* buffer,
2219 size_t bufferSize) const override {
2220 SkASSERT(resourceIndex == 0);
2221 *static_cast<uint32_t*>(buffer) = 0;
2222 }
2223 } countStep;
2224
2225 DispatchGroup::Builder builder(recorder.get());
2226 builder.appendStep(&indirectStep);
2227 BindBufferInfo indirectBufferInfo = builder.getSharedBufferResource(0);
2228 if (!indirectBufferInfo) {
2229 ERRORF(reporter, "Shared resource at slot 0 is missing");
2230 return;
2231 }
2232 builder.appendStepIndirect(&countStep, {indirectBufferInfo, kIndirectDispatchArgumentSize});
2233
2234 BindBufferInfo info = builder.getSharedBufferResource(1);
2235 if (!info) {
2236 ERRORF(reporter, "Shared resource at slot 1 is missing");
2237 return;
2238 }
2239
2240 // Record the compute pass task.
2241 ComputeTask::DispatchGroupList groups;
2242 groups.push_back(builder.finalize());
2243 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2244
2245 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2246 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2247
2248 // Submit the work and wait for it to complete.
2249 std::unique_ptr<Recording> recording = recorder->snap();
2250 if (!recording) {
2251 ERRORF(reporter, "Failed to make recording");
2252 return;
2253 }
2254
2255 InsertRecordingInfo insertInfo;
2256 insertInfo.fRecording = recording.get();
2257 context->insertRecording(insertInfo);
2258 testContext->syncedSubmit(context);
2259
2260 // Verify the contents of the output buffer.
2261 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2262 const uint32_t result = static_cast<const uint32_t*>(
2263 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2265 result == kExpectedCount,
2266 "expected '%u', found '%u'",
2267 kExpectedCount,
2268 result);
2269}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [9/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ReadOnlyStorageBuffer  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1027 of file ComputeTest.cpp.

1030 {
1031 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1032
1033 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
1034 // size.
1035 constexpr uint32_t kDim = 16;
1036
1037 class TestComputeStep : public ComputeStep {
1038 public:
1039 TestComputeStep() : ComputeStep(
1040 /*name=*/"TestReadOnlyStorageBuffer",
1041 /*localDispatchSize=*/{kDim, kDim, 1},
1042 /*resources=*/{
1043 {
1044 /*type=*/ResourceType::kReadOnlyStorageBuffer,
1045 /*flow=*/DataFlow::kShared,
1046 /*policy=*/ResourcePolicy::kMapped,
1047 /*slot=*/0,
1048 /*sksl=*/"src { uint in_data[]; }",
1049 },
1050 {
1051 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1052 /*flow=*/DataFlow::kShared,
1053 /*policy=*/ResourcePolicy::kNone,
1054 /*slot=*/1,
1055 /*sksl=*/"dst",
1056 }
1057 }) {}
1058 ~TestComputeStep() override = default;
1059
1060 std::string computeSkSL() const override {
1061 return R"(
1062 void main() {
1063 uint ix = sk_LocalInvocationID.y * 16 + sk_LocalInvocationID.x;
1064 uint value = in_data[ix];
1065 half4 splat = half4(
1066 half(value & 0xFF),
1067 half((value >> 8) & 0xFF),
1068 half((value >> 16) & 0xFF),
1069 half((value >> 24) & 0xFF)
1070 );
1071 textureWrite(dst, sk_LocalInvocationID.xy, splat / 255.0);
1072 }
1073 )";
1074 }
1075
1076 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1077 SkASSERT(index == 0);
1078 return kDim * kDim * sizeof(uint32_t);
1079 }
1080
1081 void prepareStorageBuffer(int index,
1082 const ResourceDesc&,
1083 void* buffer,
1084 size_t bufferSize) const override {
1085 SkASSERT(index == 0);
1086 SkASSERT(bufferSize == kDim * kDim * sizeof(uint32_t));
1087
1088 uint32_t* inputs = reinterpret_cast<uint32_t*>(buffer);
1089 for (uint32_t y = 0; y < kDim; ++y) {
1090 for (uint32_t x = 0; x < kDim; ++x) {
1091 uint32_t value =
1092 ((x * 256 / kDim) & 0xFF) | ((y * 256 / kDim) & 0xFF) << 8 | 255 << 24;
1093 *(inputs++) = value;
1094 }
1095 }
1096 }
1097
1098 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1099 int index, const ResourceDesc& r) const override {
1100 SkASSERT(index == 1);
1101 return {{kDim, kDim}, kRGBA_8888_SkColorType};
1102 }
1103
1105 return WorkgroupSize(1, 1, 1);
1106 }
1107 } step;
1108
1109 DispatchGroup::Builder builder(recorder.get());
1110 if (!builder.appendStep(&step)) {
1111 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
1112 return;
1113 }
1114
1115 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
1116 if (!dst) {
1117 ERRORF(reporter, "shared resource at slot 1 is missing");
1118 return;
1119 }
1120
1121 // Record the compute task
1123 groups.push_back(builder.finalize());
1124 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1125
1126 // Submit the work and wait for it to complete.
1127 std::unique_ptr<Recording> recording = recorder->snap();
1128 if (!recording) {
1129 ERRORF(reporter, "Failed to make recording");
1130 return;
1131 }
1132
1133 InsertRecordingInfo insertInfo;
1134 insertInfo.fRecording = recording.get();
1135 context->insertRecording(insertInfo);
1136 testContext->syncedSubmit(context);
1137
1139 SkImageInfo imgInfo =
1141 bitmap.allocPixels(imgInfo);
1142
1143 SkPixmap pixels;
1144 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1145 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1146
1147 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1148 REPORTER_ASSERT(reporter, readPixelsSuccess);
1149
1150 for (uint32_t x = 0; x < kDim; ++x) {
1151 for (uint32_t y = 0; y < kDim; ++y) {
1152 SkColor4f expected =
1153 SkColor4f::FromColor(SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
1154 SkColor4f color = pixels.getColor4f(x, y);
1155 bool pass = true;
1156 for (int i = 0; i < 4; i++) {
1157 pass &= color[i] == expected[i];
1158 }
1160 "At position {%u, %u}, "
1161 "expected {%.1f, %.1f, %.1f, %.1f}, "
1162 "found {%.1f, %.1f, %.1f, %.1f}",
1163 x, y,
1164 expected.fR, expected.fG, expected.fB, expected.fA,
1165 color.fR, color.fG, color.fB, color.fA);
1166 }
1167 }
1168}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [10/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_SampledTexture  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1318 of file ComputeTest.cpp.

1321 {
1322 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1323
1324 // The first ComputeStep initializes a 16x16 texture with a checkerboard pattern of alternating
1325 // red and black pixels. The second ComputeStep downsamples this texture into a 4x4 using
1326 // bilinear filtering at pixel borders, intentionally averaging the values of each 4x4 tile in
1327 // the source texture, and writes the result to the destination texture.
1328 constexpr uint32_t kSrcDim = 16;
1329 constexpr uint32_t kDstDim = 4;
1330
1331 class TestComputeStep1 : public ComputeStep {
1332 public:
1333 TestComputeStep1() : ComputeStep(
1334 /*name=*/"Test_SampledTexture_Init",
1335 /*localDispatchSize=*/{kSrcDim, kSrcDim, 1},
1336 /*resources=*/{
1337 {
1338 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1339 /*flow=*/DataFlow::kShared,
1340 /*policy=*/ResourcePolicy::kNone,
1341 /*slot=*/0,
1342 /*sksl=*/"dst",
1343 }
1344 }) {}
1345 ~TestComputeStep1() override = default;
1346
1347 std::string computeSkSL() const override {
1348 return R"(
1349 void main() {
1350 uint2 c = sk_LocalInvocationID.xy;
1351 uint checkerBoardColor = (c.x + (c.y % 2)) % 2;
1352 textureWrite(dst, c, half4(checkerBoardColor, 0, 0, 1));
1353 }
1354 )";
1355 }
1356
1357 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1358 int index, const ResourceDesc& r) const override {
1359 SkASSERT(index == 0);
1360 return {{kSrcDim, kSrcDim}, kRGBA_8888_SkColorType};
1361 }
1362
1364 return WorkgroupSize(1, 1, 1);
1365 }
1366 } step1;
1367
1368 class TestComputeStep2 : public ComputeStep {
1369 public:
1370 TestComputeStep2() : ComputeStep(
1371 /*name=*/"Test_SampledTexture_Sample",
1372 /*localDispatchSize=*/{kDstDim, kDstDim, 1},
1373 /*resources=*/{
1374 // Declare the storage texture before the sampled texture. This tests that
1375 // binding index assignment works consistently across all backends when a
1376 // sampler-less texture and a texture+sampler pair are intermixed and sampler
1377 // bindings aren't necessarily contiguous when the ranges are distinct.
1378 {
1379 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1380 /*flow=*/DataFlow::kShared,
1381 /*policy=*/ResourcePolicy::kNone,
1382 /*slot=*/1,
1383 /*sksl=*/"dst",
1384 },
1385 {
1386 /*type=*/ResourceType::kSampledTexture,
1387 /*flow=*/DataFlow::kShared,
1388 /*policy=*/ResourcePolicy::kNone,
1389 /*slot=*/0,
1390 /*sksl=*/"src",
1391 }
1392 }) {}
1393 ~TestComputeStep2() override = default;
1394
1395 std::string computeSkSL() const override {
1396 return R"(
1397 void main() {
1398 // Normalize the 4x4 invocation indices and sample the source texture using
1399 // that.
1400 uint2 dstCoord = sk_LocalInvocationID.xy;
1401 const float2 dstSizeInv = float2(0.25, 0.25);
1402 float2 unormCoord = float2(dstCoord) * dstSizeInv;
1403
1404 // Use explicit LOD, as quad derivatives are not available to a compute shader.
1405 half4 color = sampleLod(src, unormCoord, 0);
1406 textureWrite(dst, dstCoord, color);
1407 }
1408 )";
1409 }
1410
1411 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1412 int index, const ResourceDesc& r) const override {
1413 SkASSERT(index == 0 || index == 1);
1414 return {{kDstDim, kDstDim}, kRGBA_8888_SkColorType};
1415 }
1416
1417 SamplerDesc calculateSamplerParameters(int index, const ResourceDesc&) const override {
1418 SkASSERT(index == 1);
1419 // Use the repeat tile mode to sample an infinite checkerboard.
1420 constexpr SkTileMode kTileModes[2] = {SkTileMode::kRepeat, SkTileMode::kRepeat};
1421 return {SkFilterMode::kLinear, kTileModes};
1422 }
1423
1425 return WorkgroupSize(1, 1, 1);
1426 }
1427 } step2;
1428
1429 DispatchGroup::Builder builder(recorder.get());
1430 builder.appendStep(&step1);
1431 builder.appendStep(&step2);
1432
1433 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
1434 if (!dst) {
1435 ERRORF(reporter, "shared resource at slot 1 is missing");
1436 return;
1437 }
1438
1439 // Record the compute task
1441 groups.push_back(builder.finalize());
1442 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1443
1444 // Submit the work and wait for it to complete.
1445 std::unique_ptr<Recording> recording = recorder->snap();
1446 if (!recording) {
1447 ERRORF(reporter, "Failed to make recording");
1448 return;
1449 }
1450
1451 InsertRecordingInfo insertInfo;
1452 insertInfo.fRecording = recording.get();
1453 context->insertRecording(insertInfo);
1454 testContext->syncedSubmit(context);
1455
1457 SkImageInfo imgInfo =
1459 bitmap.allocPixels(imgInfo);
1460
1461 SkPixmap pixels;
1462 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1463 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1464
1465 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1466 REPORTER_ASSERT(reporter, readPixelsSuccess);
1467
1468 for (uint32_t x = 0; x < kDstDim; ++x) {
1469 for (uint32_t y = 0; y < kDstDim; ++y) {
1470 SkColor4f color = pixels.getColor4f(x, y);
1471 REPORTER_ASSERT(reporter, color.fR > 0.49 && color.fR < 0.51,
1472 "At position {%u, %u}, "
1473 "expected red channel in range [0.49, 0.51], "
1474 "found {%.3f}",
1475 x, y, color.fR);
1476 }
1477 }
1478}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [11/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_SingleDispatchTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 111 of file ComputeTest.cpp.

114 {
115 constexpr uint32_t kProblemSize = 512;
116 constexpr float kFactor = 4.f;
117
118 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
119 // processes 1 vector at a time.
120 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
121
122 std::unique_ptr<Recorder> recorder = context->makeRecorder();
123
124 class TestComputeStep : public ComputeStep {
125 public:
126 // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
127 // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
128 TestComputeStep() : ComputeStep(
129 /*name=*/"TestArrayMultiply",
130 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
131 /*resources=*/{
132 // Input buffer:
133 {
134 // TODO(b/299979165): Declare this binding as read-only.
135 /*type=*/ResourceType::kStorageBuffer,
136 /*flow=*/DataFlow::kPrivate,
137 /*policy=*/ResourcePolicy::kMapped,
138 /*sksl=*/"inputBlock {\n"
139 " float factor;\n"
140 " layout(offset=16) float4 in_data[];\n"
141 "}",
142 },
143 // Output buffer:
144 {
145 /*type=*/ResourceType::kStorageBuffer,
146 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
147 // Builder
148 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
149 /*slot=*/0,
150 /*sksl=*/"outputBlock { float4 out_data[]; }",
151 }
152 }) {}
153 ~TestComputeStep() override = default;
154
155 // A kernel that multiplies a large array of floats by a supplied factor.
156 std::string computeSkSL() const override {
157 return R"(
158 void main() {
159 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
160 }
161 )";
162 }
163
164 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
165 if (index == 0) {
166 SkASSERT(r.fFlow == DataFlow::kPrivate);
167 return sizeof(float) * (kProblemSize + 4);
168 }
169 SkASSERT(index == 1);
170 SkASSERT(r.fSlot == 0);
171 SkASSERT(r.fFlow == DataFlow::kShared);
172 return sizeof(float) * kProblemSize;
173 }
174
175 void prepareStorageBuffer(int resourceIndex,
176 const ResourceDesc& r,
177 void* buffer,
178 size_t bufferSize) const override {
179 // Only initialize the input buffer.
180 if (resourceIndex != 0) {
181 return;
182 }
183 SkASSERT(r.fFlow == DataFlow::kPrivate);
184
185 size_t dataCount = sizeof(float) * (kProblemSize + 4);
186 SkASSERT(bufferSize == dataCount);
187 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
188 inData[0] = kFactor;
189 for (unsigned int i = 0; i < kProblemSize; ++i) {
190 inData[i + 4] = i + 1;
191 }
192 }
193
195 return WorkgroupSize(1, 1, 1);
196 }
197 } step;
198
199 DispatchGroup::Builder builder(recorder.get());
200 if (!builder.appendStep(&step)) {
201 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
202 return;
203 }
204
205 // The output buffer should have been placed in the right output slot.
206 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
207 if (!outputInfo) {
208 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
209 return;
210 }
211
212 // Record the compute task
214 groups.push_back(builder.finalize());
215 recorder->priv().add(ComputeTask::Make(std::move(groups)));
216
217 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
218 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
219
220 // Submit the work and wait for it to complete.
221 std::unique_ptr<Recording> recording = recorder->snap();
222 if (!recording) {
223 ERRORF(reporter, "Failed to make recording");
224 return;
225 }
226
227 InsertRecordingInfo insertInfo;
228 insertInfo.fRecording = recording.get();
229 context->insertRecording(insertInfo);
230 testContext->syncedSubmit(context);
231
232 // Verify the contents of the output buffer.
233 float* outData = static_cast<float*>(
234 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
235 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
236 for (unsigned int i = 0; i < kProblemSize; ++i) {
237 const float expected = (i + 1) * kFactor;
238 const float found = outData[i];
239 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
240 }
241}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [12/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_StorageTexture  ,
reporter  ,
context  ,
testContext   
)

Definition at line 770 of file ComputeTest.cpp.

773 {
774 std::unique_ptr<Recorder> recorder = context->makeRecorder();
775
776 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
777 // size.
778 constexpr uint32_t kDim = 16;
779
780 class TestComputeStep : public ComputeStep {
781 public:
782 TestComputeStep() : ComputeStep(
783 /*name=*/"TestStorageTexture",
784 /*localDispatchSize=*/{kDim, kDim, 1},
785 /*resources=*/{
786 {
787 /*type=*/ResourceType::kWriteOnlyStorageTexture,
788 /*flow=*/DataFlow::kShared,
789 /*policy=*/ResourcePolicy::kNone,
790 /*slot=*/0,
791 /*sksl=*/"dst",
792 }
793 }) {}
794 ~TestComputeStep() override = default;
795
796 std::string computeSkSL() const override {
797 return R"(
798 void main() {
799 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
800 }
801 )";
802 }
803
804 std::tuple<SkISize, SkColorType> calculateTextureParameters(
805 int index, const ResourceDesc& r) const override {
806 return {{kDim, kDim}, kRGBA_8888_SkColorType};
807 }
808
810 return WorkgroupSize(1, 1, 1);
811 }
812 } step;
813
814 DispatchGroup::Builder builder(recorder.get());
815 if (!builder.appendStep(&step)) {
816 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
817 return;
818 }
819
820 sk_sp<TextureProxy> texture = builder.getSharedTextureResource(0);
821 if (!texture) {
822 ERRORF(reporter, "Shared resource at slot 0 is missing");
823 return;
824 }
825
826 // Record the compute task
828 groups.push_back(builder.finalize());
829 recorder->priv().add(ComputeTask::Make(std::move(groups)));
830
831 // Submit the work and wait for it to complete.
832 std::unique_ptr<Recording> recording = recorder->snap();
833 if (!recording) {
834 ERRORF(reporter, "Failed to make recording");
835 return;
836 }
837
838 InsertRecordingInfo insertInfo;
839 insertInfo.fRecording = recording.get();
840 context->insertRecording(insertInfo);
841 testContext->syncedSubmit(context);
842
844 SkImageInfo imgInfo =
846 bitmap.allocPixels(imgInfo);
847
848 SkPixmap pixels;
849 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
850 REPORTER_ASSERT(reporter, peekPixelsSuccess);
851
852 bool readPixelsSuccess = context->priv().readPixels(pixels, texture.get(), imgInfo, 0, 0);
853 REPORTER_ASSERT(reporter, readPixelsSuccess);
854
855 for (uint32_t x = 0; x < kDim; ++x) {
856 for (uint32_t y = 0; y < kDim; ++y) {
858 SkColor4f color = pixels.getColor4f(x, y);
859 REPORTER_ASSERT(reporter, expected == color,
860 "At position {%u, %u}, "
861 "expected {%.1f, %.1f, %.1f, %.1f}, "
862 "found {%.1f, %.1f, %.1f, %.1f}",
863 x, y,
864 expected.fR, expected.fG, expected.fB, expected.fA,
865 color.fR, color.fG, color.fB, color.fA);
866 }
867 }
868}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [13/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_StorageTextureMultipleComputeSteps  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1171 of file ComputeTest.cpp.

1174 {
1175 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1176
1177 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
1178 // size.
1179 constexpr uint32_t kDim = 16;
1180
1181 // Writes to a texture in slot 0.
1182 class TestComputeStep1 : public ComputeStep {
1183 public:
1184 TestComputeStep1() : ComputeStep(
1185 /*name=*/"TestStorageTexturesFirstPass",
1186 /*localDispatchSize=*/{kDim, kDim, 1},
1187 /*resources=*/{
1188 {
1189 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1190 /*flow=*/DataFlow::kShared,
1191 /*policy=*/ResourcePolicy::kNone,
1192 /*slot=*/0,
1193 /*sksl=*/"dst",
1194 }
1195 }) {}
1196 ~TestComputeStep1() override = default;
1197
1198 std::string computeSkSL() const override {
1199 return R"(
1200 void main() {
1201 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
1202 }
1203 )";
1204 }
1205
1206 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1207 int index, const ResourceDesc& r) const override {
1208 SkASSERT(index == 0);
1209 return {{kDim, kDim}, kRGBA_8888_SkColorType};
1210 }
1211
1213 return WorkgroupSize(1, 1, 1);
1214 }
1215 } step1;
1216
1217 // Reads from the texture in slot 0 and writes it to another texture in slot 1.
1218 class TestComputeStep2 : public ComputeStep {
1219 public:
1220 TestComputeStep2() : ComputeStep(
1221 /*name=*/"TestStorageTexturesSecondPass",
1222 /*localDispatchSize=*/{kDim, kDim, 1},
1223 /*resources=*/{
1224 {
1225 /*type=*/ResourceType::kReadOnlyTexture,
1226 /*flow=*/DataFlow::kShared,
1227 /*policy=*/ResourcePolicy::kNone,
1228 /*slot=*/0,
1229 /*sksl=*/"src",
1230 },
1231 {
1232 /*type=*/ResourceType::kWriteOnlyStorageTexture,
1233 /*flow=*/DataFlow::kShared,
1234 /*policy=*/ResourcePolicy::kNone,
1235 /*slot=*/1,
1236 /*sksl=*/"dst",
1237 }
1238 }) {}
1239 ~TestComputeStep2() override = default;
1240
1241 std::string computeSkSL() const override {
1242 return R"(
1243 void main() {
1244 half4 color = textureRead(src, sk_LocalInvocationID.xy);
1245 textureWrite(dst, sk_LocalInvocationID.xy, color);
1246 }
1247 )";
1248 }
1249
1250 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1251 int index, const ResourceDesc& r) const override {
1252 SkASSERT(index == 1);
1253 return {{kDim, kDim}, kRGBA_8888_SkColorType};
1254 }
1255
1257 return WorkgroupSize(1, 1, 1);
1258 }
1259 } step2;
1260
1261 DispatchGroup::Builder builder(recorder.get());
1262 builder.appendStep(&step1);
1263 builder.appendStep(&step2);
1264
1265 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
1266 if (!dst) {
1267 ERRORF(reporter, "shared resource at slot 1 is missing");
1268 return;
1269 }
1270
1271 // Record the compute task
1273 groups.push_back(builder.finalize());
1274 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1275
1276 // Submit the work and wait for it to complete.
1277 std::unique_ptr<Recording> recording = recorder->snap();
1278 if (!recording) {
1279 ERRORF(reporter, "Failed to make recording");
1280 return;
1281 }
1282
1283 InsertRecordingInfo insertInfo;
1284 insertInfo.fRecording = recording.get();
1285 context->insertRecording(insertInfo);
1286 testContext->syncedSubmit(context);
1287
1289 SkImageInfo imgInfo =
1291 bitmap.allocPixels(imgInfo);
1292
1293 SkPixmap pixels;
1294 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1295 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1296
1297 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1298 REPORTER_ASSERT(reporter, readPixelsSuccess);
1299
1300 for (uint32_t x = 0; x < kDim; ++x) {
1301 for (uint32_t y = 0; y < kDim; ++y) {
1303 SkColor4f color = pixels.getColor4f(x, y);
1304 REPORTER_ASSERT(reporter, expected == color,
1305 "At position {%u, %u}, "
1306 "expected {%.1f, %.1f, %.1f, %.1f}, "
1307 "found {%.1f, %.1f, %.1f, %.1f}",
1308 x, y,
1309 expected.fR, expected.fG, expected.fB, expected.fA,
1310 color.fR, color.fG, color.fB, color.fA);
1311 }
1312 }
1313}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [14/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_StorageTextureReadAndWrite  ,
reporter  ,
context  ,
testContext   
)

Definition at line 872 of file ComputeTest.cpp.

875 {
876 std::unique_ptr<Recorder> recorder = context->makeRecorder();
877
878 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
879 // size.
880 constexpr uint32_t kDim = 16;
881
882 class TestComputeStep : public ComputeStep {
883 public:
884 TestComputeStep() : ComputeStep(
885 /*name=*/"TestStorageTextureReadAndWrite",
886 /*localDispatchSize=*/{kDim, kDim, 1},
887 /*resources=*/{
888 {
889 /*type=*/ResourceType::kReadOnlyTexture,
890 /*flow=*/DataFlow::kShared,
891 /*policy=*/ResourcePolicy::kNone,
892 /*slot=*/0,
893 /*sksl=*/"src",
894 },
895 {
896 /*type=*/ResourceType::kWriteOnlyStorageTexture,
897 /*flow=*/DataFlow::kShared,
898 /*policy=*/ResourcePolicy::kNone,
899 /*slot=*/1,
900 /*sksl=*/"dst",
901 }
902 }) {}
903 ~TestComputeStep() override = default;
904
905 std::string computeSkSL() const override {
906 return R"(
907 void main() {
908 half4 color = textureRead(src, sk_LocalInvocationID.xy);
909 textureWrite(dst, sk_LocalInvocationID.xy, color);
910 }
911 )";
912 }
913
914 std::tuple<SkISize, SkColorType> calculateTextureParameters(
915 int index, const ResourceDesc& r) const override {
916 SkASSERT(index == 1);
917 return {{kDim, kDim}, kRGBA_8888_SkColorType};
918 }
919
921 return WorkgroupSize(1, 1, 1);
922 }
923 } step;
924
925 // Create and populate an input texture.
926 SkBitmap srcBitmap;
927 SkImageInfo srcInfo =
929 srcBitmap.allocPixels(srcInfo);
930 SkPixmap srcPixels;
931 bool srcPeekPixelsSuccess = srcBitmap.peekPixels(&srcPixels);
932 REPORTER_ASSERT(reporter, srcPeekPixelsSuccess);
933 for (uint32_t x = 0; x < kDim; ++x) {
934 for (uint32_t y = 0; y < kDim; ++y) {
935 *srcPixels.writable_addr32(x, y) =
936 SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0);
937 }
938 }
939
940 auto texInfo = context->priv().caps()->getDefaultSampledTextureInfo(kRGBA_8888_SkColorType,
941 skgpu::Mipmapped::kNo,
942 skgpu::Protected::kNo,
943 skgpu::Renderable::kNo);
944 sk_sp<TextureProxy> srcProxy = TextureProxy::Make(context->priv().caps(),
945 recorder->priv().resourceProvider(),
946 {kDim, kDim},
947 texInfo,
949 MipLevel mipLevel;
950 mipLevel.fPixels = srcPixels.addr();
951 mipLevel.fRowBytes = srcPixels.rowBytes();
953 srcProxy,
954 srcPixels.info().colorInfo(),
955 srcPixels.info().colorInfo(),
956 {mipLevel},
957 SkIRect::MakeWH(kDim, kDim),
958 std::make_unique<ImageUploadContext>());
959 if (!upload.isValid()) {
960 ERRORF(reporter, "Could not create UploadInstance");
961 return;
962 }
963 recorder->priv().add(UploadTask::Make(std::move(upload)));
964
965 DispatchGroup::Builder builder(recorder.get());
966
967 // Assign the input texture to slot 0. This corresponds to the ComputeStep's "src" texture
968 // binding.
969 builder.assignSharedTexture(std::move(srcProxy), 0);
970
971 if (!builder.appendStep(&step)) {
972 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
973 return;
974 }
975
976 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
977 if (!dst) {
978 ERRORF(reporter, "shared resource at slot 1 is missing");
979 return;
980 }
981
982 // Record the compute task
984 groups.push_back(builder.finalize());
985 recorder->priv().add(ComputeTask::Make(std::move(groups)));
986
987 // Submit the work and wait for it to complete.
988 std::unique_ptr<Recording> recording = recorder->snap();
989 if (!recording) {
990 ERRORF(reporter, "Failed to make recording");
991 return;
992 }
993
994 InsertRecordingInfo insertInfo;
995 insertInfo.fRecording = recording.get();
996 context->insertRecording(insertInfo);
997 testContext->syncedSubmit(context);
998
1000 SkImageInfo imgInfo =
1002 bitmap.allocPixels(imgInfo);
1003
1004 SkPixmap pixels;
1005 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1006 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1007
1008 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1009 REPORTER_ASSERT(reporter, readPixelsSuccess);
1010
1011 for (uint32_t x = 0; x < kDim; ++x) {
1012 for (uint32_t y = 0; y < kDim; ++y) {
1014 SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
1015 SkColor4f color = pixels.getColor4f(x, y);
1016 REPORTER_ASSERT(reporter, expected == color,
1017 "At position {%u, %u}, "
1018 "expected {%.1f, %.1f, %.1f, %.1f}, "
1019 "found {%.1f, %.1f, %.1f, %.1f}",
1020 x, y,
1021 expected.fR, expected.fG, expected.fB, expected.fA,
1022 color.fR, color.fG, color.fB, color.fA);
1023 }
1024 }
1025}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [15/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_UniformBufferTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 496 of file ComputeTest.cpp.

499 {
500 // TODO(b/315834710): This fails on Dawn D3D11
501 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
502 return;
503 }
504
505 constexpr uint32_t kProblemSize = 512;
506 constexpr float kFactor = 4.f;
507
508 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
509 // processes 1 vector at a time.
510 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
511
512 std::unique_ptr<Recorder> recorder = context->makeRecorder();
513
514 class TestComputeStep : public ComputeStep {
515 public:
516 TestComputeStep() : ComputeStep(
517 /*name=*/"TestArrayMultiply",
518 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
519 /*resources=*/{
520 // Uniform buffer:
521 {
522 /*type=*/ResourceType::kUniformBuffer,
523 /*flow=*/DataFlow::kPrivate,
524 /*policy=*/ResourcePolicy::kMapped,
525 /*sksl=*/"uniformBlock { float factor; }"
526 },
527 // Input buffer:
528 {
529 /*type=*/ResourceType::kStorageBuffer,
530 /*flow=*/DataFlow::kPrivate,
531 /*policy=*/ResourcePolicy::kMapped,
532 /*sksl=*/"inputBlock { float4 in_data[]; }",
533 },
534 // Output buffer:
535 {
536 /*type=*/ResourceType::kStorageBuffer,
537 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
538 // Builder
539 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
540 /*slot=*/0,
541 /*sksl=*/"outputBlock { float4 out_data[]; }",
542 }
543 }) {}
544 ~TestComputeStep() override = default;
545
546 // A kernel that multiplies a large array of floats by a supplied factor.
547 std::string computeSkSL() const override {
548 return R"(
549 void main() {
550 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
551 }
552 )";
553 }
554
555 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
556 if (index == 0) {
557 SkASSERT(r.fFlow == DataFlow::kPrivate);
558 return sizeof(float);
559 }
560 if (index == 1) {
561 SkASSERT(r.fFlow == DataFlow::kPrivate);
562 return sizeof(float) * kProblemSize;
563 }
564 SkASSERT(index == 2);
565 SkASSERT(r.fSlot == 0);
566 SkASSERT(r.fFlow == DataFlow::kShared);
567 return sizeof(float) * kProblemSize;
568 }
569
570 void prepareStorageBuffer(int resourceIndex,
571 const ResourceDesc& r,
572 void* buffer,
573 size_t bufferSize) const override {
574 // Only initialize the input storage buffer.
575 if (resourceIndex != 1) {
576 return;
577 }
578 SkASSERT(r.fFlow == DataFlow::kPrivate);
579 size_t dataCount = sizeof(float) * kProblemSize;
580 SkASSERT(bufferSize == dataCount);
581 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
582 for (unsigned int i = 0; i < kProblemSize; ++i) {
583 inData[i] = i + 1;
584 }
585 }
586
587 void prepareUniformBuffer(int resourceIndex,
588 const ResourceDesc&,
589 UniformManager* mgr) const override {
590 SkASSERT(resourceIndex == 0);
592 const Uniform uniforms[] = {{"factor", SkSLType::kFloat}};
593 mgr->setExpectedUniforms(uniforms);
594 )
595 mgr->write(kFactor);
596 }
597
599 return WorkgroupSize(1, 1, 1);
600 }
601 } step;
602
603 DispatchGroup::Builder builder(recorder.get());
604 if (!builder.appendStep(&step)) {
605 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
606 return;
607 }
608
609 // The output buffer should have been placed in the right output slot.
610 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
611 if (!outputInfo) {
612 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
613 return;
614 }
615
616 // Record the compute task
618 groups.push_back(builder.finalize());
619 recorder->priv().add(ComputeTask::Make(std::move(groups)));
620
621 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
622 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
623
624 // Submit the work and wait for it to complete.
625 std::unique_ptr<Recording> recording = recorder->snap();
626 if (!recording) {
627 ERRORF(reporter, "Failed to make recording");
628 return;
629 }
630
631 InsertRecordingInfo insertInfo;
632 insertInfo.fRecording = recording.get();
633 context->insertRecording(insertInfo);
634 testContext->syncedSubmit(context);
635
636 // Verify the contents of the output buffer.
637 float* outData = static_cast<float*>(
638 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
639 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
640 for (unsigned int i = 0; i < kProblemSize; ++i) {
641 const float expected = (i + 1) * kFactor;
642 const float found = outData[i];
643 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
644 }
645}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT()

DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT ( Compute_NativeShaderSourceWGSL  ,
reporter  ,
context  ,
testContext   
)

Definition at line 2524 of file ComputeTest.cpp.

2524 {
2525 // This fails on Dawn D3D11, b/315834710
2526 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2527 return;
2528 }
2529
2530 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2531
2532 constexpr uint32_t kWorkgroupCount = 32;
2533 constexpr uint32_t kWorkgroupSize = 256; // The WebGPU default workgroup size limit is 256
2534
2535 class TestComputeStep : public ComputeStep {
2536 public:
2537 TestComputeStep() : ComputeStep(
2538 /*name=*/"TestAtomicOperationsWGSL",
2539 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2540 /*resources=*/{
2541 {
2542 /*type=*/ResourceType::kStorageBuffer,
2543 /*flow=*/DataFlow::kShared,
2544 /*policy=*/ResourcePolicy::kMapped,
2545 /*slot=*/0,
2546 }
2547 },
2548 /*workgroupBuffers=*/{},
2549 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2550 ~TestComputeStep() override = default;
2551
2552 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2553 SkASSERT(format == NativeShaderFormat::kWGSL);
2554 static constexpr std::string_view kSource = R"(
2555 @group(0) @binding(0) var<storage, read_write> globalCounter: atomic<u32>;
2556
2557 var<workgroup> localCounter: atomic<u32>;
2558
2559 @compute @workgroup_size(256)
2560 fn atomicCount(@builtin(local_invocation_id) localId: vec3u) {
2561 // Initialize the local counter.
2562 if localId.x == 0u {
2563 atomicStore(&localCounter, 0u);
2564 }
2565
2566 // Synchronize the threads in the workgroup so they all see the initial value.
2567 workgroupBarrier();
2568
2569 // All threads increment the counter.
2570 atomicAdd(&localCounter, 1u);
2571
2572 // Synchronize the threads again to ensure they have all executed the increment
2573 // and the following load reads the same value across all threads in the
2574 // workgroup.
2575 workgroupBarrier();
2576
2577 // Add the workgroup-only tally to the global counter.
2578 if localId.x == 0u {
2579 let tally = atomicLoad(&localCounter);
2580 atomicAdd(&globalCounter, tally);
2581 }
2582 }
2583 )";
2584 return {kSource, "atomicCount"};
2585 }
2586
2587 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2588 SkASSERT(index == 0);
2589 SkASSERT(r.fSlot == 0);
2590 SkASSERT(r.fFlow == DataFlow::kShared);
2591 return sizeof(uint32_t);
2592 }
2593
2594 WorkgroupSize calculateGlobalDispatchSize() const override {
2595 return WorkgroupSize(kWorkgroupCount, 1, 1);
2596 }
2597
2598 void prepareStorageBuffer(int resourceIndex,
2599 const ResourceDesc& r,
2600 void* buffer,
2601 size_t bufferSize) const override {
2602 SkASSERT(resourceIndex == 0);
2603 *static_cast<uint32_t*>(buffer) = 0;
2604 }
2605 } step;
2606
2607 DispatchGroup::Builder builder(recorder.get());
2608 builder.appendStep(&step);
2609
2610 BindBufferInfo info = builder.getSharedBufferResource(0);
2611 if (!info) {
2612 ERRORF(reporter, "shared resource at slot 0 is missing");
2613 return;
2614 }
2615
2616 // Record the compute pass task.
2617 ComputeTask::DispatchGroupList groups;
2618 groups.push_back(builder.finalize());
2619 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2620
2621 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2622 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2623
2624 // Submit the work and wait for it to complete.
2625 std::unique_ptr<Recording> recording = recorder->snap();
2626 if (!recording) {
2627 ERRORF(reporter, "Failed to make recording");
2628 return;
2629 }
2630
2631 InsertRecordingInfo insertInfo;
2632 insertInfo.fRecording = recording.get();
2633 context->insertRecording(insertInfo);
2634 testContext->syncedSubmit(context);
2635
2636 // Verify the contents of the output buffer.
2637 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2638 const uint32_t result = static_cast<const uint32_t*>(
2639 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2640 REPORTER_ASSERT(reporter,
2641 result == kExpectedCount,
2642 "expected '%u', found '%u'",
2643 kExpectedCount,
2644 result);
2645}

◆ DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT() [1/2]

DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT ( Compute_NativeShaderSourceMetal  ,
reporter  ,
context  ,
testContext   
)

Definition at line 2271 of file ComputeTest.cpp.

2274 {
2275 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2276
2277 constexpr uint32_t kWorkgroupCount = 32;
2278 constexpr uint32_t kWorkgroupSize = 1024;
2279
2280 class TestComputeStep : public ComputeStep {
2281 public:
2282 TestComputeStep() : ComputeStep(
2283 /*name=*/"TestAtomicOperationsMetal",
2284 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2285 /*resources=*/{
2286 {
2287 /*type=*/ResourceType::kStorageBuffer,
2288 /*flow=*/DataFlow::kShared,
2289 /*policy=*/ResourcePolicy::kMapped,
2290 /*slot=*/0,
2291 }
2292 },
2293 /*workgroupBuffers=*/{},
2294 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2295 ~TestComputeStep() override = default;
2296
2297 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2298 SkASSERT(format == NativeShaderFormat::kMSL);
2299 static constexpr std::string_view kSource = R"(
2300 #include <metal_stdlib>
2301
2302 using namespace metal;
2303
2304 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2305 device atomic_uint& globalCounter [[buffer(0)]]) {
2306 threadgroup atomic_uint localCounter;
2307
2308 // Initialize the local counter.
2309 if (localId.x == 0u) {
2310 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2311 }
2312
2313 // Synchronize the threads in the workgroup so they all see the initial value.
2314 threadgroup_barrier(mem_flags::mem_threadgroup);
2315
2316 // All threads increment the counter.
2317 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2318
2319 // Synchronize the threads again to ensure they have all executed the increment
2320 // and the following load reads the same value across all threads in the
2321 // workgroup.
2322 threadgroup_barrier(mem_flags::mem_threadgroup);
2323
2324 // Add the workgroup-only tally to the global counter.
2325 if (localId.x == 0u) {
2326 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2327 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2328 }
2329 }
2330 )";
2331 return {kSource, "atomicCount"};
2332 }
2333
2334 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2335 SkASSERT(index == 0);
2336 SkASSERT(r.fSlot == 0);
2337 SkASSERT(r.fFlow == DataFlow::kShared);
2338 return sizeof(uint32_t);
2339 }
2340
2341 WorkgroupSize calculateGlobalDispatchSize() const override {
2342 return WorkgroupSize(kWorkgroupCount, 1, 1);
2343 }
2344
2345 void prepareStorageBuffer(int resourceIndex,
2346 const ResourceDesc& r,
2347 void* buffer,
2348 size_t bufferSize) const override {
2349 SkASSERT(resourceIndex == 0);
2350 *static_cast<uint32_t*>(buffer) = 0;
2351 }
2352 } step;
2353
2354 DispatchGroup::Builder builder(recorder.get());
2355 builder.appendStep(&step);
2356
2357 BindBufferInfo info = builder.getSharedBufferResource(0);
2358 if (!info) {
2359 ERRORF(reporter, "shared resource at slot 0 is missing");
2360 return;
2361 }
2362
2363 // Record the compute pass task.
2364 ComputeTask::DispatchGroupList groups;
2365 groups.push_back(builder.finalize());
2366 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2367
2368 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2369 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2370
2371 // Submit the work and wait for it to complete.
2372 std::unique_ptr<Recording> recording = recorder->snap();
2373 if (!recording) {
2374 ERRORF(reporter, "Failed to make recording");
2375 return;
2376 }
2377
2378 InsertRecordingInfo insertInfo;
2379 insertInfo.fRecording = recording.get();
2380 context->insertRecording(insertInfo);
2381 testContext->syncedSubmit(context);
2382
2383 // Verify the contents of the output buffer.
2384 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2385 const uint32_t result = static_cast<const uint32_t*>(
2386 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2387 REPORTER_ASSERT(reporter,
2388 result == kExpectedCount,
2389 "expected '%u', found '%u'",
2390 kExpectedCount,
2391 result);
2392}

◆ DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT() [2/2]

DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT ( Compute_WorkgroupBufferDescMetal  ,
reporter  ,
context  ,
testContext   
)

Definition at line 2394 of file ComputeTest.cpp.

2397 {
2398 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2399
2400 constexpr uint32_t kWorkgroupCount = 32;
2401 constexpr uint32_t kWorkgroupSize = 1024;
2402
2403 class TestComputeStep : public ComputeStep {
2404 public:
2405 TestComputeStep() : ComputeStep(
2406 /*name=*/"TestAtomicOperationsMetal",
2407 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2408 /*resources=*/{
2409 {
2410 /*type=*/ResourceType::kStorageBuffer,
2411 /*flow=*/DataFlow::kShared,
2412 /*policy=*/ResourcePolicy::kMapped,
2413 /*slot=*/0,
2414 }
2415 },
2416 /*workgroupBuffers=*/{
2417 {
2418 /*size=*/sizeof(uint32_t),
2419 /*index=*/0u,
2420 }
2421 },
2422 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2423 ~TestComputeStep() override = default;
2424
2425 // This is the same MSL kernel as in Compute_NativeShaderSourceMetal, except `localCounter`
2426 // is an entry-point parameter instead of a local variable. This forces the workgroup
2427 // binding to be encoded explicitly in the command encoder.
2428 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2429 SkASSERT(format == NativeShaderFormat::kMSL);
2430 static constexpr std::string_view kSource = R"(
2431 #include <metal_stdlib>
2432
2433 using namespace metal;
2434
2435 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2436 device atomic_uint& globalCounter [[buffer(0)]],
2437 threadgroup atomic_uint& localCounter [[threadgroup(0)]]) {
2438 // Initialize the local counter.
2439 if (localId.x == 0u) {
2440 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2441 }
2442
2443 // Synchronize the threads in the workgroup so they all see the initial value.
2444 threadgroup_barrier(mem_flags::mem_threadgroup);
2445
2446 // All threads increment the counter.
2447 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2448
2449 // Synchronize the threads again to ensure they have all executed the increment
2450 // and the following load reads the same value across all threads in the
2451 // workgroup.
2452 threadgroup_barrier(mem_flags::mem_threadgroup);
2453
2454 // Add the workgroup-only tally to the global counter.
2455 if (localId.x == 0u) {
2456 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2457 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2458 }
2459 }
2460 )";
2461 return {kSource, "atomicCount"};
2462 }
2463
2464 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2465 SkASSERT(index == 0);
2466 SkASSERT(r.fSlot == 0);
2467 SkASSERT(r.fFlow == DataFlow::kShared);
2468 return sizeof(uint32_t);
2469 }
2470
2471 WorkgroupSize calculateGlobalDispatchSize() const override {
2472 return WorkgroupSize(kWorkgroupCount, 1, 1);
2473 }
2474
2475 void prepareStorageBuffer(int resourceIndex,
2476 const ResourceDesc& r,
2477 void* buffer,
2478 size_t bufferSize) const override {
2479 SkASSERT(resourceIndex == 0);
2480 *static_cast<uint32_t*>(buffer) = 0;
2481 }
2482 } step;
2483
2484 DispatchGroup::Builder builder(recorder.get());
2485 builder.appendStep(&step);
2486
2487 BindBufferInfo info = builder.getSharedBufferResource(0);
2488 if (!info) {
2489 ERRORF(reporter, "shared resource at slot 0 is missing");
2490 return;
2491 }
2492
2493 // Record the compute pass task.
2494 ComputeTask::DispatchGroupList groups;
2495 groups.push_back(builder.finalize());
2496 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2497
2498 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2499 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2500
2501 // Submit the work and wait for it to complete.
2502 std::unique_ptr<Recording> recording = recorder->snap();
2503 if (!recording) {
2504 ERRORF(reporter, "Failed to make recording");
2505 return;
2506 }
2507
2508 InsertRecordingInfo insertInfo;
2509 insertInfo.fRecording = recording.get();
2510 context->insertRecording(insertInfo);
2511 testContext->syncedSubmit(context);
2512
2513 // Verify the contents of the output buffer.
2514 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2515 const uint32_t result = static_cast<const uint32_t*>(
2516 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2517 REPORTER_ASSERT(reporter,
2518 result == kExpectedCount,
2519 "expected '%u', found '%u'",
2520 kExpectedCount,
2521 result);
2522}