36void* map_buffer(
Context* context,
43 while (!
buffer->isMapped()) {
47 std::byte* ptr =
static_cast<std::byte*
>(
buffer->map());
54 if (recorder->
priv().
caps()->drawBufferCanBeMappedForReadback()) {
65 BufferType::kXferGpuToCpu,
66 AccessPattern::kHostVisible,
67 "ComputeTest_TransferToCpu");
78std::unique_ptr<Recording> submit_recording(
Context* context,
81 std::unique_ptr<Recording> recording = recorder->
snap();
100#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS( \
101 name, reporter, graphite_context, test_context) \
102 DEF_GRAPHITE_TEST_FOR_CONTEXTS(name, \
103 is_dawn_or_metal_context_type, \
107 CtsEnforcement::kNever)
115 constexpr uint32_t kProblemSize = 512;
116 constexpr float kFactor = 4.f;
120 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
122 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
130 {kWorkgroupSize, 1, 1},
135 ResourceType::kStorageBuffer,
140 " layout(offset=16) float4 in_data[];\n"
145 ResourceType::kStorageBuffer,
150 "outputBlock { float4 out_data[]; }",
153 ~TestComputeStep()
override =
default;
159 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
166 SkASSERT(r.fFlow == DataFlow::kPrivate);
167 return sizeof(float) * (kProblemSize + 4);
171 SkASSERT(r.fFlow == DataFlow::kShared);
172 return sizeof(float) * kProblemSize;
176 const ResourceDesc& r,
178 size_t bufferSize)
const override {
180 if (resourceIndex != 0) {
183 SkASSERT(r.fFlow == DataFlow::kPrivate);
185 size_t dataCount =
sizeof(float) * (kProblemSize + 4);
189 for (
unsigned int i = 0; i < kProblemSize; ++i) {
190 inData[i + 4] = i + 1;
200 if (!builder.appendStep(&
step)) {
208 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
218 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
221 std::unique_ptr<Recording> recording = recorder->snap();
233 float* outData =
static_cast<float*
>(
234 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
235 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
236 for (
unsigned int i = 0; i < kProblemSize; ++i) {
237 const float expected = (i + 1) * kFactor;
238 const float found = outData[i];
250 if (testContext->
contextType() == skgpu::ContextType::kDawn_D3D11) {
254 constexpr uint32_t kProblemSize = 512;
255 constexpr float kFactor1 = 4.f;
256 constexpr float kFactor2 = 3.f;
260 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
262 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
271 "TestArrayMultiplyFirstPass",
272 {kWorkgroupSize, 1, 1},
277 ResourceType::kStorageBuffer,
282 " layout(offset=16) float4 in_data[];\n"
287 ResourceType::kStorageBuffer,
291 "outputBlock1 { float4 forward_data[]; }",
294 ResourceType::kStorageBuffer,
298 "outputBlock2 { float2 extra_data; }",
301 ~TestComputeStep1()
override =
default;
307 uint idx = sk_GlobalInvocationID.x;
308 forward_data[idx] = in_data[idx] * factor;
310 extra_data.x = factor;
311 extra_data.y = 2 * factor;
319 SkASSERT(r.fFlow == DataFlow::kPrivate);
320 return sizeof(float) * (kProblemSize + 4);
323 SkASSERT(r.fFlow == DataFlow::kShared);
325 return sizeof(float) * kProblemSize;
330 SkASSERT(r.fFlow == DataFlow::kShared);
331 return 2 *
sizeof(float);
335 const ResourceDesc& r,
337 size_t bufferSize)
const override {
338 if (resourceIndex != 0) {
342 size_t dataCount =
sizeof(float) * (kProblemSize + 4);
345 inData[0] = kFactor1;
346 for (
unsigned int i = 0; i < kProblemSize; ++i) {
347 inData[i + 4] = i + 1;
359 "TestArrayMultiplySecondPass",
360 {kWorkgroupSize, 1, 1},
364 ResourceType::kStorageBuffer,
368 "inputBlock { float4 in_data[]; }",
371 ResourceType::kStorageBuffer,
374 "factorBlock { float factor; }"
378 ResourceType::kStorageBuffer,
382 "outputBlock { float4 out_data[]; }",
385 ~TestComputeStep2()
override =
default;
391 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
399 SkASSERT(r.fFlow == DataFlow::kPrivate);
400 return sizeof(float) * 4;
404 SkASSERT(r.fFlow == DataFlow::kShared);
405 return sizeof(float) * kProblemSize;
409 const ResourceDesc& r,
411 size_t bufferSize)
const override {
412 if (resourceIndex != 1) {
415 SkASSERT(r.fFlow == DataFlow::kPrivate);
416 *
static_cast<float*
>(
buffer) = kFactor2;
425 builder.appendStep(&step1);
426 builder.appendStep(&step2);
432 std::holds_alternative<BufferView>(builder.outputTable().fSharedSlots[0]),
433 "shared resource at slot 0 is missing");
436 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
441 BindBufferInfo extraOutputInfo = builder.getSharedBufferResource(1);
442 if (!extraOutputInfo) {
453 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
454 auto extraOutputBuffer = sync_buffer_to_cpu(recorder.get(), extraOutputInfo.
fBuffer);
457 std::unique_ptr<Recording> recording = recorder->snap();
469 float* outData =
static_cast<float*
>(
470 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
471 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
472 for (
unsigned int i = 0; i < kProblemSize; ++i) {
473 const float expected = (i + 1) * kFactor1 * kFactor2;
474 const float found = outData[i];
479 float* extraOutData =
static_cast<float*
>(
480 map_buffer(context, testContext, extraOutputBuffer.get(), extraOutputInfo.
fOffset));
481 SkASSERT(extraOutputBuffer->isMapped() && extraOutData !=
nullptr);
483 kFactor1 == extraOutData[0],
484 "expected '%f', found '%f'",
488 2 * kFactor1 == extraOutData[1],
489 "expected '%f', found '%f'",
501 if (testContext->
contextType() == skgpu::ContextType::kDawn_D3D11) {
505 constexpr uint32_t kProblemSize = 512;
506 constexpr float kFactor = 4.f;
510 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
512 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
518 {kWorkgroupSize, 1, 1},
522 ResourceType::kUniformBuffer,
525 "uniformBlock { float factor; }"
529 ResourceType::kStorageBuffer,
532 "inputBlock { float4 in_data[]; }",
536 ResourceType::kStorageBuffer,
541 "outputBlock { float4 out_data[]; }",
544 ~TestComputeStep()
override =
default;
550 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
557 SkASSERT(r.fFlow == DataFlow::kPrivate);
558 return sizeof(float);
561 SkASSERT(r.fFlow == DataFlow::kPrivate);
562 return sizeof(float) * kProblemSize;
566 SkASSERT(r.fFlow == DataFlow::kShared);
567 return sizeof(float) * kProblemSize;
571 const ResourceDesc& r,
573 size_t bufferSize)
const override {
575 if (resourceIndex != 1) {
578 SkASSERT(r.fFlow == DataFlow::kPrivate);
579 size_t dataCount =
sizeof(float) * kProblemSize;
582 for (
unsigned int i = 0; i < kProblemSize; ++i) {
593 mgr->setExpectedUniforms(uniforms);
604 if (!builder.appendStep(&
step)) {
612 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
622 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
625 std::unique_ptr<Recording> recording = recorder->snap();
637 float* outData =
static_cast<float*
>(
638 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
639 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
640 for (
unsigned int i = 0; i < kProblemSize; ++i) {
641 const float expected = (i + 1) * kFactor;
642 const float found = outData[i];
653 constexpr uint32_t kProblemSize = 512;
654 constexpr float kFactor = 4.f;
658 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
660 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
665 "ExternallyAssignedBuffer",
666 {kWorkgroupSize, 1, 1},
670 ResourceType::kStorageBuffer,
675 " layout(offset = 16) float4 in_data[];\n"
680 ResourceType::kStorageBuffer,
685 "outputBlock { float4 out_data[]; }",
688 ~TestComputeStep()
override =
default;
694 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
701 SkASSERT(r.fFlow == DataFlow::kPrivate);
702 return sizeof(float) * (kProblemSize + 4);
706 const ResourceDesc& r,
708 size_t bufferSize)
const override {
710 SkASSERT(r.fFlow == DataFlow::kPrivate);
712 size_t dataCount =
sizeof(float) * (kProblemSize + 4);
716 for (
unsigned int i = 0; i < kProblemSize; ++i) {
717 inData[i + 4] = i + 1;
724 auto [_, outputInfo] =
725 recorder->priv().drawBufferManager()->getStoragePointer(
sizeof(
float) * kProblemSize);
729 builder.assignSharedBuffer({outputInfo,
sizeof(float) * kProblemSize}, 0);
732 if (!builder.appendStep(&
step, {WorkgroupSize(1, 1, 1)})) {
743 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
746 std::unique_ptr<Recording> recording = recorder->snap();
758 float* outData =
static_cast<float*
>(
759 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
760 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
761 for (
unsigned int i = 0; i < kProblemSize; ++i) {
762 const float expected = (i + 1) * kFactor;
763 const float found = outData[i];
774 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
778 constexpr uint32_t kDim = 16;
783 "TestStorageTexture",
787 ResourceType::kWriteOnlyStorageTexture,
794 ~TestComputeStep()
override =
default;
799 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
805 int index,
const ResourceDesc& r)
const override {
815 if (!builder.appendStep(&
step)) {
832 std::unique_ptr<Recording> recording = recorder->snap();
846 bitmap.allocPixels(imgInfo);
849 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
852 bool readPixelsSuccess = context->
priv().readPixels(pixels,
texture.get(), imgInfo, 0, 0);
855 for (uint32_t
x = 0;
x < kDim; ++
x) {
856 for (uint32_t
y = 0;
y < kDim; ++
y) {
860 "At position {%u, %u}, "
861 "expected {%.1f, %.1f, %.1f, %.1f}, "
862 "found {%.1f, %.1f, %.1f, %.1f}",
864 expected.fR, expected.fG, expected.fB, expected.fA,
876 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
880 constexpr uint32_t kDim = 16;
885 "TestStorageTextureReadAndWrite",
889 ResourceType::kReadOnlyTexture,
896 ResourceType::kWriteOnlyStorageTexture,
903 ~TestComputeStep()
override =
default;
908 half4 color = textureRead(src, sk_LocalInvocationID.xy);
909 textureWrite(dst, sk_LocalInvocationID.xy, color);
915 int index,
const ResourceDesc& r)
const override {
931 bool srcPeekPixelsSuccess = srcBitmap.
peekPixels(&srcPixels);
933 for (uint32_t
x = 0;
x < kDim; ++
x) {
934 for (uint32_t
y = 0;
y < kDim; ++
y) {
941 skgpu::Mipmapped::kNo,
942 skgpu::Protected::kNo,
943 skgpu::Renderable::kNo);
945 recorder->priv().resourceProvider(),
958 std::make_unique<ImageUploadContext>());
969 builder.assignSharedTexture(std::move(srcProxy), 0);
971 if (!builder.appendStep(&
step)) {
988 std::unique_ptr<Recording> recording = recorder->snap();
1002 bitmap.allocPixels(imgInfo);
1005 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1008 bool readPixelsSuccess = context->
priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1011 for (uint32_t
x = 0;
x < kDim; ++
x) {
1012 for (uint32_t
y = 0;
y < kDim; ++
y) {
1017 "At position {%u, %u}, "
1018 "expected {%.1f, %.1f, %.1f, %.1f}, "
1019 "found {%.1f, %.1f, %.1f, %.1f}",
1021 expected.fR, expected.fG, expected.fB, expected.fA,
1031 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1035 constexpr uint32_t kDim = 16;
1040 "TestReadOnlyStorageBuffer",
1044 ResourceType::kReadOnlyStorageBuffer,
1048 "src { uint in_data[]; }",
1051 ResourceType::kWriteOnlyStorageTexture,
1058 ~TestComputeStep()
override =
default;
1063 uint ix = sk_LocalInvocationID.y * 16 + sk_LocalInvocationID.x;
1064 uint value = in_data[ix];
1065 half4 splat = half4(
1067 half((value >> 8) & 0xFF),
1068 half((value >> 16) & 0xFF),
1069 half((value >> 24) & 0xFF)
1071 textureWrite(dst, sk_LocalInvocationID.xy, splat / 255.0);
1078 return kDim * kDim *
sizeof(uint32_t);
1082 const ResourceDesc&,
1084 size_t bufferSize)
const override {
1086 SkASSERT(bufferSize == kDim * kDim *
sizeof(uint32_t));
1088 uint32_t* inputs =
reinterpret_cast<uint32_t*
>(
buffer);
1089 for (uint32_t
y = 0;
y < kDim; ++
y) {
1090 for (uint32_t
x = 0;
x < kDim; ++
x) {
1092 ((
x * 256 / kDim) & 0xFF) | ((
y * 256 / kDim) & 0xFF) << 8 | 255 << 24;
1093 *(inputs++) =
value;
1099 int index,
const ResourceDesc& r)
const override {
1110 if (!builder.appendStep(&
step)) {
1127 std::unique_ptr<Recording> recording = recorder->snap();
1141 bitmap.allocPixels(imgInfo);
1144 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1147 bool readPixelsSuccess = context->
priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1150 for (uint32_t
x = 0;
x < kDim; ++
x) {
1151 for (uint32_t
y = 0;
y < kDim; ++
y) {
1156 for (
int i = 0; i < 4; i++) {
1157 pass &=
color[i] == expected[i];
1160 "At position {%u, %u}, "
1161 "expected {%.1f, %.1f, %.1f, %.1f}, "
1162 "found {%.1f, %.1f, %.1f, %.1f}",
1164 expected.fR, expected.fG, expected.fB, expected.fA,
1175 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1179 constexpr uint32_t kDim = 16;
1185 "TestStorageTexturesFirstPass",
1189 ResourceType::kWriteOnlyStorageTexture,
1196 ~TestComputeStep1()
override =
default;
1201 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
1207 int index,
const ResourceDesc& r)
const override {
1221 "TestStorageTexturesSecondPass",
1225 ResourceType::kReadOnlyTexture,
1232 ResourceType::kWriteOnlyStorageTexture,
1239 ~TestComputeStep2()
override =
default;
1244 half4 color = textureRead(src, sk_LocalInvocationID.xy);
1245 textureWrite(dst, sk_LocalInvocationID.xy, color);
1251 int index,
const ResourceDesc& r)
const override {
1262 builder.appendStep(&step1);
1263 builder.appendStep(&step2);
1277 std::unique_ptr<Recording> recording = recorder->snap();
1291 bitmap.allocPixels(imgInfo);
1294 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1297 bool readPixelsSuccess = context->
priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1300 for (uint32_t
x = 0;
x < kDim; ++
x) {
1301 for (uint32_t
y = 0;
y < kDim; ++
y) {
1305 "At position {%u, %u}, "
1306 "expected {%.1f, %.1f, %.1f, %.1f}, "
1307 "found {%.1f, %.1f, %.1f, %.1f}",
1309 expected.fR, expected.fG, expected.fB, expected.fA,
1322 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1328 constexpr uint32_t kSrcDim = 16;
1329 constexpr uint32_t kDstDim = 4;
1334 "Test_SampledTexture_Init",
1335 {kSrcDim, kSrcDim, 1},
1338 ResourceType::kWriteOnlyStorageTexture,
1345 ~TestComputeStep1()
override =
default;
1350 uint2 c = sk_LocalInvocationID.xy;
1351 uint checkerBoardColor = (c.x + (c.y % 2)) % 2;
1352 textureWrite(dst, c, half4(checkerBoardColor, 0, 0, 1));
1358 int index,
const ResourceDesc& r)
const override {
1371 "Test_SampledTexture_Sample",
1372 {kDstDim, kDstDim, 1},
1379 ResourceType::kWriteOnlyStorageTexture,
1386 ResourceType::kSampledTexture,
1393 ~TestComputeStep2()
override =
default;
1398 // Normalize the 4x4 invocation indices and sample the source texture using
1400 uint2 dstCoord = sk_LocalInvocationID.xy;
1401 const float2 dstSizeInv = float2(0.25, 0.25);
1402 float2 unormCoord = float2(dstCoord) * dstSizeInv;
1404 // Use explicit LOD, as quad derivatives are not available to a compute shader.
1405 half4 color = sampleLod(src, unormCoord, 0);
1406 textureWrite(dst, dstCoord, color);
1412 int index,
const ResourceDesc& r)
const override {
1413 SkASSERT(index == 0 || index == 1);
1430 builder.appendStep(&step1);
1431 builder.appendStep(&step2);
1445 std::unique_ptr<Recording> recording = recorder->snap();
1459 bitmap.allocPixels(imgInfo);
1462 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1465 bool readPixelsSuccess = context->
priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1468 for (uint32_t
x = 0;
x < kDstDim; ++
x) {
1469 for (uint32_t
y = 0;
y < kDstDim; ++
y) {
1472 "At position {%u, %u}, "
1473 "expected red channel in range [0.49, 0.51], "
1490 if (testContext->
contextType() == skgpu::ContextType::kDawn_D3D11) {
1494 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1496 constexpr uint32_t kWorkgroupCount = 32;
1497 constexpr uint32_t kWorkgroupSize = 256;
1502 "TestAtomicOperations",
1503 {kWorkgroupSize, 1, 1},
1506 ResourceType::kStorageBuffer,
1510 "ssbo { atomicUint globalCounter; }",
1513 ~TestComputeStep()
override =
default;
1523 workgroup atomicUint localCounter;
1526 // Initialize the local counter.
1527 if (sk_LocalInvocationID.x == 0) {
1528 atomicStore(localCounter, 0);
1531 // Synchronize the threads in the workgroup so they all see the initial value.
1534 // All threads increment the counter.
1535 atomicAdd(localCounter, 1);
1537 // Synchronize the threads again to ensure they have all executed the increment
1538 // and the following load reads the same value across all threads in the
1542 // Add the workgroup-only tally to the global counter.
1543 if (sk_LocalInvocationID.x == 0) {
1544 atomicAdd(globalCounter, atomicLoad(localCounter));
1553 SkASSERT(r.fFlow == DataFlow::kShared);
1554 return sizeof(uint32_t);
1562 const ResourceDesc& r,
1564 size_t bufferSize)
const override {
1566 *
static_cast<uint32_t*
>(
buffer) = 0;
1571 builder.appendStep(&
step);
1585 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
1588 std::unique_ptr<Recording> recording = recorder->snap();
1600 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
1601 const uint32_t
result =
static_cast<const uint32_t*
>(
1602 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
1604 result == kExpectedCount,
1605 "expected '%u', found '%u'",
1620 if (testContext->
contextType() == skgpu::ContextType::kDawn_D3D11) {
1624 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1626 constexpr uint32_t kWorkgroupCount = 32;
1627 constexpr uint32_t kWorkgroupSize = 256;
1632 "TestAtomicOperationsOverArrayAndStruct",
1633 {kWorkgroupSize, 1, 1},
1636 ResourceType::kStorageBuffer,
1641 " atomicUint globalCountsFirstHalf;\n"
1642 " atomicUint globalCountsSecondHalf;\n"
1646 ~TestComputeStep()
override =
default;
1656 const uint WORKGROUP_SIZE = 256;
1658 workgroup atomicUint localCounts[2];
1661 // Initialize the local counts.
1662 if (sk_LocalInvocationID.x == 0) {
1663 atomicStore(localCounts[0], 0);
1664 atomicStore(localCounts[1], 0);
1667 // Synchronize the threads in the workgroup so they all see the initial value.
1670 // Each thread increments one of the local counters based on its invocation
1672 uint idx = sk_LocalInvocationID.x < (WORKGROUP_SIZE / 2) ? 0 : 1;
1673 atomicAdd(localCounts[idx], 1);
1675 // Synchronize the threads again to ensure they have all executed the increments
1676 // and the following load reads the same value across all threads in the
1680 // Add the workgroup-only tally to the global counter.
1681 if (sk_LocalInvocationID.x == 0) {
1682 atomicAdd(globalCountsFirstHalf, atomicLoad(localCounts[0]));
1683 atomicAdd(globalCountsSecondHalf, atomicLoad(localCounts[1]));
1692 SkASSERT(r.fFlow == DataFlow::kShared);
1693 return 2 *
sizeof(uint32_t);
1701 const ResourceDesc& r,
1703 size_t bufferSize)
const override {
1705 uint32_t* data =
static_cast<uint32_t*
>(
buffer);
1712 builder.appendStep(&
step);
1726 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
1729 std::unique_ptr<Recording> recording = recorder->snap();
1741 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize / 2;
1743 const uint32_t* ssboData =
static_cast<const uint32_t*
>(
1744 map_buffer(context, testContext,
buffer.get(),
info.fOffset));
1745 const uint32_t firstHalfCount = ssboData[0];
1746 const uint32_t secondHalfCount = ssboData[1];
1748 firstHalfCount == kExpectedCount,
1749 "expected '%u', found '%u'",
1753 secondHalfCount == kExpectedCount,
1754 "expected '%u', found '%u'",
1763 constexpr uint32_t kProblemSize = 512;
1767 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
1769 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1776 "TestClearedBuffer",
1777 {kWorkgroupSize, 1, 1},
1782 ResourceType::kStorageBuffer,
1785 "inputBlock { uint4 in_data[]; }\n",
1789 ResourceType::kStorageBuffer,
1794 "outputBlock { uint4 out_data[]; }\n",
1797 ~TestComputeStep()
override =
default;
1802 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1808 return sizeof(uint32_t) * kProblemSize;
1812 const ResourceDesc& r,
1814 size_t bufferSize)
const override {
1825 if (!builder.appendStep(&
step)) {
1833 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
1843 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
1846 std::unique_ptr<Recording> recording = recorder->snap();
1858 uint32_t* outData =
static_cast<uint32_t*
>(
1859 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
1860 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
1861 for (
unsigned int i = 0; i < kProblemSize; ++i) {
1862 const uint32_t found = outData[i];
1874 constexpr uint32_t kWorkgroupSize = 64;
1881 {kWorkgroupSize, 1, 1},
1884 ResourceType::kStorageBuffer,
1888 "outputBlock { uint4 out_data[]; }\n",
1891 ~FillWithGarbage()
override =
default;
1896 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
1908 {kWorkgroupSize, 1, 1},
1911 ResourceType::kStorageBuffer,
1915 "inputBlock { uint4 in_data[]; }\n",
1918 ResourceType::kStorageBuffer,
1922 "outputBlock { uint4 out_data[]; }\n",
1925 ~CopyBuffer()
override =
default;
1930 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1936 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1939 constexpr size_t kElementCount = 4 * kWorkgroupSize;
1940 constexpr size_t kBufferSize =
sizeof(uint32_t) * kElementCount;
1941 auto input = recorder->priv().drawBufferManager()->getStorage(
kBufferSize);
1942 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(
kBufferSize);
1947 builder.assignSharedBuffer({input,
kBufferSize}, 0);
1948 builder.appendStep(&garbageStep, {{1, 1, 1}});
1953 builder.assignSharedBuffer({input,
kBufferSize}, 0, ClearBuffer::kYes);
1954 builder.assignSharedBuffer({output,
kBufferSize}, 1);
1955 builder.appendStep(©Step, {{1, 1, 1}});
1960 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
1963 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
1970 uint32_t* outData =
static_cast<uint32_t*
>(
1971 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
1972 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
1973 for (
unsigned int i = 0; i < kElementCount; ++i) {
1974 const uint32_t found = outData[i];
1985 constexpr uint32_t kWorkgroupSize = 64;
1992 {kWorkgroupSize, 1, 1},
1995 ResourceType::kStorageBuffer,
1999 "outputBlock { uint4 out_data[]; }\n",
2002 ~FillWithGarbage()
override =
default;
2007 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
2019 {kWorkgroupSize, 1, 1},
2022 ResourceType::kStorageBuffer,
2026 "inputBlock { uint4 in_data[]; }\n",
2029 ResourceType::kStorageBuffer,
2033 "outputBlock { uint4 out_data[]; }\n",
2036 ~CopyBuffer()
override =
default;
2041 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
2047 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2050 constexpr size_t kElementCount = 4 * kWorkgroupSize;
2051 constexpr size_t kBufferSize =
sizeof(uint32_t) * kElementCount;
2052 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(
kBufferSize);
2058 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(
kBufferSize);
2060 builder.assignSharedBuffer({input,
kBufferSize}, 0);
2064 builder.appendStep(&garbageStep, {{1, 1, 1}});
2070 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(
kBufferSize);
2072 builder.assignSharedBuffer({input,
kBufferSize}, 0, ClearBuffer::kYes);
2074 builder.assignSharedBuffer({output,
kBufferSize}, 1);
2075 builder.appendStep(©Step, {{1, 1, 1}});
2080 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
2083 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
2090 uint32_t* outData =
static_cast<uint32_t*
>(
2091 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
2092 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
2093 for (
unsigned int i = 0; i < kElementCount; ++i) {
2094 const uint32_t found = outData[i];
2104 if (testContext->
contextType() == skgpu::ContextType::kDawn_D3D11) {
2108 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
2110 constexpr uint32_t kWorkgroupCount = 32;
2111 constexpr uint32_t kWorkgroupSize = 64;
2120 "TestIndirectDispatch_IndirectStep",
2121 {kWorkgroupSize, 1, 1},
2124 ResourceType::kIndirectBuffer,
2131 "ssbo { uint indirect[]; }",
2133 ~IndirectStep()
override =
default;
2139 // This needs to match `kWorkgroupCount` declared above.
2140 const uint kWorkgroupCount = 32;
2143 if (sk_LocalInvocationID.x == 0) {
2144 indirect[0] = kWorkgroupCount;
2155 SkASSERT(r.fFlow == DataFlow::kShared);
2168 "TestIndirectDispatch_CountStep",
2169 {kWorkgroupSize, 1, 1},
2172 ResourceType::kStorageBuffer,
2176 "ssbo { atomicUint globalCounter; }",
2178 ~CountStep()
override =
default;
2182 workgroup atomicUint localCounter;
2185 // Initialize the local counter.
2186 if (sk_LocalInvocationID.x == 0) {
2187 atomicStore(localCounter, 0);
2190 // Synchronize the threads in the workgroup so they all see the initial value.
2193 // All threads increment the counter.
2194 atomicAdd(localCounter, 1);
2196 // Synchronize the threads again to ensure they have all executed the increment
2197 // and the following load reads the same value across all threads in the
2201 // Add the workgroup-only tally to the global counter.
2202 if (sk_LocalInvocationID.x == 0) {
2203 atomicAdd(globalCounter, atomicLoad(localCounter));
2212 SkASSERT(r.fFlow == DataFlow::kShared);
2213 return sizeof(uint32_t);
2217 const ResourceDesc& r,
2219 size_t bufferSize)
const override {
2221 *
static_cast<uint32_t*
>(
buffer) = 0;
2226 builder.appendStep(&indirectStep);
2227 BindBufferInfo indirectBufferInfo = builder.getSharedBufferResource(0);
2228 if (!indirectBufferInfo) {
2246 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2249 std::unique_ptr<Recording> recording = recorder->snap();
2261 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2262 const uint32_t
result =
static_cast<const uint32_t*
>(
2263 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2265 result == kExpectedCount,
2266 "expected '%u', found '%u'",
2275 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
2277 constexpr uint32_t kWorkgroupCount = 32;
2278 constexpr uint32_t kWorkgroupSize = 1024;
2283 "TestAtomicOperationsMetal",
2284 {kWorkgroupSize, 1, 1},
2287 ResourceType::kStorageBuffer,
2295 ~TestComputeStep()
override =
default;
2299 static constexpr std::string_view kSource = R
"(
2300 #include <metal_stdlib>
2302 using namespace metal;
2304 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2305 device atomic_uint& globalCounter [[buffer(0)]]) {
2306 threadgroup atomic_uint localCounter;
2308 // Initialize the local counter.
2309 if (localId.x == 0u) {
2310 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2313 // Synchronize the threads in the workgroup so they all see the initial value.
2314 threadgroup_barrier(mem_flags::mem_threadgroup);
2316 // All threads increment the counter.
2317 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2319 // Synchronize the threads again to ensure they have all executed the increment
2320 // and the following load reads the same value across all threads in the
2322 threadgroup_barrier(mem_flags::mem_threadgroup);
2324 // Add the workgroup-only tally to the global counter.
2325 if (localId.x == 0u) {
2326 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2327 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2331 return {kSource,
"atomicCount"};
2337 SkASSERT(r.fFlow == DataFlow::kShared);
2338 return sizeof(uint32_t);
2346 const ResourceDesc& r,
2348 size_t bufferSize)
const override {
2350 *
static_cast<uint32_t*
>(
buffer) = 0;
2355 builder.appendStep(&
step);
2369 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2372 std::unique_ptr<Recording> recording = recorder->snap();
2384 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2385 const uint32_t
result =
static_cast<const uint32_t*
>(
2386 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2388 result == kExpectedCount,
2389 "expected '%u', found '%u'",
2398 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
2400 constexpr uint32_t kWorkgroupCount = 32;
2401 constexpr uint32_t kWorkgroupSize = 1024;
2406 "TestAtomicOperationsMetal",
2407 {kWorkgroupSize, 1, 1},
2410 ResourceType::kStorageBuffer,
2423 ~TestComputeStep()
override =
default;
2430 static constexpr std::string_view kSource = R
"(
2431 #include <metal_stdlib>
2433 using namespace metal;
2435 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2436 device atomic_uint& globalCounter [[buffer(0)]],
2437 threadgroup atomic_uint& localCounter [[threadgroup(0)]]) {
2438 // Initialize the local counter.
2439 if (localId.x == 0u) {
2440 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2443 // Synchronize the threads in the workgroup so they all see the initial value.
2444 threadgroup_barrier(mem_flags::mem_threadgroup);
2446 // All threads increment the counter.
2447 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2449 // Synchronize the threads again to ensure they have all executed the increment
2450 // and the following load reads the same value across all threads in the
2452 threadgroup_barrier(mem_flags::mem_threadgroup);
2454 // Add the workgroup-only tally to the global counter.
2455 if (localId.x == 0u) {
2456 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2457 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2461 return {kSource,
"atomicCount"};
2467 SkASSERT(r.fFlow == DataFlow::kShared);
2468 return sizeof(uint32_t);
2476 const ResourceDesc& r,
2478 size_t bufferSize)
const override {
2480 *
static_cast<uint32_t*
>(
buffer) = 0;
2485 builder.appendStep(&
step);
2499 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2502 std::unique_ptr<Recording> recording = recorder->snap();
2514 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2515 const uint32_t
result =
static_cast<const uint32_t*
>(
2516 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2518 result == kExpectedCount,
2519 "expected '%u', found '%u'",
2526 if (testContext->
contextType() == skgpu::ContextType::kDawn_D3D11) {
2530 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
2532 constexpr uint32_t kWorkgroupCount = 32;
2533 constexpr uint32_t kWorkgroupSize = 256;
2538 "TestAtomicOperationsWGSL",
2539 {kWorkgroupSize, 1, 1},
2542 ResourceType::kStorageBuffer,
2550 ~TestComputeStep()
override =
default;
2554 static constexpr std::string_view kSource = R
"(
2555 @group(0) @binding(0) var<storage, read_write> globalCounter: atomic<u32>;
2557 var<workgroup> localCounter: atomic<u32>;
2559 @compute @workgroup_size(256)
2560 fn atomicCount(@builtin(local_invocation_id) localId: vec3u) {
2561 // Initialize the local counter.
2562 if localId.x == 0u {
2563 atomicStore(&localCounter, 0u);
2566 // Synchronize the threads in the workgroup so they all see the initial value.
2569 // All threads increment the counter.
2570 atomicAdd(&localCounter, 1u);
2572 // Synchronize the threads again to ensure they have all executed the increment
2573 // and the following load reads the same value across all threads in the
2577 // Add the workgroup-only tally to the global counter.
2578 if localId.x == 0u {
2579 let tally = atomicLoad(&localCounter);
2580 atomicAdd(&globalCounter, tally);
2584 return {kSource,
"atomicCount"};
2590 SkASSERT(r.fFlow == DataFlow::kShared);
2591 return sizeof(uint32_t);
2599 const ResourceDesc& r,
2601 size_t bufferSize)
const override {
2603 *
static_cast<uint32_t*
>(
buffer) = 0;
2608 builder.appendStep(&
step);
2622 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2625 std::unique_ptr<Recording> recording = recorder->snap();
2637 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2638 const uint32_t
result =
static_cast<const uint32_t*
>(
2639 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2641 result == kExpectedCount,
2642 "expected '%u', found '%u'",
static int step(int x, SkScalar min, SkScalar max)
#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS( name, reporter, graphite_context, test_context)
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
@ kRGBA_8888_SkColorType
pixel with 8 bits for red, green, blue, alpha; in 32-bit word
static constexpr SkColor SkColorSetARGB(U8CPU a, U8CPU r, U8CPU g, U8CPU b)
constexpr SkColor SK_ColorGREEN
sk_sp< T > sk_ref_sp(T *obj)
static const size_t kBufferSize
#define DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT(name, reporter, graphite_context, test_context)
#define DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(name, reporter, graphite_context, test_context)
#define REPORTER_ASSERT(r, cond,...)
void allocPixels(const SkImageInfo &info, size_t rowBytes)
bool peekPixels(SkPixmap *pixmap) const
const SkImageInfo & info() const
SkColor4f getColor4f(int x, int y) const
uint32_t * writable_addr32(int x, int y) const
const void * addr() const
bool bufferMapsAreAsync() const
virtual TextureInfo getDefaultSampledTextureInfo(SkColorType, Mipmapped mipmapped, Protected, Renderable) const =0
virtual SamplerDesc calculateSamplerParameters(int resourceIndex, const ResourceDesc &) const
virtual std::string computeSkSL() const
virtual WorkgroupSize calculateGlobalDispatchSize() const
virtual void prepareStorageBuffer(int resourceIndex, const ResourceDesc &resource, void *buffer, size_t bufferSize) const
virtual std::tuple< SkISize, SkColorType > calculateTextureParameters(int resourceIndex, const ResourceDesc &) const
virtual NativeShaderSource nativeShaderSource(NativeShaderFormat) const
virtual void prepareUniformBuffer(int resourceIndex, const ResourceDesc &, UniformManager *) const
virtual size_t calculateBufferSize(int resourceIndex, const ResourceDesc &) const
static sk_sp< ComputeTask > Make(DispatchGroupList dispatchGroups)
const Caps * caps() const
std::unique_ptr< Recorder > makeRecorder(const RecorderOptions &={})
bool insertRecording(const InsertRecordingInfo &)
static sk_sp< CopyBufferToBufferTask > Make(const Buffer *srcBuffer, size_t srcOffset, sk_sp< Buffer > dstBuffer, size_t dstOffset, size_t size)
const Caps * caps() const
ResourceProvider * resourceProvider()
std::unique_ptr< Recording > snap()
sk_sp< Buffer > findOrCreateBuffer(size_t size, BufferType type, AccessPattern, std::string_view label)
static sk_sp< SynchronizeToCpuTask > Make(sk_sp< Buffer >)
static sk_sp< TextureProxy > Make(const Caps *, ResourceProvider *, SkISize dimensions, const TextureInfo &, skgpu::Budgeted)
static UploadInstance Make(Recorder *, sk_sp< TextureProxy > targetProxy, const SkColorInfo &srcColorInfo, const SkColorInfo &dstColorInfo, SkSpan< const MipLevel > levels, const SkIRect &dstRect, std::unique_ptr< ConditionalUploadContext >)
static sk_sp< UploadTask > Make(UploadList *)
void syncedSubmit(skgpu::graphite::Context *)
virtual skgpu::ContextType contextType()=0
static const uint8_t buffer[]
uint32_t uint32_t * format
constexpr size_t kIndirectDispatchArgumentSize
bool IsMetalContextType(skgpu::ContextType type)
bool IsDawnContextType(skgpu::ContextType type)
static constexpr SkIRect MakeWH(int32_t w, int32_t h)
const SkColorInfo & colorInfo() const
static SkImageInfo Make(int width, int height, SkColorType ct, SkAlphaType at)
static SkRGBA4f FromBytes_RGBA(uint32_t color)
static SkRGBA4f FromColor(SkColor color)