36void* map_buffer(
Context* context,
43 while (!
buffer->isMapped()) {
47 std::byte* ptr =
static_cast<std::byte*
>(
buffer->map());
54 if (recorder->
priv().
caps()->drawBufferCanBeMappedForReadback()) {
65 BufferType::kXferGpuToCpu,
66 AccessPattern::kHostVisible,
67 "ComputeTest_TransferToCpu");
78std::unique_ptr<Recording> submit_recording(
Context* context,
81 std::unique_ptr<Recording> recording = recorder->
snap();
100#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS( \
101 name, reporter, graphite_context, test_context) \
102 DEF_GRAPHITE_TEST_FOR_CONTEXTS(name, \
103 is_dawn_or_metal_context_type, \
107 CtsEnforcement::kNever)
115 constexpr uint32_t kProblemSize = 512;
116 constexpr float kFactor = 4.f;
120 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
122 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
130 {kWorkgroupSize, 1, 1},
140 " layout(offset=16) float4 in_data[];\n"
150 "outputBlock { float4 out_data[]; }",
153 ~TestComputeStep()
override =
default;
159 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
166 SkASSERT(r.fFlow == DataFlow::kPrivate);
167 return sizeof(
float) * (kProblemSize + 4);
171 SkASSERT(r.fFlow == DataFlow::kShared);
172 return sizeof(
float) * kProblemSize;
176 const ResourceDesc& r,
178 size_t bufferSize)
const override {
180 if (resourceIndex != 0) {
183 SkASSERT(r.fFlow == DataFlow::kPrivate);
185 size_t dataCount =
sizeof(
float) * (kProblemSize + 4);
189 for (
unsigned int i = 0;
i < kProblemSize; ++
i) {
190 inData[
i + 4] =
i + 1;
208 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
218 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
221 std::unique_ptr<Recording> recording = recorder->snap();
233 float* outData =
static_cast<float*
>(
234 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
235 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
236 for (
unsigned int i = 0;
i < kProblemSize; ++
i) {
237 const float expected = (
i + 1) * kFactor;
238 const float found = outData[
i];
254 constexpr uint32_t kProblemSize = 512;
255 constexpr float kFactor1 = 4.f;
256 constexpr float kFactor2 = 3.f;
260 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
262 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
271 "TestArrayMultiplyFirstPass",
272 {kWorkgroupSize, 1, 1},
282 " layout(offset=16) float4 in_data[];\n"
291 "outputBlock1 { float4 forward_data[]; }",
298 "outputBlock2 { float2 extra_data; }",
301 ~TestComputeStep1()
override =
default;
307 uint idx = sk_GlobalInvocationID.x;
308 forward_data[idx] = in_data[idx] * factor;
310 extra_data.x = factor;
311 extra_data.y = 2 * factor;
319 SkASSERT(r.fFlow == DataFlow::kPrivate);
320 return sizeof(
float) * (kProblemSize + 4);
323 SkASSERT(r.fFlow == DataFlow::kShared);
325 return sizeof(
float) * kProblemSize;
330 SkASSERT(r.fFlow == DataFlow::kShared);
331 return 2 *
sizeof(
float);
335 const ResourceDesc& r,
337 size_t bufferSize)
const override {
338 if (resourceIndex != 0) {
342 size_t dataCount =
sizeof(
float) * (kProblemSize + 4);
345 inData[0] = kFactor1;
346 for (
unsigned int i = 0;
i < kProblemSize; ++
i) {
347 inData[
i + 4] =
i + 1;
359 "TestArrayMultiplySecondPass",
360 {kWorkgroupSize, 1, 1},
368 "inputBlock { float4 in_data[]; }",
374 "factorBlock { float factor; }"
382 "outputBlock { float4 out_data[]; }",
385 ~TestComputeStep2()
override =
default;
391 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
399 SkASSERT(r.fFlow == DataFlow::kPrivate);
400 return sizeof(
float) * 4;
404 SkASSERT(r.fFlow == DataFlow::kShared);
405 return sizeof(
float) * kProblemSize;
409 const ResourceDesc& r,
411 size_t bufferSize)
const override {
412 if (resourceIndex != 1) {
415 SkASSERT(r.fFlow == DataFlow::kPrivate);
416 *
static_cast<float*
>(
buffer) = kFactor2;
432 std::holds_alternative<BufferView>(
builder.outputTable().fSharedSlots[0]),
433 "shared resource at slot 0 is missing");
436 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
442 if (!extraOutputInfo) {
453 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
454 auto extraOutputBuffer = sync_buffer_to_cpu(recorder.get(), extraOutputInfo.
fBuffer);
457 std::unique_ptr<Recording> recording = recorder->snap();
469 float* outData =
static_cast<float*
>(
470 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
471 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
472 for (
unsigned int i = 0;
i < kProblemSize; ++
i) {
473 const float expected = (
i + 1) * kFactor1 * kFactor2;
474 const float found = outData[
i];
479 float* extraOutData =
static_cast<float*
>(
480 map_buffer(context, testContext, extraOutputBuffer.get(), extraOutputInfo.
fOffset));
481 SkASSERT(extraOutputBuffer->isMapped() && extraOutData !=
nullptr);
483 kFactor1 == extraOutData[0],
484 "expected '%f', found '%f'",
488 2 * kFactor1 == extraOutData[1],
489 "expected '%f', found '%f'",
505 constexpr uint32_t kProblemSize = 512;
506 constexpr float kFactor = 4.f;
510 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
512 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
518 {kWorkgroupSize, 1, 1},
525 "uniformBlock { float factor; }"
532 "inputBlock { float4 in_data[]; }",
541 "outputBlock { float4 out_data[]; }",
544 ~TestComputeStep()
override =
default;
550 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
557 SkASSERT(r.fFlow == DataFlow::kPrivate);
558 return sizeof(
float);
561 SkASSERT(r.fFlow == DataFlow::kPrivate);
562 return sizeof(
float) * kProblemSize;
566 SkASSERT(r.fFlow == DataFlow::kShared);
567 return sizeof(
float) * kProblemSize;
571 const ResourceDesc& r,
573 size_t bufferSize)
const override {
575 if (resourceIndex != 1) {
578 SkASSERT(r.fFlow == DataFlow::kPrivate);
579 size_t dataCount =
sizeof(
float) * kProblemSize;
582 for (
unsigned int i = 0;
i < kProblemSize; ++
i) {
593 mgr->setExpectedUniforms(uniforms);
612 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
622 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
625 std::unique_ptr<Recording> recording = recorder->snap();
637 float* outData =
static_cast<float*
>(
638 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
639 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
640 for (
unsigned int i = 0;
i < kProblemSize; ++
i) {
641 const float expected = (
i + 1) * kFactor;
642 const float found = outData[
i];
653 constexpr uint32_t kProblemSize = 512;
654 constexpr float kFactor = 4.f;
658 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
660 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
665 "ExternallyAssignedBuffer",
666 {kWorkgroupSize, 1, 1},
675 " layout(offset = 16) float4 in_data[];\n"
685 "outputBlock { float4 out_data[]; }",
688 ~TestComputeStep()
override =
default;
694 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
701 SkASSERT(r.fFlow == DataFlow::kPrivate);
702 return sizeof(
float) * (kProblemSize + 4);
706 const ResourceDesc& r,
708 size_t bufferSize)
const override {
710 SkASSERT(r.fFlow == DataFlow::kPrivate);
712 size_t dataCount =
sizeof(
float) * (kProblemSize + 4);
716 for (
unsigned int i = 0;
i < kProblemSize; ++
i) {
717 inData[
i + 4] =
i + 1;
724 auto [_, outputInfo] =
725 recorder->priv().drawBufferManager()->getStoragePointer(
sizeof(
float) * kProblemSize);
729 builder.assignSharedBuffer({outputInfo,
sizeof(
float) * kProblemSize}, 0);
732 if (!
builder.appendStep(&
step, {WorkgroupSize(1, 1, 1)})) {
743 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
746 std::unique_ptr<Recording> recording = recorder->snap();
758 float* outData =
static_cast<float*
>(
759 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
760 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
761 for (
unsigned int i = 0;
i < kProblemSize; ++
i) {
762 const float expected = (
i + 1) * kFactor;
763 const float found = outData[
i];
774 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
778 constexpr uint32_t kDim = 16;
783 "TestStorageTexture",
794 ~TestComputeStep()
override =
default;
799 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
805 int index,
const ResourceDesc& r)
const override {
832 std::unique_ptr<Recording> recording = recorder->snap();
846 bitmap.allocPixels(imgInfo);
849 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
852 bool readPixelsSuccess = context->
priv().readPixels(pixels,
texture.get(), imgInfo, 0, 0);
855 for (uint32_t
x = 0;
x < kDim; ++
x) {
856 for (uint32_t
y = 0;
y < kDim; ++
y) {
860 "At position {%u, %u}, "
861 "expected {%.1f, %.1f, %.1f, %.1f}, "
862 "found {%.1f, %.1f, %.1f, %.1f}",
864 expected.fR, expected.fG, expected.fB, expected.fA,
876 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
880 constexpr uint32_t kDim = 16;
885 "TestStorageTextureReadAndWrite",
903 ~TestComputeStep()
override =
default;
908 half4 color = textureRead(src, sk_LocalInvocationID.xy);
909 textureWrite(dst, sk_LocalInvocationID.xy, color);
915 int index,
const ResourceDesc& r)
const override {
931 bool srcPeekPixelsSuccess = srcBitmap.
peekPixels(&srcPixels);
933 for (uint32_t
x = 0;
x < kDim; ++
x) {
934 for (uint32_t
y = 0;
y < kDim; ++
y) {
945 recorder->priv().resourceProvider(),
948 "ComputeTestSrcProxy",
959 std::make_unique<ImageUploadContext>());
970 builder.assignSharedTexture(std::move(srcProxy), 0);
989 std::unique_ptr<Recording> recording = recorder->snap();
1003 bitmap.allocPixels(imgInfo);
1006 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1009 bool readPixelsSuccess = context->
priv().readPixels(pixels,
dst.get(), imgInfo, 0, 0);
1012 for (uint32_t
x = 0;
x < kDim; ++
x) {
1013 for (uint32_t
y = 0;
y < kDim; ++
y) {
1014 SkColor4f expected = SkColor4f::FromBytes_RGBA(
1018 "At position {%u, %u}, "
1019 "expected {%.1f, %.1f, %.1f, %.1f}, "
1020 "found {%.1f, %.1f, %.1f, %.1f}",
1022 expected.fR, expected.fG, expected.fB, expected.fA,
1032 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1036 constexpr uint32_t kDim = 16;
1041 "TestReadOnlyStorageBuffer",
1049 "src { uint in_data[]; }",
1059 ~TestComputeStep()
override =
default;
1064 uint ix = sk_LocalInvocationID.y * 16 + sk_LocalInvocationID.x;
1065 uint value = in_data[ix];
1066 half4 splat = half4(
1068 half((value >> 8) & 0xFF),
1069 half((value >> 16) & 0xFF),
1070 half((value >> 24) & 0xFF)
1072 textureWrite(dst, sk_LocalInvocationID.xy, splat / 255.0);
1079 return kDim * kDim *
sizeof(uint32_t);
1083 const ResourceDesc&,
1085 size_t bufferSize)
const override {
1087 SkASSERT(bufferSize == kDim * kDim *
sizeof(uint32_t));
1089 uint32_t*
inputs =
reinterpret_cast<uint32_t*
>(
buffer);
1090 for (uint32_t
y = 0;
y < kDim; ++
y) {
1091 for (uint32_t
x = 0;
x < kDim; ++
x) {
1093 ((
x * 256 / kDim) & 0xFF) | ((
y * 256 / kDim) & 0xFF) << 8 | 255 << 24;
1100 int index,
const ResourceDesc& r)
const override {
1128 std::unique_ptr<Recording> recording = recorder->snap();
1142 bitmap.allocPixels(imgInfo);
1145 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1148 bool readPixelsSuccess = context->
priv().readPixels(pixels,
dst.get(), imgInfo, 0, 0);
1151 for (uint32_t
x = 0;
x < kDim; ++
x) {
1152 for (uint32_t
y = 0;
y < kDim; ++
y) {
1154 SkColor4f::FromColor(
SkColorSetARGB(255,
x * 256 / kDim,
y * 256 / kDim, 0));
1157 for (
int i = 0;
i < 4;
i++) {
1158 pass &=
color[
i] == expected[
i];
1161 "At position {%u, %u}, "
1162 "expected {%.1f, %.1f, %.1f, %.1f}, "
1163 "found {%.1f, %.1f, %.1f, %.1f}",
1165 expected.fR, expected.fG, expected.fB, expected.fA,
1176 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1180 constexpr uint32_t kDim = 16;
1186 "TestStorageTexturesFirstPass",
1197 ~TestComputeStep1()
override =
default;
1202 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
1208 int index,
const ResourceDesc& r)
const override {
1222 "TestStorageTexturesSecondPass",
1240 ~TestComputeStep2()
override =
default;
1245 half4 color = textureRead(src, sk_LocalInvocationID.xy);
1246 textureWrite(dst, sk_LocalInvocationID.xy, color);
1252 int index,
const ResourceDesc& r)
const override {
1278 std::unique_ptr<Recording> recording = recorder->snap();
1292 bitmap.allocPixels(imgInfo);
1295 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1298 bool readPixelsSuccess = context->
priv().readPixels(pixels,
dst.get(), imgInfo, 0, 0);
1301 for (uint32_t
x = 0;
x < kDim; ++
x) {
1302 for (uint32_t
y = 0;
y < kDim; ++
y) {
1306 "At position {%u, %u}, "
1307 "expected {%.1f, %.1f, %.1f, %.1f}, "
1308 "found {%.1f, %.1f, %.1f, %.1f}",
1310 expected.fR, expected.fG, expected.fB, expected.fA,
1323 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1329 constexpr uint32_t kSrcDim = 16;
1330 constexpr uint32_t kDstDim = 4;
1335 "Test_SampledTexture_Init",
1336 {kSrcDim, kSrcDim, 1},
1346 ~TestComputeStep1()
override =
default;
1351 uint2 c = sk_LocalInvocationID.xy;
1352 uint checkerBoardColor = (c.x + (c.y % 2)) % 2;
1353 textureWrite(dst, c, half4(checkerBoardColor, 0, 0, 1));
1359 int index,
const ResourceDesc& r)
const override {
1372 "Test_SampledTexture_Sample",
1373 {kDstDim, kDstDim, 1},
1394 ~TestComputeStep2()
override =
default;
1399 // Normalize the 4x4 invocation indices and sample the source texture using
1401 uint2 dstCoord = sk_LocalInvocationID.xy;
1402 const float2 dstSizeInv = float2(0.25, 0.25);
1403 float2 unormCoord = float2(dstCoord) * dstSizeInv;
1405 // Use explicit LOD, as quad derivatives are not available to a compute shader.
1406 half4 color = sampleLod(src, unormCoord, 0);
1407 textureWrite(dst, dstCoord, color);
1413 int index,
const ResourceDesc& r)
const override {
1414 SkASSERT(index == 0 || index == 1);
1446 std::unique_ptr<Recording> recording = recorder->snap();
1460 bitmap.allocPixels(imgInfo);
1463 bool peekPixelsSuccess =
bitmap.peekPixels(&pixels);
1466 bool readPixelsSuccess = context->
priv().readPixels(pixels,
dst.get(), imgInfo, 0, 0);
1469 for (uint32_t
x = 0;
x < kDstDim; ++
x) {
1470 for (uint32_t
y = 0;
y < kDstDim; ++
y) {
1473 "At position {%u, %u}, "
1474 "expected red channel in range [0.49, 0.51], "
1495 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1497 constexpr uint32_t kWorkgroupCount = 32;
1498 constexpr uint32_t kWorkgroupSize = 256;
1503 "TestAtomicOperations",
1504 {kWorkgroupSize, 1, 1},
1511 "ssbo { atomicUint globalCounter; }",
1514 ~TestComputeStep()
override =
default;
1524 workgroup atomicUint localCounter;
1527 // Initialize the local counter.
1528 if (sk_LocalInvocationID.x == 0) {
1529 atomicStore(localCounter, 0);
1532 // Synchronize the threads in the workgroup so they all see the initial value.
1535 // All threads increment the counter.
1536 atomicAdd(localCounter, 1);
1538 // Synchronize the threads again to ensure they have all executed the increment
1539 // and the following load reads the same value across all threads in the
1543 // Add the workgroup-only tally to the global counter.
1544 if (sk_LocalInvocationID.x == 0) {
1545 atomicAdd(globalCounter, atomicLoad(localCounter));
1554 SkASSERT(r.fFlow == DataFlow::kShared);
1555 return sizeof(uint32_t);
1563 const ResourceDesc& r,
1565 size_t bufferSize)
const override {
1567 *
static_cast<uint32_t*
>(
buffer) = 0;
1586 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
1589 std::unique_ptr<Recording> recording = recorder->snap();
1601 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
1602 const uint32_t
result =
static_cast<const uint32_t*
>(
1603 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
1605 result == kExpectedCount,
1606 "expected '%u', found '%u'",
1625 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1627 constexpr uint32_t kWorkgroupCount = 32;
1628 constexpr uint32_t kWorkgroupSize = 256;
1633 "TestAtomicOperationsOverArrayAndStruct",
1634 {kWorkgroupSize, 1, 1},
1642 " atomicUint globalCountsFirstHalf;\n"
1643 " atomicUint globalCountsSecondHalf;\n"
1647 ~TestComputeStep()
override =
default;
1657 const uint WORKGROUP_SIZE = 256;
1659 workgroup atomicUint localCounts[2];
1662 // Initialize the local counts.
1663 if (sk_LocalInvocationID.x == 0) {
1664 atomicStore(localCounts[0], 0);
1665 atomicStore(localCounts[1], 0);
1668 // Synchronize the threads in the workgroup so they all see the initial value.
1671 // Each thread increments one of the local counters based on its invocation
1673 uint idx = sk_LocalInvocationID.x < (WORKGROUP_SIZE / 2) ? 0 : 1;
1674 atomicAdd(localCounts[idx], 1);
1676 // Synchronize the threads again to ensure they have all executed the increments
1677 // and the following load reads the same value across all threads in the
1681 // Add the workgroup-only tally to the global counter.
1682 if (sk_LocalInvocationID.x == 0) {
1683 atomicAdd(globalCountsFirstHalf, atomicLoad(localCounts[0]));
1684 atomicAdd(globalCountsSecondHalf, atomicLoad(localCounts[1]));
1693 SkASSERT(r.fFlow == DataFlow::kShared);
1694 return 2 *
sizeof(uint32_t);
1702 const ResourceDesc& r,
1704 size_t bufferSize)
const override {
1706 uint32_t*
data =
static_cast<uint32_t*
>(
buffer);
1727 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
1730 std::unique_ptr<Recording> recording = recorder->snap();
1742 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize / 2;
1744 const uint32_t* ssboData =
static_cast<const uint32_t*
>(
1745 map_buffer(context, testContext,
buffer.get(),
info.fOffset));
1746 const uint32_t firstHalfCount = ssboData[0];
1747 const uint32_t secondHalfCount = ssboData[1];
1749 firstHalfCount == kExpectedCount,
1750 "expected '%u', found '%u'",
1754 secondHalfCount == kExpectedCount,
1755 "expected '%u', found '%u'",
1764 constexpr uint32_t kProblemSize = 512;
1768 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
1770 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
1777 "TestClearedBuffer",
1778 {kWorkgroupSize, 1, 1},
1786 "inputBlock { uint4 in_data[]; }\n",
1795 "outputBlock { uint4 out_data[]; }\n",
1798 ~TestComputeStep()
override =
default;
1803 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1809 return sizeof(uint32_t) * kProblemSize;
1813 const ResourceDesc& r,
1815 size_t bufferSize)
const override {
1834 ERRORF(
reporter,
"Failed to allocate an output buffer at slot 0");
1844 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.
fBuffer);
1847 std::unique_ptr<Recording> recording = recorder->snap();
1859 uint32_t* outData =
static_cast<uint32_t*
>(
1860 map_buffer(context, testContext, outputBuffer.get(), outputInfo.
fOffset));
1861 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
1862 for (
unsigned int i = 0;
i < kProblemSize; ++
i) {
1863 const uint32_t found = outData[
i];
1875 constexpr uint32_t kWorkgroupSize = 64;
1882 {kWorkgroupSize, 1, 1},
1889 "outputBlock { uint4 out_data[]; }\n",
1892 ~FillWithGarbage()
override =
default;
1897 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
1909 {kWorkgroupSize, 1, 1},
1916 "inputBlock { uint4 in_data[]; }\n",
1923 "outputBlock { uint4 out_data[]; }\n",
1926 ~CopyBuffer()
override =
default;
1931 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1937 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1940 constexpr size_t kElementCount = 4 * kWorkgroupSize;
1941 constexpr size_t kBufferSize =
sizeof(uint32_t) * kElementCount;
1942 auto input = recorder->priv().drawBufferManager()->getStorage(
kBufferSize);
1943 auto [_,
output] = recorder->priv().drawBufferManager()->getStoragePointer(
kBufferSize);
1949 builder.appendStep(&garbageStep, {{1, 1, 1}});
1956 builder.appendStep(©Step, {{1, 1, 1}});
1961 auto outputBuffer = sync_buffer_to_cpu(recorder.get(),
output.fBuffer);
1964 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
1971 uint32_t* outData =
static_cast<uint32_t*
>(
1972 map_buffer(context, testContext, outputBuffer.get(),
output.fOffset));
1973 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
1974 for (
unsigned int i = 0;
i < kElementCount; ++
i) {
1975 const uint32_t found = outData[
i];
1986 constexpr uint32_t kWorkgroupSize = 64;
1993 {kWorkgroupSize, 1, 1},
2000 "outputBlock { uint4 out_data[]; }\n",
2003 ~FillWithGarbage()
override =
default;
2008 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
2020 {kWorkgroupSize, 1, 1},
2027 "inputBlock { uint4 in_data[]; }\n",
2034 "outputBlock { uint4 out_data[]; }\n",
2037 ~CopyBuffer()
override =
default;
2042 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
2048 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2051 constexpr size_t kElementCount = 4 * kWorkgroupSize;
2052 constexpr size_t kBufferSize =
sizeof(uint32_t) * kElementCount;
2053 auto [_,
output] = recorder->priv().drawBufferManager()->getStoragePointer(
kBufferSize);
2059 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(
kBufferSize);
2065 builder.appendStep(&garbageStep, {{1, 1, 1}});
2071 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(
kBufferSize);
2076 builder.appendStep(©Step, {{1, 1, 1}});
2081 auto outputBuffer = sync_buffer_to_cpu(recorder.get(),
output.fBuffer);
2084 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
2091 uint32_t* outData =
static_cast<uint32_t*
>(
2092 map_buffer(context, testContext, outputBuffer.get(),
output.fOffset));
2093 SkASSERT(outputBuffer->isMapped() && outData !=
nullptr);
2094 for (
unsigned int i = 0;
i < kElementCount; ++
i) {
2095 const uint32_t found = outData[
i];
2109 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
2111 constexpr uint32_t kWorkgroupCount = 32;
2112 constexpr uint32_t kWorkgroupSize = 64;
2121 "TestIndirectDispatch_IndirectStep",
2122 {kWorkgroupSize, 1, 1},
2132 "ssbo { uint indirect[]; }",
2134 ~IndirectStep()
override =
default;
2140 // This needs to match `kWorkgroupCount` declared above.
2141 const uint kWorkgroupCount = 32;
2144 if (sk_LocalInvocationID.x == 0) {
2145 indirect[0] = kWorkgroupCount;
2156 SkASSERT(r.fFlow == DataFlow::kShared);
2169 "TestIndirectDispatch_CountStep",
2170 {kWorkgroupSize, 1, 1},
2177 "ssbo { atomicUint globalCounter; }",
2179 ~CountStep()
override =
default;
2183 workgroup atomicUint localCounter;
2186 // Initialize the local counter.
2187 if (sk_LocalInvocationID.x == 0) {
2188 atomicStore(localCounter, 0);
2191 // Synchronize the threads in the workgroup so they all see the initial value.
2194 // All threads increment the counter.
2195 atomicAdd(localCounter, 1);
2197 // Synchronize the threads again to ensure they have all executed the increment
2198 // and the following load reads the same value across all threads in the
2202 // Add the workgroup-only tally to the global counter.
2203 if (sk_LocalInvocationID.x == 0) {
2204 atomicAdd(globalCounter, atomicLoad(localCounter));
2213 SkASSERT(r.fFlow == DataFlow::kShared);
2214 return sizeof(uint32_t);
2218 const ResourceDesc& r,
2220 size_t bufferSize)
const override {
2222 *
static_cast<uint32_t*
>(
buffer) = 0;
2227 builder.appendStep(&indirectStep);
2229 if (!indirectBufferInfo) {
2247 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2250 std::unique_ptr<Recording> recording = recorder->snap();
2262 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2263 const uint32_t
result =
static_cast<const uint32_t*
>(
2264 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2266 result == kExpectedCount,
2267 "expected '%u', found '%u'",
2276 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
2278 constexpr uint32_t kWorkgroupCount = 32;
2279 constexpr uint32_t kWorkgroupSize = 1024;
2284 "TestAtomicOperationsMetal",
2285 {kWorkgroupSize, 1, 1},
2296 ~TestComputeStep()
override =
default;
2300 static constexpr std::string_view kSource = R
"(
2301 #include <metal_stdlib>
2303 using namespace metal;
2305 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2306 device atomic_uint& globalCounter [[buffer(0)]]) {
2307 threadgroup atomic_uint localCounter;
2309 // Initialize the local counter.
2310 if (localId.x == 0u) {
2311 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2314 // Synchronize the threads in the workgroup so they all see the initial value.
2315 threadgroup_barrier(mem_flags::mem_threadgroup);
2317 // All threads increment the counter.
2318 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2320 // Synchronize the threads again to ensure they have all executed the increment
2321 // and the following load reads the same value across all threads in the
2323 threadgroup_barrier(mem_flags::mem_threadgroup);
2325 // Add the workgroup-only tally to the global counter.
2326 if (localId.x == 0u) {
2327 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2328 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2332 return {kSource,
"atomicCount"};
2338 SkASSERT(r.fFlow == DataFlow::kShared);
2339 return sizeof(uint32_t);
2347 const ResourceDesc& r,
2349 size_t bufferSize)
const override {
2351 *
static_cast<uint32_t*
>(
buffer) = 0;
2370 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2373 std::unique_ptr<Recording> recording = recorder->snap();
2385 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2386 const uint32_t
result =
static_cast<const uint32_t*
>(
2387 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2389 result == kExpectedCount,
2390 "expected '%u', found '%u'",
2399 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
2401 constexpr uint32_t kWorkgroupCount = 32;
2402 constexpr uint32_t kWorkgroupSize = 1024;
2407 "TestAtomicOperationsMetal",
2408 {kWorkgroupSize, 1, 1},
2424 ~TestComputeStep()
override =
default;
2431 static constexpr std::string_view kSource = R
"(
2432 #include <metal_stdlib>
2434 using namespace metal;
2436 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2437 device atomic_uint& globalCounter [[buffer(0)]],
2438 threadgroup atomic_uint& localCounter [[threadgroup(0)]]) {
2439 // Initialize the local counter.
2440 if (localId.x == 0u) {
2441 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2444 // Synchronize the threads in the workgroup so they all see the initial value.
2445 threadgroup_barrier(mem_flags::mem_threadgroup);
2447 // All threads increment the counter.
2448 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2450 // Synchronize the threads again to ensure they have all executed the increment
2451 // and the following load reads the same value across all threads in the
2453 threadgroup_barrier(mem_flags::mem_threadgroup);
2455 // Add the workgroup-only tally to the global counter.
2456 if (localId.x == 0u) {
2457 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2458 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2462 return {kSource,
"atomicCount"};
2468 SkASSERT(r.fFlow == DataFlow::kShared);
2469 return sizeof(uint32_t);
2477 const ResourceDesc& r,
2479 size_t bufferSize)
const override {
2481 *
static_cast<uint32_t*
>(
buffer) = 0;
2500 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2503 std::unique_ptr<Recording> recording = recorder->snap();
2515 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2516 const uint32_t
result =
static_cast<const uint32_t*
>(
2517 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2519 result == kExpectedCount,
2520 "expected '%u', found '%u'",
2531 std::unique_ptr<Recorder> recorder = context->
makeRecorder();
2533 constexpr uint32_t kWorkgroupCount = 32;
2534 constexpr uint32_t kWorkgroupSize = 256;
2539 "TestAtomicOperationsWGSL",
2540 {kWorkgroupSize, 1, 1},
2551 ~TestComputeStep()
override =
default;
2555 static constexpr std::string_view kSource = R
"(
2556 @group(0) @binding(0) var<storage, read_write> globalCounter: atomic<u32>;
2558 var<workgroup> localCounter: atomic<u32>;
2560 @compute @workgroup_size(256)
2561 fn atomicCount(@builtin(local_invocation_id) localId: vec3u) {
2562 // Initialize the local counter.
2563 if localId.x == 0u {
2564 atomicStore(&localCounter, 0u);
2567 // Synchronize the threads in the workgroup so they all see the initial value.
2570 // All threads increment the counter.
2571 atomicAdd(&localCounter, 1u);
2573 // Synchronize the threads again to ensure they have all executed the increment
2574 // and the following load reads the same value across all threads in the
2578 // Add the workgroup-only tally to the global counter.
2579 if localId.x == 0u {
2580 let tally = atomicLoad(&localCounter);
2581 atomicAdd(&globalCounter, tally);
2585 return {kSource,
"atomicCount"};
2591 SkASSERT(r.fFlow == DataFlow::kShared);
2592 return sizeof(uint32_t);
2600 const ResourceDesc& r,
2602 size_t bufferSize)
const override {
2604 *
static_cast<uint32_t*
>(
buffer) = 0;
2623 auto buffer = sync_buffer_to_cpu(recorder.get(),
info.fBuffer);
2626 std::unique_ptr<Recording> recording = recorder->snap();
2638 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2639 const uint32_t
result =
static_cast<const uint32_t*
>(
2640 map_buffer(context, testContext,
buffer.get(),
info.fOffset))[0];
2642 result == kExpectedCount,
2643 "expected '%u', found '%u'",
static int step(int x, SkScalar min, SkScalar max)
DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_NativeShaderSourceMetal, reporter, context, testContext)
DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT(Compute_NativeShaderSourceWGSL, reporter, context, testContext)
#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS( name, reporter, graphite_context, test_context)
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
@ kRGBA_8888_SkColorType
pixel with 8 bits for red, green, blue, alpha; in 32-bit word
static constexpr SkColor SkColorSetARGB(U8CPU a, U8CPU r, U8CPU g, U8CPU b)
constexpr SkColor SK_ColorGREEN
sk_sp< T > sk_ref_sp(T *obj)
static const size_t kBufferSize
SkDEBUGCODE(SK_SPI) SkThreadID SkGetThreadID()
#define REPORTER_ASSERT(r, cond,...)
void allocPixels(const SkImageInfo &info, size_t rowBytes)
bool peekPixels(SkPixmap *pixmap) const
const SkImageInfo & info() const
SkColor4f getColor4f(int x, int y) const
uint32_t * writable_addr32(int x, int y) const
const void * addr() const
bool bufferMapsAreAsync() const
virtual TextureInfo getDefaultSampledTextureInfo(SkColorType, Mipmapped mipmapped, Protected, Renderable) const =0
virtual SamplerDesc calculateSamplerParameters(int resourceIndex, const ResourceDesc &) const
virtual std::string computeSkSL() const
virtual WorkgroupSize calculateGlobalDispatchSize() const
@ kWriteOnlyStorageTexture
virtual void prepareStorageBuffer(int resourceIndex, const ResourceDesc &resource, void *buffer, size_t bufferSize) const
virtual std::tuple< SkISize, SkColorType > calculateTextureParameters(int resourceIndex, const ResourceDesc &) const
ComputeStep(std::string_view name, WorkgroupSize localDispatchSize, SkSpan< const ResourceDesc > resources, SkSpan< const WorkgroupBufferDesc > workgroupBuffers={}, Flags baseFlags=Flags::kNone)
virtual NativeShaderSource nativeShaderSource(NativeShaderFormat) const
virtual void prepareUniformBuffer(int resourceIndex, const ResourceDesc &, UniformManager *) const
virtual size_t calculateBufferSize(int resourceIndex, const ResourceDesc &) const
const Caps * caps() const
std::unique_ptr< Recorder > makeRecorder(const RecorderOptions &={})
bool insertRecording(const InsertRecordingInfo &)
const Caps * caps() const
ResourceProvider * resourceProvider()
std::unique_ptr< Recording > snap()
sk_sp< Buffer > findOrCreateBuffer(size_t size, BufferType type, AccessPattern, std::string_view label)
void syncedSubmit(skgpu::graphite::Context *)
virtual skgpu::ContextType contextType()=0
uint32_t uint32_t * format
SK_API sk_sp< SkDocument > Make(SkWStream *dst, const SkSerialProcs *=nullptr, std::function< void(const SkPicture *)> onEndPage=nullptr)
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified vm service A custom Dart VM Service port The default is to pick a randomly available open port disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode disable vm service Disable mDNS Dart VM Service publication Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set endless trace buffer
constexpr size_t kIndirectDispatchArgumentSize
@ kDawn_D3D11
Direct3D 12.
bool IsMetalContextType(skgpu::ContextType type)
bool IsDawnContextType(skgpu::ContextType type)
static constexpr SkIRect MakeWH(int32_t w, int32_t h)
const SkColorInfo & colorInfo() const
static SkImageInfo Make(int width, int height, SkColorType ct, SkAlphaType at)
std::shared_ptr< const fml::Mapping > data