Flutter Engine
The Flutter Engine
Macros | Functions
ComputeTest.cpp File Reference
#include "tests/Test.h"
#include "include/core/SkBitmap.h"
#include "include/gpu/graphite/Context.h"
#include "include/gpu/graphite/Recorder.h"
#include "include/gpu/graphite/Recording.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/ComputePipelineDesc.h"
#include "src/gpu/graphite/ComputeTypes.h"
#include "src/gpu/graphite/ContextPriv.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/compute/ComputeStep.h"
#include "src/gpu/graphite/compute/DispatchGroup.h"
#include "src/gpu/graphite/task/ComputeTask.h"
#include "src/gpu/graphite/task/CopyTask.h"
#include "src/gpu/graphite/task/SynchronizeToCpuTask.h"
#include "src/gpu/graphite/task/UploadTask.h"
#include "tools/graphite/GraphiteTestContext.h"

Go to the source code of this file.

Macros

#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS( name, reporter, graphite_context, test_context)
 

Functions

 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_SingleDispatchTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_DispatchGroupTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_UniformBufferTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ExternallyAssignedBuffer, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_StorageTexture, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_StorageTextureReadAndWrite, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ReadOnlyStorageBuffer, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_StorageTextureMultipleComputeSteps, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_SampledTexture, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_AtomicOperationsTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_AtomicOperationsOverArrayAndStructTest, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ClearedBuffer, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ClearOrdering, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_ClearOrderingScratchBuffers, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (Compute_IndirectDispatch, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT (Compute_NativeShaderSourceMetal, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT (Compute_WorkgroupBufferDescMetal, reporter, context, testContext)
 
 DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT (Compute_NativeShaderSourceWGSL, reporter, context, testContext)
 

Macro Definition Documentation

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS

#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS (   name,
  reporter,
  graphite_context,
  test_context 
)
Value:
is_dawn_or_metal_context_type, \
graphite_context, \
test_context, \
reporter
Definition: FontMgrTest.cpp:39
#define DEF_GRAPHITE_TEST_FOR_CONTEXTS(name, context_filter, reporter, graphite_ctx, test_ctx, ctsEnforcement)
Definition: Test.h:368
DEF_SWITCHES_START aot vmservice shared library name
Definition: switches.h:32

Definition at line 100 of file ComputeTest.cpp.

Function Documentation

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [1/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_AtomicOperationsOverArrayAndStructTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1616 of file ComputeTest.cpp.

1619 {
1620 // This fails on Dawn D3D11, b/315834710
1621 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
1622 return;
1623 }
1624
1625 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1626
1627 constexpr uint32_t kWorkgroupCount = 32;
1628 constexpr uint32_t kWorkgroupSize = 256;
1629
1630 class TestComputeStep : public ComputeStep {
1631 public:
1632 TestComputeStep() : ComputeStep(
1633 /*name=*/"TestAtomicOperationsOverArrayAndStruct",
1634 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1635 /*resources=*/{
1636 {
1638 /*flow=*/DataFlow::kShared,
1639 /*policy=*/ResourcePolicy::kMapped,
1640 /*slot=*/0,
1641 /*sksl=*/"ssbo {\n"
1642 " atomicUint globalCountsFirstHalf;\n"
1643 " atomicUint globalCountsSecondHalf;\n"
1644 "}\n"
1645 }
1646 }) {}
1647 ~TestComputeStep() override = default;
1648
1649 // Construct a kernel that increments a two global (device memory) counters across multiple
1650 // workgroups. Each workgroup maintains its own independent tallies in workgroup-shared
1651 // counters which are then added to the global counts.
1652 //
1653 // This exercises atomic store/load/add and coherent reads and writes over memory in storage
1654 // and workgroup address spaces.
1655 std::string computeSkSL() const override {
1656 return R"(
1657 const uint WORKGROUP_SIZE = 256;
1658
1659 workgroup atomicUint localCounts[2];
1660
1661 void main() {
1662 // Initialize the local counts.
1663 if (sk_LocalInvocationID.x == 0) {
1664 atomicStore(localCounts[0], 0);
1665 atomicStore(localCounts[1], 0);
1666 }
1667
1668 // Synchronize the threads in the workgroup so they all see the initial value.
1669 workgroupBarrier();
1670
1671 // Each thread increments one of the local counters based on its invocation
1672 // index.
1673 uint idx = sk_LocalInvocationID.x < (WORKGROUP_SIZE / 2) ? 0 : 1;
1674 atomicAdd(localCounts[idx], 1);
1675
1676 // Synchronize the threads again to ensure they have all executed the increments
1677 // and the following load reads the same value across all threads in the
1678 // workgroup.
1679 workgroupBarrier();
1680
1681 // Add the workgroup-only tally to the global counter.
1682 if (sk_LocalInvocationID.x == 0) {
1683 atomicAdd(globalCountsFirstHalf, atomicLoad(localCounts[0]));
1684 atomicAdd(globalCountsSecondHalf, atomicLoad(localCounts[1]));
1685 }
1686 }
1687 )";
1688 }
1689
1690 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1691 SkASSERT(index == 0);
1692 SkASSERT(r.fSlot == 0);
1693 SkASSERT(r.fFlow == DataFlow::kShared);
1694 return 2 * sizeof(uint32_t);
1695 }
1696
1698 return WorkgroupSize(kWorkgroupCount, 1, 1);
1699 }
1700
1701 void prepareStorageBuffer(int resourceIndex,
1702 const ResourceDesc& r,
1703 void* buffer,
1704 size_t bufferSize) const override {
1705 SkASSERT(resourceIndex == 0);
1706 uint32_t* data = static_cast<uint32_t*>(buffer);
1707 data[0] = 0;
1708 data[1] = 0;
1709 }
1710 } step;
1711
1712 DispatchGroup::Builder builder(recorder.get());
1713 builder.appendStep(&step);
1714
1715 BindBufferInfo info = builder.getSharedBufferResource(0);
1716 if (!info) {
1717 ERRORF(reporter, "shared resource at slot 0 is missing");
1718 return;
1719 }
1720
1721 // Record the compute pass task.
1723 groups.push_back(builder.finalize());
1724 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1725
1726 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1727 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
1728
1729 // Submit the work and wait for it to complete.
1730 std::unique_ptr<Recording> recording = recorder->snap();
1731 if (!recording) {
1732 ERRORF(reporter, "Failed to make recording");
1733 return;
1734 }
1735
1736 InsertRecordingInfo insertInfo;
1737 insertInfo.fRecording = recording.get();
1738 context->insertRecording(insertInfo);
1739 testContext->syncedSubmit(context);
1740
1741 // Verify the contents of the output buffer.
1742 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize / 2;
1743
1744 const uint32_t* ssboData = static_cast<const uint32_t*>(
1745 map_buffer(context, testContext, buffer.get(), info.fOffset));
1746 const uint32_t firstHalfCount = ssboData[0];
1747 const uint32_t secondHalfCount = ssboData[1];
1749 firstHalfCount == kExpectedCount,
1750 "expected '%u', found '%u'",
1751 kExpectedCount,
1752 firstHalfCount);
1754 secondHalfCount == kExpectedCount,
1755 "expected '%u', found '%u'",
1756 kExpectedCount,
1757 secondHalfCount);
1758}
static int step(int x, SkScalar min, SkScalar max)
Definition: BlurTest.cpp:215
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
Definition: DM.cpp:213
#define SkASSERT(cond)
Definition: SkAssert.h:116
#define REPORTER_ASSERT(r, cond,...)
Definition: Test.h:286
#define ERRORF(r,...)
Definition: Test.h:293
virtual std::string computeSkSL() const
Definition: ComputeStep.cpp:62
virtual WorkgroupSize calculateGlobalDispatchSize() const
Definition: ComputeStep.cpp:89
virtual void prepareStorageBuffer(int resourceIndex, const ResourceDesc &resource, void *buffer, size_t bufferSize) const
Definition: ComputeStep.cpp:53
virtual size_t calculateBufferSize(int resourceIndex, const ResourceDesc &) const
Definition: ComputeStep.cpp:72
SK_API sk_sp< SkDocument > Make(SkWStream *dst, const SkSerialProcs *=nullptr, std::function< void(const SkPicture *)> onEndPage=nullptr)
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified vm service A custom Dart VM Service port The default is to pick a randomly available open port disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode disable vm service Disable mDNS Dart VM Service publication Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set endless trace buffer
Definition: switches.h:126
@ kDawn_D3D11
Direct3D 12.
std::shared_ptr< const fml::Mapping > data
Definition: texture_gles.cc:63

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [2/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_AtomicOperationsTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1486 of file ComputeTest.cpp.

1489 {
1490 // This fails on Dawn D3D11, b/315834710
1491 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
1492 return;
1493 }
1494
1495 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1496
1497 constexpr uint32_t kWorkgroupCount = 32;
1498 constexpr uint32_t kWorkgroupSize = 256;
1499
1500 class TestComputeStep : public ComputeStep {
1501 public:
1502 TestComputeStep() : ComputeStep(
1503 /*name=*/"TestAtomicOperations",
1504 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1505 /*resources=*/{
1506 {
1508 /*flow=*/DataFlow::kShared,
1509 /*policy=*/ResourcePolicy::kMapped,
1510 /*slot=*/0,
1511 /*sksl=*/"ssbo { atomicUint globalCounter; }",
1512 }
1513 }) {}
1514 ~TestComputeStep() override = default;
1515
1516 // A kernel that increments a global (device memory) counter across multiple workgroups.
1517 // Each workgroup maintains its own independent tally in a workgroup-shared counter which
1518 // is then added to the global count.
1519 //
1520 // This exercises atomic store/load/add and coherent reads and writes over memory in storage
1521 // and workgroup address spaces.
1522 std::string computeSkSL() const override {
1523 return R"(
1524 workgroup atomicUint localCounter;
1525
1526 void main() {
1527 // Initialize the local counter.
1528 if (sk_LocalInvocationID.x == 0) {
1529 atomicStore(localCounter, 0);
1530 }
1531
1532 // Synchronize the threads in the workgroup so they all see the initial value.
1533 workgroupBarrier();
1534
1535 // All threads increment the counter.
1536 atomicAdd(localCounter, 1);
1537
1538 // Synchronize the threads again to ensure they have all executed the increment
1539 // and the following load reads the same value across all threads in the
1540 // workgroup.
1541 workgroupBarrier();
1542
1543 // Add the workgroup-only tally to the global counter.
1544 if (sk_LocalInvocationID.x == 0) {
1545 atomicAdd(globalCounter, atomicLoad(localCounter));
1546 }
1547 }
1548 )";
1549 }
1550
1551 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1552 SkASSERT(index == 0);
1553 SkASSERT(r.fSlot == 0);
1554 SkASSERT(r.fFlow == DataFlow::kShared);
1555 return sizeof(uint32_t);
1556 }
1557
1559 return WorkgroupSize(kWorkgroupCount, 1, 1);
1560 }
1561
1562 void prepareStorageBuffer(int resourceIndex,
1563 const ResourceDesc& r,
1564 void* buffer,
1565 size_t bufferSize) const override {
1566 SkASSERT(resourceIndex == 0);
1567 *static_cast<uint32_t*>(buffer) = 0;
1568 }
1569 } step;
1570
1571 DispatchGroup::Builder builder(recorder.get());
1572 builder.appendStep(&step);
1573
1574 BindBufferInfo info = builder.getSharedBufferResource(0);
1575 if (!info) {
1576 ERRORF(reporter, "shared resource at slot 0 is missing");
1577 return;
1578 }
1579
1580 // Record the compute pass task.
1582 groups.push_back(builder.finalize());
1583 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1584
1585 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1586 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
1587
1588 // Submit the work and wait for it to complete.
1589 std::unique_ptr<Recording> recording = recorder->snap();
1590 if (!recording) {
1591 ERRORF(reporter, "Failed to make recording");
1592 return;
1593 }
1594
1595 InsertRecordingInfo insertInfo;
1596 insertInfo.fRecording = recording.get();
1597 context->insertRecording(insertInfo);
1598 testContext->syncedSubmit(context);
1599
1600 // Verify the contents of the output buffer.
1601 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
1602 const uint32_t result = static_cast<const uint32_t*>(
1603 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
1605 result == kExpectedCount,
1606 "expected '%u', found '%u'",
1607 kExpectedCount,
1608 result);
1609}
GAsyncResult * result

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [3/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ClearedBuffer  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1760 of file ComputeTest.cpp.

1763 {
1764 constexpr uint32_t kProblemSize = 512;
1765
1766 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
1767 // processes 1 vector at a time.
1768 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
1769
1770 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1771
1772 // The ComputeStep requests an unmapped buffer that is zero-initialized. It writes the output to
1773 // a mapped buffer which test verifies.
1774 class TestComputeStep : public ComputeStep {
1775 public:
1776 TestComputeStep() : ComputeStep(
1777 /*name=*/"TestClearedBuffer",
1778 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1779 /*resources=*/{
1780 // Zero initialized input buffer
1781 {
1782 // TODO(b/299979165): Declare this binding as read-only.
1784 /*flow=*/DataFlow::kPrivate,
1785 /*policy=*/ResourcePolicy::kClear,
1786 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
1787 },
1788 // Output buffer:
1789 {
1791 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
1792 // Builder
1793 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
1794 /*slot=*/0,
1795 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1796 }
1797 }) {}
1798 ~TestComputeStep() override = default;
1799
1800 std::string computeSkSL() const override {
1801 return R"(
1802 void main() {
1803 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1804 }
1805 )";
1806 }
1807
1808 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1809 return sizeof(uint32_t) * kProblemSize;
1810 }
1811
1812 void prepareStorageBuffer(int resourceIndex,
1813 const ResourceDesc& r,
1814 void* buffer,
1815 size_t bufferSize) const override {
1816 // Should receive this call only for the mapped buffer.
1817 SkASSERT(resourceIndex == 1);
1818 }
1819
1821 return WorkgroupSize(1, 1, 1);
1822 }
1823 } step;
1824
1825 DispatchGroup::Builder builder(recorder.get());
1826 if (!builder.appendStep(&step)) {
1827 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
1828 return;
1829 }
1830
1831 // The output buffer should have been placed in the right output slot.
1832 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
1833 if (!outputInfo) {
1834 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
1835 return;
1836 }
1837
1838 // Record the compute task
1840 groups.push_back(builder.finalize());
1841 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1842
1843 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1844 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
1845
1846 // Submit the work and wait for it to complete.
1847 std::unique_ptr<Recording> recording = recorder->snap();
1848 if (!recording) {
1849 ERRORF(reporter, "Failed to make recording");
1850 return;
1851 }
1852
1853 InsertRecordingInfo insertInfo;
1854 insertInfo.fRecording = recording.get();
1855 context->insertRecording(insertInfo);
1856 testContext->syncedSubmit(context);
1857
1858 // Verify the contents of the output buffer.
1859 uint32_t* outData = static_cast<uint32_t*>(
1860 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
1861 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1862 for (unsigned int i = 0; i < kProblemSize; ++i) {
1863 const uint32_t found = outData[i];
1864 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1865 }
1866}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [4/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ClearOrdering  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1868 of file ComputeTest.cpp.

1871 {
1872 // Initiate two independent DispatchGroups operating on the same buffer. The first group
1873 // writes garbage to the buffer and the second group copies the contents to an output buffer.
1874 // This test validates that the reads, writes, and clear occur in the expected order.
1875 constexpr uint32_t kWorkgroupSize = 64;
1876
1877 // Initialize buffer with non-zero data.
1878 class FillWithGarbage : public ComputeStep {
1879 public:
1880 FillWithGarbage() : ComputeStep(
1881 /*name=*/"FillWithGarbage",
1882 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1883 /*resources=*/{
1884 {
1886 /*flow=*/DataFlow::kShared,
1887 /*policy=*/ResourcePolicy::kNone,
1888 /*slot=*/0,
1889 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1890 }
1891 }) {}
1892 ~FillWithGarbage() override = default;
1893
1894 std::string computeSkSL() const override {
1895 return R"(
1896 void main() {
1897 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
1898 }
1899 )";
1900 }
1901 } garbageStep;
1902
1903 // Second stage just copies the data to a destination buffer. This is only to verify that this
1904 // stage, issued in a separate DispatchGroup, observes the clear.
1905 class CopyBuffer : public ComputeStep {
1906 public:
1907 CopyBuffer() : ComputeStep(
1908 /*name=*/"CopyBuffer",
1909 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1910 /*resources=*/{
1911 {
1913 /*flow=*/DataFlow::kShared,
1914 /*policy=*/ResourcePolicy::kNone,
1915 /*slot=*/0,
1916 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
1917 },
1918 {
1920 /*flow=*/DataFlow::kShared,
1921 /*policy=*/ResourcePolicy::kNone,
1922 /*slot=*/1,
1923 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1924 }
1925 }) {}
1926 ~CopyBuffer() override = default;
1927
1928 std::string computeSkSL() const override {
1929 return R"(
1930 void main() {
1931 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1932 }
1933 )";
1934 }
1935 } copyStep;
1936
1937 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1938 DispatchGroup::Builder builder(recorder.get());
1939
1940 constexpr size_t kElementCount = 4 * kWorkgroupSize;
1941 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
1942 auto input = recorder->priv().drawBufferManager()->getStorage(kBufferSize);
1943 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
1944
1946
1947 // First group.
1948 builder.assignSharedBuffer({input, kBufferSize}, 0);
1949 builder.appendStep(&garbageStep, {{1, 1, 1}});
1950 groups.push_back(builder.finalize());
1951
1952 // Second group.
1953 builder.reset();
1954 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
1955 builder.assignSharedBuffer({output, kBufferSize}, 1);
1956 builder.appendStep(&copyStep, {{1, 1, 1}});
1957 groups.push_back(builder.finalize());
1958
1959 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1960 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1961 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
1962
1963 // Submit the work and wait for it to complete.
1964 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
1965 if (!recording) {
1966 ERRORF(reporter, "Failed to make recording");
1967 return;
1968 }
1969
1970 // Verify the contents of the output buffer.
1971 uint32_t* outData = static_cast<uint32_t*>(
1972 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
1973 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1974 for (unsigned int i = 0; i < kElementCount; ++i) {
1975 const uint32_t found = outData[i];
1976 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1977 }
1978}
static const size_t kBufferSize
Definition: SkString.cpp:27

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [5/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ClearOrderingScratchBuffers  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1980 of file ComputeTest.cpp.

1983 {
1984 // This test is the same as the ClearOrdering test but the two stages write to a recycled
1985 // ScratchBuffer. This is primarily to test ScratchBuffer reuse.
1986 constexpr uint32_t kWorkgroupSize = 64;
1987
1988 // Initialize buffer with non-zero data.
1989 class FillWithGarbage : public ComputeStep {
1990 public:
1991 FillWithGarbage() : ComputeStep(
1992 /*name=*/"FillWithGarbage",
1993 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1994 /*resources=*/{
1995 {
1997 /*flow=*/DataFlow::kShared,
1998 /*policy=*/ResourcePolicy::kNone,
1999 /*slot=*/0,
2000 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
2001 }
2002 }) {}
2003 ~FillWithGarbage() override = default;
2004
2005 std::string computeSkSL() const override {
2006 return R"(
2007 void main() {
2008 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
2009 }
2010 )";
2011 }
2012 } garbageStep;
2013
2014 // Second stage just copies the data to a destination buffer. This is only to verify that this
2015 // stage (issued in a separate DispatchGroup) sees the changes.
2016 class CopyBuffer : public ComputeStep {
2017 public:
2018 CopyBuffer() : ComputeStep(
2019 /*name=*/"CopyBuffer",
2020 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2021 /*resources=*/{
2022 {
2024 /*flow=*/DataFlow::kShared,
2025 /*policy=*/ResourcePolicy::kNone,
2026 /*slot=*/0,
2027 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
2028 },
2029 {
2031 /*flow=*/DataFlow::kShared,
2032 /*policy=*/ResourcePolicy::kNone,
2033 /*slot=*/1,
2034 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
2035 }
2036 }) {}
2037 ~CopyBuffer() override = default;
2038
2039 std::string computeSkSL() const override {
2040 return R"(
2041 void main() {
2042 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
2043 }
2044 )";
2045 }
2046 } copyStep;
2047
2048 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2049 DispatchGroup::Builder builder(recorder.get());
2050
2051 constexpr size_t kElementCount = 4 * kWorkgroupSize;
2052 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
2053 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
2054
2056
2057 // First group.
2058 {
2059 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2060 auto input = scratch.suballocate(kBufferSize);
2061 builder.assignSharedBuffer({input, kBufferSize}, 0);
2062
2063 // `scratch` returns to the scratch buffer pool when it goes out of scope
2064 }
2065 builder.appendStep(&garbageStep, {{1, 1, 1}});
2066 groups.push_back(builder.finalize());
2067
2068 // Second group.
2069 builder.reset();
2070 {
2071 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2072 auto input = scratch.suballocate(kBufferSize);
2073 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
2074 }
2075 builder.assignSharedBuffer({output, kBufferSize}, 1);
2076 builder.appendStep(&copyStep, {{1, 1, 1}});
2077 groups.push_back(builder.finalize());
2078
2079 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2080 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2081 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
2082
2083 // Submit the work and wait for it to complete.
2084 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
2085 if (!recording) {
2086 ERRORF(reporter, "Failed to make recording");
2087 return;
2088 }
2089
2090 // Verify the contents of the output buffer.
2091 uint32_t* outData = static_cast<uint32_t*>(
2092 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
2093 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
2094 for (unsigned int i = 0; i < kElementCount; ++i) {
2095 const uint32_t found = outData[i];
2096 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
2097 }
2098}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [6/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_DispatchGroupTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 245 of file ComputeTest.cpp.

248 {
249 // TODO(b/315834710): This fails on Dawn D3D11
250 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
251 return;
252 }
253
254 constexpr uint32_t kProblemSize = 512;
255 constexpr float kFactor1 = 4.f;
256 constexpr float kFactor2 = 3.f;
257
258 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
259 // processes 1 vector at a time.
260 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
261
262 std::unique_ptr<Recorder> recorder = context->makeRecorder();
263
264 // Define two steps that perform two multiplication passes over the same input.
265
266 class TestComputeStep1 : public ComputeStep {
267 public:
268 // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
269 // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
270 TestComputeStep1() : ComputeStep(
271 /*name=*/"TestArrayMultiplyFirstPass",
272 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
273 /*resources=*/{
274 // Input buffer:
275 {
276 // TODO(b/299979165): Declare this binding as read-only.
278 /*flow=*/DataFlow::kPrivate,
279 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
280 /*sksl=*/"inputBlock {\n"
281 " float factor;\n"
282 " layout(offset=16) float4 in_data[];\n"
283 "}",
284 },
285 // Output buffers:
286 {
288 /*flow=*/DataFlow::kShared,
289 /*policy=*/ResourcePolicy::kNone, // GPU-only, read by second step
290 /*slot=*/0,
291 /*sksl=*/"outputBlock1 { float4 forward_data[]; }",
292 },
293 {
295 /*flow=*/DataFlow::kShared,
296 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
297 /*slot=*/1,
298 /*sksl=*/"outputBlock2 { float2 extra_data; }",
299 }
300 }) {}
301 ~TestComputeStep1() override = default;
302
303 // A kernel that multiplies a large array of floats by a supplied factor.
304 std::string computeSkSL() const override {
305 return R"(
306 void main() {
307 uint idx = sk_GlobalInvocationID.x;
308 forward_data[idx] = in_data[idx] * factor;
309 if (idx == 0) {
310 extra_data.x = factor;
311 extra_data.y = 2 * factor;
312 }
313 }
314 )";
315 }
316
317 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
318 if (index == 0) {
319 SkASSERT(r.fFlow == DataFlow::kPrivate);
320 return sizeof(float) * (kProblemSize + 4);
321 }
322 if (index == 1) {
323 SkASSERT(r.fFlow == DataFlow::kShared);
324 SkASSERT(r.fSlot == 0);
325 return sizeof(float) * kProblemSize;
326 }
327
328 SkASSERT(index == 2);
329 SkASSERT(r.fSlot == 1);
330 SkASSERT(r.fFlow == DataFlow::kShared);
331 return 2 * sizeof(float);
332 }
333
334 void prepareStorageBuffer(int resourceIndex,
335 const ResourceDesc& r,
336 void* buffer,
337 size_t bufferSize) const override {
338 if (resourceIndex != 0) {
339 return;
340 }
341
342 size_t dataCount = sizeof(float) * (kProblemSize + 4);
343 SkASSERT(bufferSize == dataCount);
344 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
345 inData[0] = kFactor1;
346 for (unsigned int i = 0; i < kProblemSize; ++i) {
347 inData[i + 4] = i + 1;
348 }
349 }
350
352 return WorkgroupSize(1, 1, 1);
353 }
354 } step1;
355
356 class TestComputeStep2 : public ComputeStep {
357 public:
358 TestComputeStep2() : ComputeStep(
359 /*name=*/"TestArrayMultiplySecondPass",
360 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
361 /*resources=*/{
362 // Input buffer:
363 {
365 /*flow=*/DataFlow::kShared,
366 /*policy=*/ResourcePolicy::kNone, // GPU-only
367 /*slot=*/0, // this is the output from the first step
368 /*sksl=*/"inputBlock { float4 in_data[]; }",
369 },
370 {
372 /*flow=*/DataFlow::kPrivate,
373 /*policy=*/ResourcePolicy::kMapped,
374 /*sksl=*/"factorBlock { float factor; }"
375 },
376 // Output buffer:
377 {
379 /*flow=*/DataFlow::kShared,
380 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
381 /*slot=*/2,
382 /*sksl=*/"outputBlock { float4 out_data[]; }",
383 }
384 }) {}
385 ~TestComputeStep2() override = default;
386
387 // A kernel that multiplies a large array of floats by a supplied factor.
388 std::string computeSkSL() const override {
389 return R"(
390 void main() {
391 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
392 }
393 )";
394 }
395
396 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
397 SkASSERT(index != 0);
398 if (index == 1) {
399 SkASSERT(r.fFlow == DataFlow::kPrivate);
400 return sizeof(float) * 4;
401 }
402 SkASSERT(index == 2);
403 SkASSERT(r.fSlot == 2);
404 SkASSERT(r.fFlow == DataFlow::kShared);
405 return sizeof(float) * kProblemSize;
406 }
407
408 void prepareStorageBuffer(int resourceIndex,
409 const ResourceDesc& r,
410 void* buffer,
411 size_t bufferSize) const override {
412 if (resourceIndex != 1) {
413 return;
414 }
415 SkASSERT(r.fFlow == DataFlow::kPrivate);
416 *static_cast<float*>(buffer) = kFactor2;
417 }
418
420 return WorkgroupSize(1, 1, 1);
421 }
422 } step2;
423
424 DispatchGroup::Builder builder(recorder.get());
425 builder.appendStep(&step1);
426 builder.appendStep(&step2);
427
428 // Slots 0, 1, and 2 should all contain shared buffers. Slot 1 contains the extra output buffer
429 // from step 1 while slot 2 contains the result of the second multiplication pass from step 1.
430 // Slot 0 is not mappable.
432 std::holds_alternative<BufferView>(builder.outputTable().fSharedSlots[0]),
433 "shared resource at slot 0 is missing");
434 BindBufferInfo outputInfo = builder.getSharedBufferResource(2);
435 if (!outputInfo) {
436 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
437 return;
438 }
439
440 // Extra output buffer from step 1 (corresponding to 'outputBlock2')
441 BindBufferInfo extraOutputInfo = builder.getSharedBufferResource(1);
442 if (!extraOutputInfo) {
443 ERRORF(reporter, "shared resource at slot 1 is missing");
444 return;
445 }
446
447 // Record the compute task
449 groups.push_back(builder.finalize());
450 recorder->priv().add(ComputeTask::Make(std::move(groups)));
451
452 // Ensure the output buffers get synchronized to the CPU once the GPU submission has finished.
453 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
454 auto extraOutputBuffer = sync_buffer_to_cpu(recorder.get(), extraOutputInfo.fBuffer);
455
456 // Submit the work and wait for it to complete.
457 std::unique_ptr<Recording> recording = recorder->snap();
458 if (!recording) {
459 ERRORF(reporter, "Failed to make recording");
460 return;
461 }
462
463 InsertRecordingInfo insertInfo;
464 insertInfo.fRecording = recording.get();
465 context->insertRecording(insertInfo);
466 testContext->syncedSubmit(context);
467
468 // Verify the contents of the output buffer from step 2
469 float* outData = static_cast<float*>(
470 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
471 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
472 for (unsigned int i = 0; i < kProblemSize; ++i) {
473 const float expected = (i + 1) * kFactor1 * kFactor2;
474 const float found = outData[i];
475 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
476 }
477
478 // Verify the contents of the extra output buffer from step 1
479 float* extraOutData = static_cast<float*>(
480 map_buffer(context, testContext, extraOutputBuffer.get(), extraOutputInfo.fOffset));
481 SkASSERT(extraOutputBuffer->isMapped() && extraOutData != nullptr);
483 kFactor1 == extraOutData[0],
484 "expected '%f', found '%f'",
485 kFactor1,
486 extraOutData[0]);
488 2 * kFactor1 == extraOutData[1],
489 "expected '%f', found '%f'",
490 2 * kFactor2,
491 extraOutData[1]);
492}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [7/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ExternallyAssignedBuffer  ,
reporter  ,
context  ,
testContext   
)

Definition at line 649 of file ComputeTest.cpp.

652 {
653 constexpr uint32_t kProblemSize = 512;
654 constexpr float kFactor = 4.f;
655
656 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
657 // processes 1 vector at a time.
658 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
659
660 std::unique_ptr<Recorder> recorder = context->makeRecorder();
661
662 class TestComputeStep : public ComputeStep {
663 public:
664 TestComputeStep() : ComputeStep(
665 /*name=*/"ExternallyAssignedBuffer",
666 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
667 /*resources=*/{
668 // Input buffer:
669 {
671 /*flow=*/DataFlow::kPrivate,
672 /*policy=*/ResourcePolicy::kMapped,
673 /*sksl=*/"inputBlock {\n"
674 " float factor;\n"
675 " layout(offset = 16) float4 in_data[];\n"
676 "}\n",
677 },
678 // Output buffer:
679 {
681 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
682 // Builder
683 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
684 /*slot=*/0,
685 /*sksl=*/"outputBlock { float4 out_data[]; }",
686 }
687 }) {}
688 ~TestComputeStep() override = default;
689
690 // A kernel that multiplies a large array of floats by a supplied factor.
691 std::string computeSkSL() const override {
692 return R"(
693 void main() {
694 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
695 }
696 )";
697 }
698
699 size_t calculateBufferSize(int resourceIndex, const ResourceDesc& r) const override {
700 SkASSERT(resourceIndex == 0);
701 SkASSERT(r.fFlow == DataFlow::kPrivate);
702 return sizeof(float) * (kProblemSize + 4);
703 }
704
705 void prepareStorageBuffer(int resourceIndex,
706 const ResourceDesc& r,
707 void* buffer,
708 size_t bufferSize) const override {
709 SkASSERT(resourceIndex == 0);
710 SkASSERT(r.fFlow == DataFlow::kPrivate);
711
712 size_t dataCount = sizeof(float) * (kProblemSize + 4);
713 SkASSERT(bufferSize == dataCount);
714 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
715 inData[0] = kFactor;
716 for (unsigned int i = 0; i < kProblemSize; ++i) {
717 inData[i + 4] = i + 1;
718 }
719 }
720 } step;
721
722 // We allocate a buffer and directly assign it to the DispatchGroup::Builder. The ComputeStep
723 // will not participate in the creation of this buffer.
724 auto [_, outputInfo] =
725 recorder->priv().drawBufferManager()->getStoragePointer(sizeof(float) * kProblemSize);
726 REPORTER_ASSERT(reporter, outputInfo, "Failed to allocate output buffer");
727
728 DispatchGroup::Builder builder(recorder.get());
729 builder.assignSharedBuffer({outputInfo, sizeof(float) * kProblemSize}, 0);
730
731 // Initialize the step with a pre-determined global size
732 if (!builder.appendStep(&step, {WorkgroupSize(1, 1, 1)})) {
733 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
734 return;
735 }
736
737 // Record the compute task
739 groups.push_back(builder.finalize());
740 recorder->priv().add(ComputeTask::Make(std::move(groups)));
741
742 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
743 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
744
745 // Submit the work and wait for it to complete.
746 std::unique_ptr<Recording> recording = recorder->snap();
747 if (!recording) {
748 ERRORF(reporter, "Failed to make recording");
749 return;
750 }
751
752 InsertRecordingInfo insertInfo;
753 insertInfo.fRecording = recording.get();
754 context->insertRecording(insertInfo);
755 testContext->syncedSubmit(context);
756
757 // Verify the contents of the output buffer.
758 float* outData = static_cast<float*>(
759 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
760 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
761 for (unsigned int i = 0; i < kProblemSize; ++i) {
762 const float expected = (i + 1) * kFactor;
763 const float found = outData[i];
764 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
765 }
766}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [8/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_IndirectDispatch  ,
reporter  ,
context  ,
testContext   
)

Definition at line 2100 of file ComputeTest.cpp.

2103 {
2104 // This fails on Dawn D3D11, b/315834710
2105 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2106 return;
2107 }
2108
2109 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2110
2111 constexpr uint32_t kWorkgroupCount = 32;
2112 constexpr uint32_t kWorkgroupSize = 64;
2113
2114 // `IndirectStep` populates a buffer with the global workgroup count for `CountStep`.
2115 // `CountStep` is recorded using `DispatchGroup::appendStepIndirect()` and its workgroups get
2116 // dispatched according to the values computed by `IndirectStep` on the GPU.
2117 class IndirectStep : public ComputeStep {
2118 public:
2119 IndirectStep()
2120 : ComputeStep(
2121 /*name=*/"TestIndirectDispatch_IndirectStep",
2122 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2123 /*resources=*/
2124 {{
2126 /*flow=*/DataFlow::kShared,
2127 /*policy=*/ResourcePolicy::kClear,
2128 /*slot=*/0,
2129 // TODO(armansito): Ideally the SSBO would have a single member of
2130 // type `IndirectDispatchArgs` struct type. SkSL modules don't
2131 // support struct declarations so this is currently not possible.
2132 /*sksl=*/"ssbo { uint indirect[]; }",
2133 }}) {}
2134 ~IndirectStep() override = default;
2135
2136 // Kernel that specifies a workgroup size of `kWorkgroupCount` to be used by the indirect
2137 // dispatch.
2138 std::string computeSkSL() const override {
2139 return R"(
2140 // This needs to match `kWorkgroupCount` declared above.
2141 const uint kWorkgroupCount = 32;
2142
2143 void main() {
2144 if (sk_LocalInvocationID.x == 0) {
2145 indirect[0] = kWorkgroupCount;
2146 indirect[1] = 1;
2147 indirect[2] = 1;
2148 }
2149 }
2150 )";
2151 }
2152
2153 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2154 SkASSERT(index == 0);
2155 SkASSERT(r.fSlot == 0);
2156 SkASSERT(r.fFlow == DataFlow::kShared);
2158 }
2159
2161 return WorkgroupSize(1, 1, 1);
2162 }
2163 } indirectStep;
2164
2165 class CountStep : public ComputeStep {
2166 public:
2167 CountStep()
2168 : ComputeStep(
2169 /*name=*/"TestIndirectDispatch_CountStep",
2170 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2171 /*resources=*/
2172 {{
2174 /*flow=*/DataFlow::kShared,
2175 /*policy=*/ResourcePolicy::kMapped,
2176 /*slot=*/1,
2177 /*sksl=*/"ssbo { atomicUint globalCounter; }",
2178 }}) {}
2179 ~CountStep() override = default;
2180
2181 std::string computeSkSL() const override {
2182 return R"(
2183 workgroup atomicUint localCounter;
2184
2185 void main() {
2186 // Initialize the local counter.
2187 if (sk_LocalInvocationID.x == 0) {
2188 atomicStore(localCounter, 0);
2189 }
2190
2191 // Synchronize the threads in the workgroup so they all see the initial value.
2192 workgroupBarrier();
2193
2194 // All threads increment the counter.
2195 atomicAdd(localCounter, 1);
2196
2197 // Synchronize the threads again to ensure they have all executed the increment
2198 // and the following load reads the same value across all threads in the
2199 // workgroup.
2200 workgroupBarrier();
2201
2202 // Add the workgroup-only tally to the global counter.
2203 if (sk_LocalInvocationID.x == 0) {
2204 atomicAdd(globalCounter, atomicLoad(localCounter));
2205 }
2206 }
2207 )";
2208 }
2209
2210 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2211 SkASSERT(index == 0);
2212 SkASSERT(r.fSlot == 1);
2213 SkASSERT(r.fFlow == DataFlow::kShared);
2214 return sizeof(uint32_t);
2215 }
2216
2217 void prepareStorageBuffer(int resourceIndex,
2218 const ResourceDesc& r,
2219 void* buffer,
2220 size_t bufferSize) const override {
2221 SkASSERT(resourceIndex == 0);
2222 *static_cast<uint32_t*>(buffer) = 0;
2223 }
2224 } countStep;
2225
2226 DispatchGroup::Builder builder(recorder.get());
2227 builder.appendStep(&indirectStep);
2228 BindBufferInfo indirectBufferInfo = builder.getSharedBufferResource(0);
2229 if (!indirectBufferInfo) {
2230 ERRORF(reporter, "Shared resource at slot 0 is missing");
2231 return;
2232 }
2233 builder.appendStepIndirect(&countStep, {indirectBufferInfo, kIndirectDispatchArgumentSize});
2234
2235 BindBufferInfo info = builder.getSharedBufferResource(1);
2236 if (!info) {
2237 ERRORF(reporter, "Shared resource at slot 1 is missing");
2238 return;
2239 }
2240
2241 // Record the compute pass task.
2243 groups.push_back(builder.finalize());
2244 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2245
2246 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2247 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2248
2249 // Submit the work and wait for it to complete.
2250 std::unique_ptr<Recording> recording = recorder->snap();
2251 if (!recording) {
2252 ERRORF(reporter, "Failed to make recording");
2253 return;
2254 }
2255
2256 InsertRecordingInfo insertInfo;
2257 insertInfo.fRecording = recording.get();
2258 context->insertRecording(insertInfo);
2259 testContext->syncedSubmit(context);
2260
2261 // Verify the contents of the output buffer.
2262 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2263 const uint32_t result = static_cast<const uint32_t*>(
2264 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2266 result == kExpectedCount,
2267 "expected '%u', found '%u'",
2268 kExpectedCount,
2269 result);
2270}
constexpr size_t kIndirectDispatchArgumentSize
Definition: ComputeTypes.h:24

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [9/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_ReadOnlyStorageBuffer  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1028 of file ComputeTest.cpp.

1031 {
1032 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1033
1034 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
1035 // size.
1036 constexpr uint32_t kDim = 16;
1037
1038 class TestComputeStep : public ComputeStep {
1039 public:
1040 TestComputeStep() : ComputeStep(
1041 /*name=*/"TestReadOnlyStorageBuffer",
1042 /*localDispatchSize=*/{kDim, kDim, 1},
1043 /*resources=*/{
1044 {
1046 /*flow=*/DataFlow::kShared,
1047 /*policy=*/ResourcePolicy::kMapped,
1048 /*slot=*/0,
1049 /*sksl=*/"src { uint in_data[]; }",
1050 },
1051 {
1053 /*flow=*/DataFlow::kShared,
1054 /*policy=*/ResourcePolicy::kNone,
1055 /*slot=*/1,
1056 /*sksl=*/"dst",
1057 }
1058 }) {}
1059 ~TestComputeStep() override = default;
1060
1061 std::string computeSkSL() const override {
1062 return R"(
1063 void main() {
1064 uint ix = sk_LocalInvocationID.y * 16 + sk_LocalInvocationID.x;
1065 uint value = in_data[ix];
1066 half4 splat = half4(
1067 half(value & 0xFF),
1068 half((value >> 8) & 0xFF),
1069 half((value >> 16) & 0xFF),
1070 half((value >> 24) & 0xFF)
1071 );
1072 textureWrite(dst, sk_LocalInvocationID.xy, splat / 255.0);
1073 }
1074 )";
1075 }
1076
1077 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
1078 SkASSERT(index == 0);
1079 return kDim * kDim * sizeof(uint32_t);
1080 }
1081
1082 void prepareStorageBuffer(int index,
1083 const ResourceDesc&,
1084 void* buffer,
1085 size_t bufferSize) const override {
1086 SkASSERT(index == 0);
1087 SkASSERT(bufferSize == kDim * kDim * sizeof(uint32_t));
1088
1089 uint32_t* inputs = reinterpret_cast<uint32_t*>(buffer);
1090 for (uint32_t y = 0; y < kDim; ++y) {
1091 for (uint32_t x = 0; x < kDim; ++x) {
1092 uint32_t value =
1093 ((x * 256 / kDim) & 0xFF) | ((y * 256 / kDim) & 0xFF) << 8 | 255 << 24;
1094 *(inputs++) = value;
1095 }
1096 }
1097 }
1098
1099 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1100 int index, const ResourceDesc& r) const override {
1101 SkASSERT(index == 1);
1102 return {{kDim, kDim}, kRGBA_8888_SkColorType};
1103 }
1104
1106 return WorkgroupSize(1, 1, 1);
1107 }
1108 } step;
1109
1110 DispatchGroup::Builder builder(recorder.get());
1111 if (!builder.appendStep(&step)) {
1112 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
1113 return;
1114 }
1115
1116 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
1117 if (!dst) {
1118 ERRORF(reporter, "shared resource at slot 1 is missing");
1119 return;
1120 }
1121
1122 // Record the compute task
1124 groups.push_back(builder.finalize());
1125 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1126
1127 // Submit the work and wait for it to complete.
1128 std::unique_ptr<Recording> recording = recorder->snap();
1129 if (!recording) {
1130 ERRORF(reporter, "Failed to make recording");
1131 return;
1132 }
1133
1134 InsertRecordingInfo insertInfo;
1135 insertInfo.fRecording = recording.get();
1136 context->insertRecording(insertInfo);
1137 testContext->syncedSubmit(context);
1138
1140 SkImageInfo imgInfo =
1142 bitmap.allocPixels(imgInfo);
1143
1144 SkPixmap pixels;
1145 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1146 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1147
1148 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1149 REPORTER_ASSERT(reporter, readPixelsSuccess);
1150
1151 for (uint32_t x = 0; x < kDim; ++x) {
1152 for (uint32_t y = 0; y < kDim; ++y) {
1153 SkColor4f expected =
1154 SkColor4f::FromColor(SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
1155 SkColor4f color = pixels.getColor4f(x, y);
1156 bool pass = true;
1157 for (int i = 0; i < 4; i++) {
1158 pass &= color[i] == expected[i];
1159 }
1161 "At position {%u, %u}, "
1162 "expected {%.1f, %.1f, %.1f, %.1f}, "
1163 "found {%.1f, %.1f, %.1f, %.1f}",
1164 x, y,
1165 expected.fR, expected.fG, expected.fB, expected.fA,
1166 color.fR, color.fG, color.fB, color.fA);
1167 }
1168 }
1169}
kUnpremul_SkAlphaType
@ kRGBA_8888_SkColorType
pixel with 8 bits for red, green, blue, alpha; in 32-bit word
Definition: SkColorType.h:24
static constexpr SkColor SkColorSetARGB(U8CPU a, U8CPU r, U8CPU g, U8CPU b)
Definition: SkColor.h:49
SkColor4f getColor4f(int x, int y) const
Definition: SkPixmap.cpp:388
virtual std::tuple< SkISize, SkColorType > calculateTextureParameters(int resourceIndex, const ResourceDesc &) const
Definition: ComputeStep.cpp:77
DlColor color
uint8_t value
double y
double x
Definition: bitmap.py:1
dst
Definition: cp.py:12
static SkImageInfo Make(int width, int height, SkColorType ct, SkAlphaType at)

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [10/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_SampledTexture  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1319 of file ComputeTest.cpp.

1322 {
1323 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1324
1325 // The first ComputeStep initializes a 16x16 texture with a checkerboard pattern of alternating
1326 // red and black pixels. The second ComputeStep downsamples this texture into a 4x4 using
1327 // bilinear filtering at pixel borders, intentionally averaging the values of each 4x4 tile in
1328 // the source texture, and writes the result to the destination texture.
1329 constexpr uint32_t kSrcDim = 16;
1330 constexpr uint32_t kDstDim = 4;
1331
1332 class TestComputeStep1 : public ComputeStep {
1333 public:
1334 TestComputeStep1() : ComputeStep(
1335 /*name=*/"Test_SampledTexture_Init",
1336 /*localDispatchSize=*/{kSrcDim, kSrcDim, 1},
1337 /*resources=*/{
1338 {
1340 /*flow=*/DataFlow::kShared,
1341 /*policy=*/ResourcePolicy::kNone,
1342 /*slot=*/0,
1343 /*sksl=*/"dst",
1344 }
1345 }) {}
1346 ~TestComputeStep1() override = default;
1347
1348 std::string computeSkSL() const override {
1349 return R"(
1350 void main() {
1351 uint2 c = sk_LocalInvocationID.xy;
1352 uint checkerBoardColor = (c.x + (c.y % 2)) % 2;
1353 textureWrite(dst, c, half4(checkerBoardColor, 0, 0, 1));
1354 }
1355 )";
1356 }
1357
1358 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1359 int index, const ResourceDesc& r) const override {
1360 SkASSERT(index == 0);
1361 return {{kSrcDim, kSrcDim}, kRGBA_8888_SkColorType};
1362 }
1363
1365 return WorkgroupSize(1, 1, 1);
1366 }
1367 } step1;
1368
1369 class TestComputeStep2 : public ComputeStep {
1370 public:
1371 TestComputeStep2() : ComputeStep(
1372 /*name=*/"Test_SampledTexture_Sample",
1373 /*localDispatchSize=*/{kDstDim, kDstDim, 1},
1374 /*resources=*/{
1375 // Declare the storage texture before the sampled texture. This tests that
1376 // binding index assignment works consistently across all backends when a
1377 // sampler-less texture and a texture+sampler pair are intermixed and sampler
1378 // bindings aren't necessarily contiguous when the ranges are distinct.
1379 {
1381 /*flow=*/DataFlow::kShared,
1382 /*policy=*/ResourcePolicy::kNone,
1383 /*slot=*/1,
1384 /*sksl=*/"dst",
1385 },
1386 {
1388 /*flow=*/DataFlow::kShared,
1389 /*policy=*/ResourcePolicy::kNone,
1390 /*slot=*/0,
1391 /*sksl=*/"src",
1392 }
1393 }) {}
1394 ~TestComputeStep2() override = default;
1395
1396 std::string computeSkSL() const override {
1397 return R"(
1398 void main() {
1399 // Normalize the 4x4 invocation indices and sample the source texture using
1400 // that.
1401 uint2 dstCoord = sk_LocalInvocationID.xy;
1402 const float2 dstSizeInv = float2(0.25, 0.25);
1403 float2 unormCoord = float2(dstCoord) * dstSizeInv;
1404
1405 // Use explicit LOD, as quad derivatives are not available to a compute shader.
1406 half4 color = sampleLod(src, unormCoord, 0);
1407 textureWrite(dst, dstCoord, color);
1408 }
1409 )";
1410 }
1411
1412 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1413 int index, const ResourceDesc& r) const override {
1414 SkASSERT(index == 0 || index == 1);
1415 return {{kDstDim, kDstDim}, kRGBA_8888_SkColorType};
1416 }
1417
1418 SamplerDesc calculateSamplerParameters(int index, const ResourceDesc&) const override {
1419 SkASSERT(index == 1);
1420 // Use the repeat tile mode to sample an infinite checkerboard.
1421 constexpr SkTileMode kTileModes[2] = {SkTileMode::kRepeat, SkTileMode::kRepeat};
1422 return {SkFilterMode::kLinear, kTileModes};
1423 }
1424
1426 return WorkgroupSize(1, 1, 1);
1427 }
1428 } step2;
1429
1430 DispatchGroup::Builder builder(recorder.get());
1431 builder.appendStep(&step1);
1432 builder.appendStep(&step2);
1433
1434 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
1435 if (!dst) {
1436 ERRORF(reporter, "shared resource at slot 1 is missing");
1437 return;
1438 }
1439
1440 // Record the compute task
1442 groups.push_back(builder.finalize());
1443 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1444
1445 // Submit the work and wait for it to complete.
1446 std::unique_ptr<Recording> recording = recorder->snap();
1447 if (!recording) {
1448 ERRORF(reporter, "Failed to make recording");
1449 return;
1450 }
1451
1452 InsertRecordingInfo insertInfo;
1453 insertInfo.fRecording = recording.get();
1454 context->insertRecording(insertInfo);
1455 testContext->syncedSubmit(context);
1456
1458 SkImageInfo imgInfo =
1460 bitmap.allocPixels(imgInfo);
1461
1462 SkPixmap pixels;
1463 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1464 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1465
1466 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1467 REPORTER_ASSERT(reporter, readPixelsSuccess);
1468
1469 for (uint32_t x = 0; x < kDstDim; ++x) {
1470 for (uint32_t y = 0; y < kDstDim; ++y) {
1471 SkColor4f color = pixels.getColor4f(x, y);
1472 REPORTER_ASSERT(reporter, color.fR > 0.49 && color.fR < 0.51,
1473 "At position {%u, %u}, "
1474 "expected red channel in range [0.49, 0.51], "
1475 "found {%.3f}",
1476 x, y, color.fR);
1477 }
1478 }
1479}
SkTileMode
Definition: SkTileMode.h:13
virtual SamplerDesc calculateSamplerParameters(int resourceIndex, const ResourceDesc &) const
Definition: ComputeStep.cpp:83

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [11/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_SingleDispatchTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 111 of file ComputeTest.cpp.

114 {
115 constexpr uint32_t kProblemSize = 512;
116 constexpr float kFactor = 4.f;
117
118 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
119 // processes 1 vector at a time.
120 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
121
122 std::unique_ptr<Recorder> recorder = context->makeRecorder();
123
124 class TestComputeStep : public ComputeStep {
125 public:
126 // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
127 // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
128 TestComputeStep() : ComputeStep(
129 /*name=*/"TestArrayMultiply",
130 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
131 /*resources=*/{
132 // Input buffer:
133 {
134 // TODO(b/299979165): Declare this binding as read-only.
136 /*flow=*/DataFlow::kPrivate,
137 /*policy=*/ResourcePolicy::kMapped,
138 /*sksl=*/"inputBlock {\n"
139 " float factor;\n"
140 " layout(offset=16) float4 in_data[];\n"
141 "}",
142 },
143 // Output buffer:
144 {
146 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
147 // Builder
148 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
149 /*slot=*/0,
150 /*sksl=*/"outputBlock { float4 out_data[]; }",
151 }
152 }) {}
153 ~TestComputeStep() override = default;
154
155 // A kernel that multiplies a large array of floats by a supplied factor.
156 std::string computeSkSL() const override {
157 return R"(
158 void main() {
159 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
160 }
161 )";
162 }
163
164 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
165 if (index == 0) {
166 SkASSERT(r.fFlow == DataFlow::kPrivate);
167 return sizeof(float) * (kProblemSize + 4);
168 }
169 SkASSERT(index == 1);
170 SkASSERT(r.fSlot == 0);
171 SkASSERT(r.fFlow == DataFlow::kShared);
172 return sizeof(float) * kProblemSize;
173 }
174
175 void prepareStorageBuffer(int resourceIndex,
176 const ResourceDesc& r,
177 void* buffer,
178 size_t bufferSize) const override {
179 // Only initialize the input buffer.
180 if (resourceIndex != 0) {
181 return;
182 }
183 SkASSERT(r.fFlow == DataFlow::kPrivate);
184
185 size_t dataCount = sizeof(float) * (kProblemSize + 4);
186 SkASSERT(bufferSize == dataCount);
187 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
188 inData[0] = kFactor;
189 for (unsigned int i = 0; i < kProblemSize; ++i) {
190 inData[i + 4] = i + 1;
191 }
192 }
193
195 return WorkgroupSize(1, 1, 1);
196 }
197 } step;
198
199 DispatchGroup::Builder builder(recorder.get());
200 if (!builder.appendStep(&step)) {
201 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
202 return;
203 }
204
205 // The output buffer should have been placed in the right output slot.
206 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
207 if (!outputInfo) {
208 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
209 return;
210 }
211
212 // Record the compute task
214 groups.push_back(builder.finalize());
215 recorder->priv().add(ComputeTask::Make(std::move(groups)));
216
217 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
218 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
219
220 // Submit the work and wait for it to complete.
221 std::unique_ptr<Recording> recording = recorder->snap();
222 if (!recording) {
223 ERRORF(reporter, "Failed to make recording");
224 return;
225 }
226
227 InsertRecordingInfo insertInfo;
228 insertInfo.fRecording = recording.get();
229 context->insertRecording(insertInfo);
230 testContext->syncedSubmit(context);
231
232 // Verify the contents of the output buffer.
233 float* outData = static_cast<float*>(
234 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
235 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
236 for (unsigned int i = 0; i < kProblemSize; ++i) {
237 const float expected = (i + 1) * kFactor;
238 const float found = outData[i];
239 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
240 }
241}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [12/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_StorageTexture  ,
reporter  ,
context  ,
testContext   
)

Definition at line 770 of file ComputeTest.cpp.

773 {
774 std::unique_ptr<Recorder> recorder = context->makeRecorder();
775
776 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
777 // size.
778 constexpr uint32_t kDim = 16;
779
780 class TestComputeStep : public ComputeStep {
781 public:
782 TestComputeStep() : ComputeStep(
783 /*name=*/"TestStorageTexture",
784 /*localDispatchSize=*/{kDim, kDim, 1},
785 /*resources=*/{
786 {
788 /*flow=*/DataFlow::kShared,
789 /*policy=*/ResourcePolicy::kNone,
790 /*slot=*/0,
791 /*sksl=*/"dst",
792 }
793 }) {}
794 ~TestComputeStep() override = default;
795
796 std::string computeSkSL() const override {
797 return R"(
798 void main() {
799 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
800 }
801 )";
802 }
803
804 std::tuple<SkISize, SkColorType> calculateTextureParameters(
805 int index, const ResourceDesc& r) const override {
806 return {{kDim, kDim}, kRGBA_8888_SkColorType};
807 }
808
810 return WorkgroupSize(1, 1, 1);
811 }
812 } step;
813
814 DispatchGroup::Builder builder(recorder.get());
815 if (!builder.appendStep(&step)) {
816 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
817 return;
818 }
819
820 sk_sp<TextureProxy> texture = builder.getSharedTextureResource(0);
821 if (!texture) {
822 ERRORF(reporter, "Shared resource at slot 0 is missing");
823 return;
824 }
825
826 // Record the compute task
828 groups.push_back(builder.finalize());
829 recorder->priv().add(ComputeTask::Make(std::move(groups)));
830
831 // Submit the work and wait for it to complete.
832 std::unique_ptr<Recording> recording = recorder->snap();
833 if (!recording) {
834 ERRORF(reporter, "Failed to make recording");
835 return;
836 }
837
838 InsertRecordingInfo insertInfo;
839 insertInfo.fRecording = recording.get();
840 context->insertRecording(insertInfo);
841 testContext->syncedSubmit(context);
842
844 SkImageInfo imgInfo =
846 bitmap.allocPixels(imgInfo);
847
848 SkPixmap pixels;
849 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
850 REPORTER_ASSERT(reporter, peekPixelsSuccess);
851
852 bool readPixelsSuccess = context->priv().readPixels(pixels, texture.get(), imgInfo, 0, 0);
853 REPORTER_ASSERT(reporter, readPixelsSuccess);
854
855 for (uint32_t x = 0; x < kDim; ++x) {
856 for (uint32_t y = 0; y < kDim; ++y) {
857 SkColor4f expected = SkColor4f::FromColor(SK_ColorGREEN);
858 SkColor4f color = pixels.getColor4f(x, y);
859 REPORTER_ASSERT(reporter, expected == color,
860 "At position {%u, %u}, "
861 "expected {%.1f, %.1f, %.1f, %.1f}, "
862 "found {%.1f, %.1f, %.1f, %.1f}",
863 x, y,
864 expected.fR, expected.fG, expected.fB, expected.fA,
865 color.fR, color.fG, color.fB, color.fA);
866 }
867 }
868}
constexpr SkColor SK_ColorGREEN
Definition: SkColor.h:131
FlTexture * texture

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [13/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_StorageTextureMultipleComputeSteps  ,
reporter  ,
context  ,
testContext   
)

Definition at line 1172 of file ComputeTest.cpp.

1175 {
1176 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1177
1178 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
1179 // size.
1180 constexpr uint32_t kDim = 16;
1181
1182 // Writes to a texture in slot 0.
1183 class TestComputeStep1 : public ComputeStep {
1184 public:
1185 TestComputeStep1() : ComputeStep(
1186 /*name=*/"TestStorageTexturesFirstPass",
1187 /*localDispatchSize=*/{kDim, kDim, 1},
1188 /*resources=*/{
1189 {
1191 /*flow=*/DataFlow::kShared,
1192 /*policy=*/ResourcePolicy::kNone,
1193 /*slot=*/0,
1194 /*sksl=*/"dst",
1195 }
1196 }) {}
1197 ~TestComputeStep1() override = default;
1198
1199 std::string computeSkSL() const override {
1200 return R"(
1201 void main() {
1202 textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
1203 }
1204 )";
1205 }
1206
1207 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1208 int index, const ResourceDesc& r) const override {
1209 SkASSERT(index == 0);
1210 return {{kDim, kDim}, kRGBA_8888_SkColorType};
1211 }
1212
1214 return WorkgroupSize(1, 1, 1);
1215 }
1216 } step1;
1217
1218 // Reads from the texture in slot 0 and writes it to another texture in slot 1.
1219 class TestComputeStep2 : public ComputeStep {
1220 public:
1221 TestComputeStep2() : ComputeStep(
1222 /*name=*/"TestStorageTexturesSecondPass",
1223 /*localDispatchSize=*/{kDim, kDim, 1},
1224 /*resources=*/{
1225 {
1227 /*flow=*/DataFlow::kShared,
1228 /*policy=*/ResourcePolicy::kNone,
1229 /*slot=*/0,
1230 /*sksl=*/"src",
1231 },
1232 {
1234 /*flow=*/DataFlow::kShared,
1235 /*policy=*/ResourcePolicy::kNone,
1236 /*slot=*/1,
1237 /*sksl=*/"dst",
1238 }
1239 }) {}
1240 ~TestComputeStep2() override = default;
1241
1242 std::string computeSkSL() const override {
1243 return R"(
1244 void main() {
1245 half4 color = textureRead(src, sk_LocalInvocationID.xy);
1246 textureWrite(dst, sk_LocalInvocationID.xy, color);
1247 }
1248 )";
1249 }
1250
1251 std::tuple<SkISize, SkColorType> calculateTextureParameters(
1252 int index, const ResourceDesc& r) const override {
1253 SkASSERT(index == 1);
1254 return {{kDim, kDim}, kRGBA_8888_SkColorType};
1255 }
1256
1258 return WorkgroupSize(1, 1, 1);
1259 }
1260 } step2;
1261
1262 DispatchGroup::Builder builder(recorder.get());
1263 builder.appendStep(&step1);
1264 builder.appendStep(&step2);
1265
1266 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
1267 if (!dst) {
1268 ERRORF(reporter, "shared resource at slot 1 is missing");
1269 return;
1270 }
1271
1272 // Record the compute task
1274 groups.push_back(builder.finalize());
1275 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1276
1277 // Submit the work and wait for it to complete.
1278 std::unique_ptr<Recording> recording = recorder->snap();
1279 if (!recording) {
1280 ERRORF(reporter, "Failed to make recording");
1281 return;
1282 }
1283
1284 InsertRecordingInfo insertInfo;
1285 insertInfo.fRecording = recording.get();
1286 context->insertRecording(insertInfo);
1287 testContext->syncedSubmit(context);
1288
1290 SkImageInfo imgInfo =
1292 bitmap.allocPixels(imgInfo);
1293
1294 SkPixmap pixels;
1295 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1296 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1297
1298 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1299 REPORTER_ASSERT(reporter, readPixelsSuccess);
1300
1301 for (uint32_t x = 0; x < kDim; ++x) {
1302 for (uint32_t y = 0; y < kDim; ++y) {
1303 SkColor4f expected = SkColor4f::FromColor(SK_ColorGREEN);
1304 SkColor4f color = pixels.getColor4f(x, y);
1305 REPORTER_ASSERT(reporter, expected == color,
1306 "At position {%u, %u}, "
1307 "expected {%.1f, %.1f, %.1f, %.1f}, "
1308 "found {%.1f, %.1f, %.1f, %.1f}",
1309 x, y,
1310 expected.fR, expected.fG, expected.fB, expected.fA,
1311 color.fR, color.fG, color.fB, color.fA);
1312 }
1313 }
1314}

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [14/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_StorageTextureReadAndWrite  ,
reporter  ,
context  ,
testContext   
)

Definition at line 872 of file ComputeTest.cpp.

875 {
876 std::unique_ptr<Recorder> recorder = context->makeRecorder();
877
878 // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
879 // size.
880 constexpr uint32_t kDim = 16;
881
882 class TestComputeStep : public ComputeStep {
883 public:
884 TestComputeStep() : ComputeStep(
885 /*name=*/"TestStorageTextureReadAndWrite",
886 /*localDispatchSize=*/{kDim, kDim, 1},
887 /*resources=*/{
888 {
890 /*flow=*/DataFlow::kShared,
891 /*policy=*/ResourcePolicy::kNone,
892 /*slot=*/0,
893 /*sksl=*/"src",
894 },
895 {
897 /*flow=*/DataFlow::kShared,
898 /*policy=*/ResourcePolicy::kNone,
899 /*slot=*/1,
900 /*sksl=*/"dst",
901 }
902 }) {}
903 ~TestComputeStep() override = default;
904
905 std::string computeSkSL() const override {
906 return R"(
907 void main() {
908 half4 color = textureRead(src, sk_LocalInvocationID.xy);
909 textureWrite(dst, sk_LocalInvocationID.xy, color);
910 }
911 )";
912 }
913
914 std::tuple<SkISize, SkColorType> calculateTextureParameters(
915 int index, const ResourceDesc& r) const override {
916 SkASSERT(index == 1);
917 return {{kDim, kDim}, kRGBA_8888_SkColorType};
918 }
919
921 return WorkgroupSize(1, 1, 1);
922 }
923 } step;
924
925 // Create and populate an input texture.
926 SkBitmap srcBitmap;
927 SkImageInfo srcInfo =
929 srcBitmap.allocPixels(srcInfo);
930 SkPixmap srcPixels;
931 bool srcPeekPixelsSuccess = srcBitmap.peekPixels(&srcPixels);
932 REPORTER_ASSERT(reporter, srcPeekPixelsSuccess);
933 for (uint32_t x = 0; x < kDim; ++x) {
934 for (uint32_t y = 0; y < kDim; ++y) {
935 *srcPixels.writable_addr32(x, y) =
936 SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0);
937 }
938 }
939
940 auto texInfo = context->priv().caps()->getDefaultSampledTextureInfo(kRGBA_8888_SkColorType,
944 sk_sp<TextureProxy> srcProxy = TextureProxy::Make(context->priv().caps(),
945 recorder->priv().resourceProvider(),
946 {kDim, kDim},
947 texInfo,
948 "ComputeTestSrcProxy",
950 MipLevel mipLevel;
951 mipLevel.fPixels = srcPixels.addr();
952 mipLevel.fRowBytes = srcPixels.rowBytes();
954 srcProxy,
955 srcPixels.info().colorInfo(),
956 srcPixels.info().colorInfo(),
957 {mipLevel},
958 SkIRect::MakeWH(kDim, kDim),
959 std::make_unique<ImageUploadContext>());
960 if (!upload.isValid()) {
961 ERRORF(reporter, "Could not create UploadInstance");
962 return;
963 }
964 recorder->priv().add(UploadTask::Make(std::move(upload)));
965
966 DispatchGroup::Builder builder(recorder.get());
967
968 // Assign the input texture to slot 0. This corresponds to the ComputeStep's "src" texture
969 // binding.
970 builder.assignSharedTexture(std::move(srcProxy), 0);
971
972 if (!builder.appendStep(&step)) {
973 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
974 return;
975 }
976
977 sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
978 if (!dst) {
979 ERRORF(reporter, "shared resource at slot 1 is missing");
980 return;
981 }
982
983 // Record the compute task
985 groups.push_back(builder.finalize());
986 recorder->priv().add(ComputeTask::Make(std::move(groups)));
987
988 // Submit the work and wait for it to complete.
989 std::unique_ptr<Recording> recording = recorder->snap();
990 if (!recording) {
991 ERRORF(reporter, "Failed to make recording");
992 return;
993 }
994
995 InsertRecordingInfo insertInfo;
996 insertInfo.fRecording = recording.get();
997 context->insertRecording(insertInfo);
998 testContext->syncedSubmit(context);
999
1001 SkImageInfo imgInfo =
1003 bitmap.allocPixels(imgInfo);
1004
1005 SkPixmap pixels;
1006 bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
1007 REPORTER_ASSERT(reporter, peekPixelsSuccess);
1008
1009 bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
1010 REPORTER_ASSERT(reporter, readPixelsSuccess);
1011
1012 for (uint32_t x = 0; x < kDim; ++x) {
1013 for (uint32_t y = 0; y < kDim; ++y) {
1014 SkColor4f expected = SkColor4f::FromBytes_RGBA(
1015 SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
1016 SkColor4f color = pixels.getColor4f(x, y);
1017 REPORTER_ASSERT(reporter, expected == color,
1018 "At position {%u, %u}, "
1019 "expected {%.1f, %.1f, %.1f, %.1f}, "
1020 "found {%.1f, %.1f, %.1f, %.1f}",
1021 x, y,
1022 expected.fR, expected.fG, expected.fB, expected.fA,
1023 color.fR, color.fG, color.fB, color.fA);
1024 }
1025 }
1026}
void allocPixels(const SkImageInfo &info, size_t rowBytes)
Definition: SkBitmap.cpp:258
bool peekPixels(SkPixmap *pixmap) const
Definition: SkBitmap.cpp:635
size_t rowBytes() const
Definition: SkPixmap.h:145
const SkImageInfo & info() const
Definition: SkPixmap.h:135
uint32_t * writable_addr32(int x, int y) const
Definition: SkPixmap.h:537
const void * addr() const
Definition: SkPixmap.h:153
Definition: upload.py:1
static constexpr SkIRect MakeWH(int32_t w, int32_t h)
Definition: SkRect.h:56
const SkColorInfo & colorInfo() const
Definition: SkImageInfo.h:404

◆ DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS() [15/15]

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS ( Compute_UniformBufferTest  ,
reporter  ,
context  ,
testContext   
)

Definition at line 496 of file ComputeTest.cpp.

499 {
500 // TODO(b/315834710): This fails on Dawn D3D11
501 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
502 return;
503 }
504
505 constexpr uint32_t kProblemSize = 512;
506 constexpr float kFactor = 4.f;
507
508 // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
509 // processes 1 vector at a time.
510 constexpr uint32_t kWorkgroupSize = kProblemSize / 4;
511
512 std::unique_ptr<Recorder> recorder = context->makeRecorder();
513
514 class TestComputeStep : public ComputeStep {
515 public:
516 TestComputeStep() : ComputeStep(
517 /*name=*/"TestArrayMultiply",
518 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
519 /*resources=*/{
520 // Uniform buffer:
521 {
523 /*flow=*/DataFlow::kPrivate,
524 /*policy=*/ResourcePolicy::kMapped,
525 /*sksl=*/"uniformBlock { float factor; }"
526 },
527 // Input buffer:
528 {
530 /*flow=*/DataFlow::kPrivate,
531 /*policy=*/ResourcePolicy::kMapped,
532 /*sksl=*/"inputBlock { float4 in_data[]; }",
533 },
534 // Output buffer:
535 {
537 /*flow=*/DataFlow::kShared, // shared to allow us to access it from the
538 // Builder
539 /*policy=*/ResourcePolicy::kMapped, // mappable for read-back
540 /*slot=*/0,
541 /*sksl=*/"outputBlock { float4 out_data[]; }",
542 }
543 }) {}
544 ~TestComputeStep() override = default;
545
546 // A kernel that multiplies a large array of floats by a supplied factor.
547 std::string computeSkSL() const override {
548 return R"(
549 void main() {
550 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
551 }
552 )";
553 }
554
555 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
556 if (index == 0) {
557 SkASSERT(r.fFlow == DataFlow::kPrivate);
558 return sizeof(float);
559 }
560 if (index == 1) {
561 SkASSERT(r.fFlow == DataFlow::kPrivate);
562 return sizeof(float) * kProblemSize;
563 }
564 SkASSERT(index == 2);
565 SkASSERT(r.fSlot == 0);
566 SkASSERT(r.fFlow == DataFlow::kShared);
567 return sizeof(float) * kProblemSize;
568 }
569
570 void prepareStorageBuffer(int resourceIndex,
571 const ResourceDesc& r,
572 void* buffer,
573 size_t bufferSize) const override {
574 // Only initialize the input storage buffer.
575 if (resourceIndex != 1) {
576 return;
577 }
578 SkASSERT(r.fFlow == DataFlow::kPrivate);
579 size_t dataCount = sizeof(float) * kProblemSize;
580 SkASSERT(bufferSize == dataCount);
581 SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
582 for (unsigned int i = 0; i < kProblemSize; ++i) {
583 inData[i] = i + 1;
584 }
585 }
586
587 void prepareUniformBuffer(int resourceIndex,
588 const ResourceDesc&,
589 UniformManager* mgr) const override {
590 SkASSERT(resourceIndex == 0);
592 const Uniform uniforms[] = {{"factor", SkSLType::kFloat}};
593 mgr->setExpectedUniforms(uniforms);
594 )
595 mgr->write(kFactor);
596 }
597
599 return WorkgroupSize(1, 1, 1);
600 }
601 } step;
602
603 DispatchGroup::Builder builder(recorder.get());
604 if (!builder.appendStep(&step)) {
605 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
606 return;
607 }
608
609 // The output buffer should have been placed in the right output slot.
610 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
611 if (!outputInfo) {
612 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
613 return;
614 }
615
616 // Record the compute task
618 groups.push_back(builder.finalize());
619 recorder->priv().add(ComputeTask::Make(std::move(groups)));
620
621 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
622 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
623
624 // Submit the work and wait for it to complete.
625 std::unique_ptr<Recording> recording = recorder->snap();
626 if (!recording) {
627 ERRORF(reporter, "Failed to make recording");
628 return;
629 }
630
631 InsertRecordingInfo insertInfo;
632 insertInfo.fRecording = recording.get();
633 context->insertRecording(insertInfo);
634 testContext->syncedSubmit(context);
635
636 // Verify the contents of the output buffer.
637 float* outData = static_cast<float*>(
638 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
639 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
640 for (unsigned int i = 0; i < kProblemSize; ++i) {
641 const float expected = (i + 1) * kFactor;
642 const float found = outData[i];
643 REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
644 }
645}
SkDEBUGCODE(SK_SPI) SkThreadID SkGetThreadID()
virtual void prepareUniformBuffer(int resourceIndex, const ResourceDesc &, UniformManager *) const
Definition: ComputeStep.cpp:58

◆ DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT()

DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT ( Compute_NativeShaderSourceWGSL  ,
reporter  ,
context  ,
testContext   
)

Definition at line 2525 of file ComputeTest.cpp.

2525 {
2526 // This fails on Dawn D3D11, b/315834710
2527 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2528 return;
2529 }
2530
2531 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2532
2533 constexpr uint32_t kWorkgroupCount = 32;
2534 constexpr uint32_t kWorkgroupSize = 256; // The WebGPU default workgroup size limit is 256
2535
2536 class TestComputeStep : public ComputeStep {
2537 public:
2538 TestComputeStep() : ComputeStep(
2539 /*name=*/"TestAtomicOperationsWGSL",
2540 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2541 /*resources=*/{
2542 {
2544 /*flow=*/DataFlow::kShared,
2545 /*policy=*/ResourcePolicy::kMapped,
2546 /*slot=*/0,
2547 }
2548 },
2549 /*workgroupBuffers=*/{},
2550 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2551 ~TestComputeStep() override = default;
2552
2553 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2554 SkASSERT(format == NativeShaderFormat::kWGSL);
2555 static constexpr std::string_view kSource = R"(
2556 @group(0) @binding(0) var<storage, read_write> globalCounter: atomic<u32>;
2557
2558 var<workgroup> localCounter: atomic<u32>;
2559
2560 @compute @workgroup_size(256)
2561 fn atomicCount(@builtin(local_invocation_id) localId: vec3u) {
2562 // Initialize the local counter.
2563 if localId.x == 0u {
2564 atomicStore(&localCounter, 0u);
2565 }
2566
2567 // Synchronize the threads in the workgroup so they all see the initial value.
2568 workgroupBarrier();
2569
2570 // All threads increment the counter.
2571 atomicAdd(&localCounter, 1u);
2572
2573 // Synchronize the threads again to ensure they have all executed the increment
2574 // and the following load reads the same value across all threads in the
2575 // workgroup.
2576 workgroupBarrier();
2577
2578 // Add the workgroup-only tally to the global counter.
2579 if localId.x == 0u {
2580 let tally = atomicLoad(&localCounter);
2581 atomicAdd(&globalCounter, tally);
2582 }
2583 }
2584 )";
2585 return {kSource, "atomicCount"};
2586 }
2587
2588 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2589 SkASSERT(index == 0);
2590 SkASSERT(r.fSlot == 0);
2591 SkASSERT(r.fFlow == DataFlow::kShared);
2592 return sizeof(uint32_t);
2593 }
2594
2596 return WorkgroupSize(kWorkgroupCount, 1, 1);
2597 }
2598
2599 void prepareStorageBuffer(int resourceIndex,
2600 const ResourceDesc& r,
2601 void* buffer,
2602 size_t bufferSize) const override {
2603 SkASSERT(resourceIndex == 0);
2604 *static_cast<uint32_t*>(buffer) = 0;
2605 }
2606 } step;
2607
2608 DispatchGroup::Builder builder(recorder.get());
2609 builder.appendStep(&step);
2610
2611 BindBufferInfo info = builder.getSharedBufferResource(0);
2612 if (!info) {
2613 ERRORF(reporter, "shared resource at slot 0 is missing");
2614 return;
2615 }
2616
2617 // Record the compute pass task.
2619 groups.push_back(builder.finalize());
2620 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2621
2622 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2623 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2624
2625 // Submit the work and wait for it to complete.
2626 std::unique_ptr<Recording> recording = recorder->snap();
2627 if (!recording) {
2628 ERRORF(reporter, "Failed to make recording");
2629 return;
2630 }
2631
2632 InsertRecordingInfo insertInfo;
2633 insertInfo.fRecording = recording.get();
2634 context->insertRecording(insertInfo);
2635 testContext->syncedSubmit(context);
2636
2637 // Verify the contents of the output buffer.
2638 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2639 const uint32_t result = static_cast<const uint32_t*>(
2640 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2642 result == kExpectedCount,
2643 "expected '%u', found '%u'",
2644 kExpectedCount,
2645 result);
2646}
virtual NativeShaderSource nativeShaderSource(NativeShaderFormat) const
Definition: ComputeStep.cpp:67
uint32_t uint32_t * format

◆ DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT() [1/2]

DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT ( Compute_NativeShaderSourceMetal  ,
reporter  ,
context  ,
testContext   
)

Definition at line 2272 of file ComputeTest.cpp.

2275 {
2276 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2277
2278 constexpr uint32_t kWorkgroupCount = 32;
2279 constexpr uint32_t kWorkgroupSize = 1024;
2280
2281 class TestComputeStep : public ComputeStep {
2282 public:
2283 TestComputeStep() : ComputeStep(
2284 /*name=*/"TestAtomicOperationsMetal",
2285 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2286 /*resources=*/{
2287 {
2289 /*flow=*/DataFlow::kShared,
2290 /*policy=*/ResourcePolicy::kMapped,
2291 /*slot=*/0,
2292 }
2293 },
2294 /*workgroupBuffers=*/{},
2295 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2296 ~TestComputeStep() override = default;
2297
2298 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2299 SkASSERT(format == NativeShaderFormat::kMSL);
2300 static constexpr std::string_view kSource = R"(
2301 #include <metal_stdlib>
2302
2303 using namespace metal;
2304
2305 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2306 device atomic_uint& globalCounter [[buffer(0)]]) {
2307 threadgroup atomic_uint localCounter;
2308
2309 // Initialize the local counter.
2310 if (localId.x == 0u) {
2311 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2312 }
2313
2314 // Synchronize the threads in the workgroup so they all see the initial value.
2315 threadgroup_barrier(mem_flags::mem_threadgroup);
2316
2317 // All threads increment the counter.
2318 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2319
2320 // Synchronize the threads again to ensure they have all executed the increment
2321 // and the following load reads the same value across all threads in the
2322 // workgroup.
2323 threadgroup_barrier(mem_flags::mem_threadgroup);
2324
2325 // Add the workgroup-only tally to the global counter.
2326 if (localId.x == 0u) {
2327 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2328 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2329 }
2330 }
2331 )";
2332 return {kSource, "atomicCount"};
2333 }
2334
2335 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2336 SkASSERT(index == 0);
2337 SkASSERT(r.fSlot == 0);
2338 SkASSERT(r.fFlow == DataFlow::kShared);
2339 return sizeof(uint32_t);
2340 }
2341
2343 return WorkgroupSize(kWorkgroupCount, 1, 1);
2344 }
2345
2346 void prepareStorageBuffer(int resourceIndex,
2347 const ResourceDesc& r,
2348 void* buffer,
2349 size_t bufferSize) const override {
2350 SkASSERT(resourceIndex == 0);
2351 *static_cast<uint32_t*>(buffer) = 0;
2352 }
2353 } step;
2354
2355 DispatchGroup::Builder builder(recorder.get());
2356 builder.appendStep(&step);
2357
2358 BindBufferInfo info = builder.getSharedBufferResource(0);
2359 if (!info) {
2360 ERRORF(reporter, "shared resource at slot 0 is missing");
2361 return;
2362 }
2363
2364 // Record the compute pass task.
2366 groups.push_back(builder.finalize());
2367 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2368
2369 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2370 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2371
2372 // Submit the work and wait for it to complete.
2373 std::unique_ptr<Recording> recording = recorder->snap();
2374 if (!recording) {
2375 ERRORF(reporter, "Failed to make recording");
2376 return;
2377 }
2378
2379 InsertRecordingInfo insertInfo;
2380 insertInfo.fRecording = recording.get();
2381 context->insertRecording(insertInfo);
2382 testContext->syncedSubmit(context);
2383
2384 // Verify the contents of the output buffer.
2385 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2386 const uint32_t result = static_cast<const uint32_t*>(
2387 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2389 result == kExpectedCount,
2390 "expected '%u', found '%u'",
2391 kExpectedCount,
2392 result);
2393}

◆ DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT() [2/2]

DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT ( Compute_WorkgroupBufferDescMetal  ,
reporter  ,
context  ,
testContext   
)

Definition at line 2395 of file ComputeTest.cpp.

2398 {
2399 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2400
2401 constexpr uint32_t kWorkgroupCount = 32;
2402 constexpr uint32_t kWorkgroupSize = 1024;
2403
2404 class TestComputeStep : public ComputeStep {
2405 public:
2406 TestComputeStep() : ComputeStep(
2407 /*name=*/"TestAtomicOperationsMetal",
2408 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2409 /*resources=*/{
2410 {
2412 /*flow=*/DataFlow::kShared,
2413 /*policy=*/ResourcePolicy::kMapped,
2414 /*slot=*/0,
2415 }
2416 },
2417 /*workgroupBuffers=*/{
2418 {
2419 /*size=*/sizeof(uint32_t),
2420 /*index=*/0u,
2421 }
2422 },
2423 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2424 ~TestComputeStep() override = default;
2425
2426 // This is the same MSL kernel as in Compute_NativeShaderSourceMetal, except `localCounter`
2427 // is an entry-point parameter instead of a local variable. This forces the workgroup
2428 // binding to be encoded explicitly in the command encoder.
2429 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2430 SkASSERT(format == NativeShaderFormat::kMSL);
2431 static constexpr std::string_view kSource = R"(
2432 #include <metal_stdlib>
2433
2434 using namespace metal;
2435
2436 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2437 device atomic_uint& globalCounter [[buffer(0)]],
2438 threadgroup atomic_uint& localCounter [[threadgroup(0)]]) {
2439 // Initialize the local counter.
2440 if (localId.x == 0u) {
2441 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2442 }
2443
2444 // Synchronize the threads in the workgroup so they all see the initial value.
2445 threadgroup_barrier(mem_flags::mem_threadgroup);
2446
2447 // All threads increment the counter.
2448 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2449
2450 // Synchronize the threads again to ensure they have all executed the increment
2451 // and the following load reads the same value across all threads in the
2452 // workgroup.
2453 threadgroup_barrier(mem_flags::mem_threadgroup);
2454
2455 // Add the workgroup-only tally to the global counter.
2456 if (localId.x == 0u) {
2457 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2458 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2459 }
2460 }
2461 )";
2462 return {kSource, "atomicCount"};
2463 }
2464
2465 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2466 SkASSERT(index == 0);
2467 SkASSERT(r.fSlot == 0);
2468 SkASSERT(r.fFlow == DataFlow::kShared);
2469 return sizeof(uint32_t);
2470 }
2471
2473 return WorkgroupSize(kWorkgroupCount, 1, 1);
2474 }
2475
2476 void prepareStorageBuffer(int resourceIndex,
2477 const ResourceDesc& r,
2478 void* buffer,
2479 size_t bufferSize) const override {
2480 SkASSERT(resourceIndex == 0);
2481 *static_cast<uint32_t*>(buffer) = 0;
2482 }
2483 } step;
2484
2485 DispatchGroup::Builder builder(recorder.get());
2486 builder.appendStep(&step);
2487
2488 BindBufferInfo info = builder.getSharedBufferResource(0);
2489 if (!info) {
2490 ERRORF(reporter, "shared resource at slot 0 is missing");
2491 return;
2492 }
2493
2494 // Record the compute pass task.
2496 groups.push_back(builder.finalize());
2497 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2498
2499 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2500 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2501
2502 // Submit the work and wait for it to complete.
2503 std::unique_ptr<Recording> recording = recorder->snap();
2504 if (!recording) {
2505 ERRORF(reporter, "Failed to make recording");
2506 return;
2507 }
2508
2509 InsertRecordingInfo insertInfo;
2510 insertInfo.fRecording = recording.get();
2511 context->insertRecording(insertInfo);
2512 testContext->syncedSubmit(context);
2513
2514 // Verify the contents of the output buffer.
2515 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2516 const uint32_t result = static_cast<const uint32_t*>(
2517 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2519 result == kExpectedCount,
2520 "expected '%u', found '%u'",
2521 kExpectedCount,
2522 result);
2523}