ComputeTest.cpp
/*
 * Copyright 2022 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "tests/Test.h"

// NOTE: the original include list was elided in this capture; the headers below
// cover what the code in this file depends on and may not match it exactly.
#include "include/core/SkBitmap.h"
#include "include/core/SkColor.h"
#include "include/core/SkSpan.h"
#include "include/gpu/graphite/Context.h"
#include "include/gpu/graphite/Recorder.h"
#include "include/gpu/graphite/Recording.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/ComputeTypes.h"
#include "src/gpu/graphite/ContextPriv.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/compute/ComputeStep.h"
#include "src/gpu/graphite/compute/DispatchGroup.h"
#include "src/gpu/graphite/task/ComputeTask.h"
#include "src/gpu/graphite/task/CopyTask.h"
#include "src/gpu/graphite/task/SynchronizeToCpuTask.h"
#include "src/gpu/graphite/task/UploadTask.h"
#include "tools/graphite/GraphiteTestContext.h"

using namespace skgpu::graphite;
using namespace skiatest::graphite;

namespace {

void* map_buffer(Context* context,
                 GraphiteTestContext* testContext,
                 Buffer* buffer,
                 size_t offset) {
    SkASSERT(buffer);
    if (context->priv().caps()->bufferMapsAreAsync()) {
        buffer->asyncMap();
        while (!buffer->isMapped()) {
            testContext->tick();
        }
    }
    std::byte* ptr = static_cast<std::byte*>(buffer->map());
    SkASSERT(ptr);

    return ptr + offset;
}

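// Returns a buffer whose contents can be read on the CPU after the next submission: either
// `buffer` itself (when the backend allows mapping a draw buffer for readback) or a transfer
// buffer that the recorded tasks will copy `buffer` into.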
sk_sp<Buffer> sync_buffer_to_cpu(Recorder* recorder, const Buffer* buffer) {
    if (recorder->priv().caps()->drawBufferCanBeMappedForReadback()) {
        // `buffer` can be mapped directly, however it may still require a synchronization step
        // by the underlying API (e.g. a managed buffer in Metal). SynchronizeToCpuTask
        // automatically handles this for us.
        recorder->priv().add(SynchronizeToCpuTask::Make(sk_ref_sp(buffer)));
        return sk_ref_sp(buffer);
    }

    // The backend requires a transfer buffer for CPU read-back.
    auto xferBuffer =
            recorder->priv().resourceProvider()->findOrCreateBuffer(buffer->size(),
                                                                    BufferType::kXferGpuToCpu,
                                                                    AccessPattern::kHostVisible,
                                                                    "ComputeTest_TransferToCpu");
    SkASSERT(xferBuffer);

    recorder->priv().add(CopyBufferToBufferTask::Make(buffer,
                                                      /*srcOffset=*/0,
                                                      xferBuffer,
                                                      /*dstOffset=*/0,
                                                      buffer->size()));
    return xferBuffer;
}

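// Snaps a Recording from `recorder`, inserts it, and synchronously submits it on `context`.
// Returns null if snapping the Recording fails.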
std::unique_ptr<Recording> submit_recording(Context* context,
                                            GraphiteTestContext* testContext,
                                            Recorder* recorder) {
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        return nullptr;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    return recording;
}

bool is_dawn_or_metal_context_type(skiatest::GpuContextType ctxType) {
    return skiatest::IsDawnContextType(ctxType) || skiatest::IsMetalContextType(ctxType);
}

}  // namespace

#define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(                  \
        name, reporter, graphite_context, test_context)                 \
    DEF_GRAPHITE_TEST_FOR_CONTEXTS(name,                                \
                                   is_dawn_or_metal_context_type,       \
                                   reporter,                            \
                                   graphite_context,                    \
                                   test_context,                        \
                                   CtsEnforcement::kNever)

// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_SingleDispatchTest,
                                              reporter,
                                              context,
                                              testContext) {
    constexpr uint32_t kProblemSize = 512;
    constexpr float kFactor = 4.f;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    class TestComputeStep : public ComputeStep {
    public:
        // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
        // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
        TestComputeStep() : ComputeStep(
                /*name=*/"TestArrayMultiply",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Input buffer:
                    {
                        // TODO(b/299979165): Declare this binding as read-only.
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"inputBlock {\n"
                                 "    float factor;\n"
                                 "    layout(offset=16) float4 in_data[];\n"
                                 "}",
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,  // shared to allow us to access it from the
                                                     // Builder
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/0,
                        /*sksl=*/"outputBlock { float4 out_data[]; }",
                    }
                }) {}
        ~TestComputeStep() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            if (index == 0) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float) * (kProblemSize + 4);
            }
            SkASSERT(index == 1);
            SkASSERT(r.fSlot == 0);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return sizeof(float) * kProblemSize;
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            // Only initialize the input buffer.
            if (resourceIndex != 0) {
                return;
            }
            SkASSERT(r.fFlow == DataFlow::kPrivate);

            // One float4's worth of header (the factor) followed by kProblemSize floats of
            // payload. Note that the span is sized in floats while the buffer is sized in bytes.
            size_t dataCount = kProblemSize + 4;
            SkASSERT(bufferSize == sizeof(float) * dataCount);
            SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
            inData[0] = kFactor;
            for (unsigned int i = 0; i < kProblemSize; ++i) {
                inData[i + 4] = i + 1;
            }
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    // The output buffer should have been placed in the right output slot.
    BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
    if (!outputInfo) {
        ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    float* outData = static_cast<float*>(
            map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
    SkASSERT(outputBuffer->isMapped() && outData != nullptr);
    for (unsigned int i = 0; i < kProblemSize; ++i) {
        const float expected = (i + 1) * kFactor;
        const float found = outData[i];
        REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
    }
}

// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_DispatchGroupTest,
                                              reporter,
                                              context,
                                              testContext) {
    // TODO(b/315834710): This fails on Dawn D3D11
    if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
        return;
    }

    constexpr uint32_t kProblemSize = 512;
    constexpr float kFactor1 = 4.f;
    constexpr float kFactor2 = 3.f;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // Define two steps that perform two multiplication passes over the same input.

    class TestComputeStep1 : public ComputeStep {
    public:
        // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers
        // below all pack their data into vectors to be compatible with SPIR-V/WGSL.
        TestComputeStep1() : ComputeStep(
                /*name=*/"TestArrayMultiplyFirstPass",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Input buffer:
                    {
                        // TODO(b/299979165): Declare this binding as read-only.
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*sksl=*/"inputBlock {\n"
                                 "    float factor;\n"
                                 "    layout(offset=16) float4 in_data[];\n"
                                 "}",
                    },
                    // Output buffers:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,  // GPU-only, read by second step
                        /*slot=*/0,
                        /*sksl=*/"outputBlock1 { float4 forward_data[]; }",
                    },
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/1,
                        /*sksl=*/"outputBlock2 { float2 extra_data; }",
                    }
                }) {}
        ~TestComputeStep1() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    uint idx = sk_GlobalInvocationID.x;
                    forward_data[idx] = in_data[idx] * factor;
                    if (idx == 0) {
                        extra_data.x = factor;
                        extra_data.y = 2 * factor;
                    }
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            if (index == 0) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float) * (kProblemSize + 4);
            }
            if (index == 1) {
                SkASSERT(r.fFlow == DataFlow::kShared);
                SkASSERT(r.fSlot == 0);
                return sizeof(float) * kProblemSize;
            }

            SkASSERT(index == 2);
            SkASSERT(r.fSlot == 1);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return 2 * sizeof(float);
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            if (resourceIndex != 0) {
                return;
            }

            // The span is sized in floats; the buffer is sized in bytes.
            size_t dataCount = kProblemSize + 4;
            SkASSERT(bufferSize == sizeof(float) * dataCount);
            SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
            inData[0] = kFactor1;
            for (unsigned int i = 0; i < kProblemSize; ++i) {
                inData[i + 4] = i + 1;
            }
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step1;

    class TestComputeStep2 : public ComputeStep {
    public:
        TestComputeStep2() : ComputeStep(
                /*name=*/"TestArrayMultiplySecondPass",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Input buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,  // GPU-only
                        /*slot=*/0,  // this is the output from the first step
                        /*sksl=*/"inputBlock { float4 in_data[]; }",
                    },
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"factorBlock { float factor; }"
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/2,
                        /*sksl=*/"outputBlock { float4 out_data[]; }",
                    }
                }) {}
        ~TestComputeStep2() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            SkASSERT(index != 0);
            if (index == 1) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float) * 4;
            }
            SkASSERT(index == 2);
            SkASSERT(r.fSlot == 2);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return sizeof(float) * kProblemSize;
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            if (resourceIndex != 1) {
                return;
            }
            SkASSERT(r.fFlow == DataFlow::kPrivate);
            *static_cast<float*>(buffer) = kFactor2;
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step2;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step1);
    builder.appendStep(&step2);

    // Slots 0, 1, and 2 should all contain shared buffers. Slot 1 contains the extra output
    // buffer from step 1 while slot 2 contains the result of the second multiplication pass
    // performed by step 2. Slot 0 is not mappable.
    REPORTER_ASSERT(reporter,
                    std::holds_alternative<BufferView>(builder.outputTable().fSharedSlots[0]),
                    "shared resource at slot 0 is missing");
    BindBufferInfo outputInfo = builder.getSharedBufferResource(2);
    if (!outputInfo) {
        ERRORF(reporter, "Failed to allocate an output buffer at slot 2");
        return;
    }

    // Extra output buffer from step 1 (corresponding to 'outputBlock2')
    BindBufferInfo extraOutputInfo = builder.getSharedBufferResource(1);
    if (!extraOutputInfo) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffers get synchronized to the CPU once the GPU submission has finished.
    auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
    auto extraOutputBuffer = sync_buffer_to_cpu(recorder.get(), extraOutputInfo.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer from step 2.
    float* outData = static_cast<float*>(
            map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
    SkASSERT(outputBuffer->isMapped() && outData != nullptr);
    for (unsigned int i = 0; i < kProblemSize; ++i) {
        const float expected = (i + 1) * kFactor1 * kFactor2;
        const float found = outData[i];
        REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
    }

    // Verify the contents of the extra output buffer from step 1.
    float* extraOutData = static_cast<float*>(
            map_buffer(context, testContext, extraOutputBuffer.get(), extraOutputInfo.fOffset));
    SkASSERT(extraOutputBuffer->isMapped() && extraOutData != nullptr);
    REPORTER_ASSERT(reporter,
                    kFactor1 == extraOutData[0],
                    "expected '%f', found '%f'",
                    kFactor1,
                    extraOutData[0]);
    REPORTER_ASSERT(reporter,
                    2 * kFactor1 == extraOutData[1],
                    "expected '%f', found '%f'",
                    2 * kFactor1,
                    extraOutData[1]);
}

// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_UniformBufferTest,
                                              reporter,
                                              context,
                                              testContext) {
    // TODO(b/315834710): This fails on Dawn D3D11
    if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
        return;
    }

    constexpr uint32_t kProblemSize = 512;
    constexpr float kFactor = 4.f;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestArrayMultiply",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Uniform buffer:
                    {
                        /*type=*/ResourceType::kUniformBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"uniformBlock { float factor; }"
                    },
                    // Input buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"inputBlock { float4 in_data[]; }",
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,  // shared to allow us to access it from the
                                                     // Builder
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/0,
                        /*sksl=*/"outputBlock { float4 out_data[]; }",
                    }
                }) {}
        ~TestComputeStep() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            if (index == 0) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float);
            }
            if (index == 1) {
                SkASSERT(r.fFlow == DataFlow::kPrivate);
                return sizeof(float) * kProblemSize;
            }
            SkASSERT(index == 2);
            SkASSERT(r.fSlot == 0);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return sizeof(float) * kProblemSize;
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            // Only initialize the input storage buffer.
            if (resourceIndex != 1) {
                return;
            }
            SkASSERT(r.fFlow == DataFlow::kPrivate);
            // The span is sized in floats; the buffer is sized in bytes.
            size_t dataCount = kProblemSize;
            SkASSERT(bufferSize == sizeof(float) * dataCount);
            SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
            for (unsigned int i = 0; i < kProblemSize; ++i) {
                inData[i] = i + 1;
            }
        }

        void prepareUniformBuffer(int resourceIndex,
                                  const ResourceDesc&,
                                  UniformManager* mgr) const override {
            SkASSERT(resourceIndex == 0);
            SkDEBUGCODE(
                const Uniform uniforms[] = {{"factor", SkSLType::kFloat}};
                mgr->setExpectedUniforms(uniforms);
            )
            mgr->write(kFactor);
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    // The output buffer should have been placed in the right output slot.
    BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
    if (!outputInfo) {
        ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    float* outData = static_cast<float*>(
            map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
    SkASSERT(outputBuffer->isMapped() && outData != nullptr);
    for (unsigned int i = 0; i < kProblemSize; ++i) {
        const float expected = (i + 1) * kFactor;
        const float found = outData[i];
        REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
    }
}

// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ExternallyAssignedBuffer,
                                              reporter,
                                              context,
                                              testContext) {
    constexpr uint32_t kProblemSize = 512;
    constexpr float kFactor = 4.f;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"ExternallyAssignedBuffer",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Input buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*sksl=*/"inputBlock {\n"
                                 "    float factor;\n"
                                 "    layout(offset = 16) float4 in_data[];\n"
                                 "}\n",
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,  // shared to allow us to access it from the
                                                     // Builder
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/0,
                        /*sksl=*/"outputBlock { float4 out_data[]; }",
                    }
                }) {}
        ~TestComputeStep() override = default;

        // A kernel that multiplies a large array of floats by a supplied factor.
        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor;
                }
            )";
        }

        size_t calculateBufferSize(int resourceIndex, const ResourceDesc& r) const override {
            SkASSERT(resourceIndex == 0);
            SkASSERT(r.fFlow == DataFlow::kPrivate);
            return sizeof(float) * (kProblemSize + 4);
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            SkASSERT(resourceIndex == 0);
            SkASSERT(r.fFlow == DataFlow::kPrivate);

            // The span is sized in floats; the buffer is sized in bytes.
            size_t dataCount = kProblemSize + 4;
            SkASSERT(bufferSize == sizeof(float) * dataCount);
            SkSpan<float> inData(static_cast<float*>(buffer), dataCount);
            inData[0] = kFactor;
            for (unsigned int i = 0; i < kProblemSize; ++i) {
                inData[i + 4] = i + 1;
            }
        }
    } step;

    // We allocate a buffer and directly assign it to the DispatchGroup::Builder. The ComputeStep
    // will not participate in the creation of this buffer.
    auto [_, outputInfo] =
            recorder->priv().drawBufferManager()->getStoragePointer(sizeof(float) * kProblemSize);
    REPORTER_ASSERT(reporter, outputInfo, "Failed to allocate output buffer");

    DispatchGroup::Builder builder(recorder.get());
    builder.assignSharedBuffer({outputInfo, sizeof(float) * kProblemSize}, 0);

    // Initialize the step with a pre-determined global size.
    if (!builder.appendStep(&step, {WorkgroupSize(1, 1, 1)})) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    float* outData = static_cast<float*>(
            map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
    SkASSERT(outputBuffer->isMapped() && outData != nullptr);
    for (unsigned int i = 0; i < kProblemSize; ++i) {
        const float expected = (i + 1) * kFactor;
        const float found = outData[i];
        REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found);
    }
}

// Tests the storage texture binding for a compute dispatch that writes the same color to every
// pixel of a storage texture.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTexture,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
    // size.
    constexpr uint32_t kDim = 16;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestStorageTexture",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    sk_sp<TextureProxy> texture = builder.getSharedTextureResource(0);
    if (!texture) {
        ERRORF(reporter, "Shared resource at slot 0 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, texture.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            SkColor4f expected = SkColor4f::FromColor(SK_ColorGREEN);
            SkColor4f color = pixels.getColor4f(x, y);
            REPORTER_ASSERT(reporter, expected == color,
                            "At position {%u, %u}, "
                            "expected {%.1f, %.1f, %.1f, %.1f}, "
                            "found {%.1f, %.1f, %.1f, %.1f}",
                            x, y,
                            expected.fR, expected.fG, expected.fB, expected.fA,
                            color.fR, color.fG, color.fB, color.fA);
        }
    }
}

// Tests the readonly texture binding for a compute dispatch that random-access reads from a
// CPU-populated texture and copies it to a storage texture.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTextureReadAndWrite,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
    // size.
    constexpr uint32_t kDim = 16;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestStorageTextureReadAndWrite",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kReadOnlyTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"src",
                    },
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/1,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    half4 color = textureRead(src, sk_LocalInvocationID.xy);
                    textureWrite(dst, sk_LocalInvocationID.xy, color);
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 1);
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    // Create and populate an input texture.
    SkBitmap srcBitmap;
    SkImageInfo srcInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    srcBitmap.allocPixels(srcInfo);
    SkPixmap srcPixels;
    bool srcPeekPixelsSuccess = srcBitmap.peekPixels(&srcPixels);
    REPORTER_ASSERT(reporter, srcPeekPixelsSuccess);
    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            *srcPixels.writable_addr32(x, y) =
                    SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0);
        }
    }

    TextureInfo texInfo = context->priv().caps()->getDefaultSampledTextureInfo(
            kRGBA_8888_SkColorType,
            skgpu::Mipmapped::kNo,
            skgpu::Protected::kNo,
            skgpu::Renderable::kNo);
    sk_sp<TextureProxy> srcProxy = TextureProxy::Make(context->priv().caps(),
                                                      recorder->priv().resourceProvider(),
                                                      {kDim, kDim},
                                                      texInfo,
                                                      skgpu::Budgeted::kNo);
    MipLevel mipLevel;
    mipLevel.fPixels = srcPixels.addr();
    mipLevel.fRowBytes = srcPixels.rowBytes();
    UploadInstance upload = UploadInstance::Make(recorder.get(),
                                                 srcProxy,
                                                 srcPixels.info().colorInfo(),
                                                 srcPixels.info().colorInfo(),
                                                 {mipLevel},
                                                 SkIRect::MakeWH(kDim, kDim),
                                                 std::make_unique<ImageUploadContext>());
    if (!upload.isValid()) {
        ERRORF(reporter, "Could not create UploadInstance");
        return;
    }
    recorder->priv().add(UploadTask::Make(std::move(upload)));

    DispatchGroup::Builder builder(recorder.get());

    // Assign the input texture to slot 0. This corresponds to the ComputeStep's "src" texture
    // binding.
    builder.assignSharedTexture(std::move(srcProxy), 0);

    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
    if (!dst) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            SkColor4f expected = SkColor4f::FromColor(
                    SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
            SkColor4f color = pixels.getColor4f(x, y);
            REPORTER_ASSERT(reporter, expected == color,
                            "At position {%u, %u}, "
                            "expected {%.1f, %.1f, %.1f, %.1f}, "
                            "found {%.1f, %.1f, %.1f, %.1f}",
                            x, y,
                            expected.fR, expected.fG, expected.fB, expected.fA,
                            color.fR, color.fG, color.fB, color.fA);
        }
    }
}

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ReadOnlyStorageBuffer,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
    // size.
    constexpr uint32_t kDim = 16;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestReadOnlyStorageBuffer",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kReadOnlyStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*slot=*/0,
                        /*sksl=*/"src { uint in_data[]; }",
                    },
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/1,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    uint ix = sk_LocalInvocationID.y * 16 + sk_LocalInvocationID.x;
                    uint value = in_data[ix];
                    half4 splat = half4(
                        half(value & 0xFF),
                        half((value >> 8) & 0xFF),
                        half((value >> 16) & 0xFF),
                        half((value >> 24) & 0xFF)
                    );
                    textureWrite(dst, sk_LocalInvocationID.xy, splat / 255.0);
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            return kDim * kDim * sizeof(uint32_t);
        }

        void prepareStorageBuffer(int index,
                                  const ResourceDesc&,
                                  void* buffer,
                                  size_t bufferSize) const override {
            SkASSERT(index == 0);
            SkASSERT(bufferSize == kDim * kDim * sizeof(uint32_t));

            uint32_t* inputs = reinterpret_cast<uint32_t*>(buffer);
            for (uint32_t y = 0; y < kDim; ++y) {
                for (uint32_t x = 0; x < kDim; ++x) {
                    uint32_t value =
                            ((x * 256 / kDim) & 0xFF) | ((y * 256 / kDim) & 0xFF) << 8 |
                            255u << 24;
                    *(inputs++) = value;
                }
            }
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 1);
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    if (!builder.appendStep(&step)) {
        ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
        return;
    }

    sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
    if (!dst) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            SkColor4f expected =
                    SkColor4f::FromColor(SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0));
            SkColor4f color = pixels.getColor4f(x, y);
            bool pass = true;
            for (int i = 0; i < 4; i++) {
                pass &= color[i] == expected[i];
            }
            REPORTER_ASSERT(reporter, pass,
                            "At position {%u, %u}, "
                            "expected {%.1f, %.1f, %.1f, %.1f}, "
                            "found {%.1f, %.1f, %.1f, %.1f}",
                            x, y,
                            expected.fR, expected.fG, expected.fB, expected.fA,
                            color.fR, color.fG, color.fB, color.fA);
        }
    }
}

// Tests that a texture written by one compute step can be sampled by a subsequent step.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTextureMultipleComputeSteps,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same
    // size.
    constexpr uint32_t kDim = 16;

    // Writes to a texture in slot 0.
    class TestComputeStep1 : public ComputeStep {
    public:
        TestComputeStep1() : ComputeStep(
                /*name=*/"TestStorageTexturesFirstPass",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep1() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0));
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step1;

    // Reads from the texture in slot 0 and writes it to another texture in slot 1.
    class TestComputeStep2 : public ComputeStep {
    public:
        TestComputeStep2() : ComputeStep(
                /*name=*/"TestStorageTexturesSecondPass",
                /*localDispatchSize=*/{kDim, kDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kReadOnlyTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"src",
                    },
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/1,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep2() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    half4 color = textureRead(src, sk_LocalInvocationID.xy);
                    textureWrite(dst, sk_LocalInvocationID.xy, color);
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 1);
            return {{kDim, kDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step2;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step1);
    builder.appendStep(&step2);

    sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
    if (!dst) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDim; ++x) {
        for (uint32_t y = 0; y < kDim; ++y) {
            SkColor4f expected = SkColor4f::FromColor(SK_ColorGREEN);
            SkColor4f color = pixels.getColor4f(x, y);
            REPORTER_ASSERT(reporter, expected == color,
                            "At position {%u, %u}, "
                            "expected {%.1f, %.1f, %.1f, %.1f}, "
                            "found {%.1f, %.1f, %.1f, %.1f}",
                            x, y,
                            expected.fR, expected.fG, expected.fB, expected.fA,
                            color.fR, color.fG, color.fB, color.fA);
        }
    }
}

// Tests that a texture can be sampled by a compute step using a sampler.
// TODO(armansito): Once the previous TODO is done, add additional tests that exercise mixed use of
// texture, buffer, and sampler bindings.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_SampledTexture,
                                              reporter,
                                              context,
                                              testContext) {
    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // The first ComputeStep initializes a 16x16 texture with a checkerboard pattern of alternating
    // red and black pixels. The second ComputeStep downsamples this texture into a 4x4 using
    // bilinear filtering at pixel borders, intentionally averaging the values of each 4x4 tile in
    // the source texture, and writes the result to the destination texture.
    constexpr uint32_t kSrcDim = 16;
    constexpr uint32_t kDstDim = 4;

    class TestComputeStep1 : public ComputeStep {
    public:
        TestComputeStep1() : ComputeStep(
                /*name=*/"Test_SampledTexture_Init",
                /*localDispatchSize=*/{kSrcDim, kSrcDim, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"dst",
                    }
                }) {}
        ~TestComputeStep1() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    uint2 c = sk_LocalInvocationID.xy;
                    uint checkerBoardColor = (c.x + (c.y % 2)) % 2;
                    textureWrite(dst, c, half4(checkerBoardColor, 0, 0, 1));
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            return {{kSrcDim, kSrcDim}, kRGBA_8888_SkColorType};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step1;

    class TestComputeStep2 : public ComputeStep {
    public:
        TestComputeStep2() : ComputeStep(
                /*name=*/"Test_SampledTexture_Sample",
                /*localDispatchSize=*/{kDstDim, kDstDim, 1},
                /*resources=*/{
                    // Declare the storage texture before the sampled texture. This tests that
                    // binding index assignment works consistently across all backends when a
                    // sampler-less texture and a texture+sampler pair are intermixed and sampler
                    // bindings aren't necessarily contiguous when the ranges are distinct.
                    {
                        /*type=*/ResourceType::kWriteOnlyStorageTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/1,
                        /*sksl=*/"dst",
                    },
                    {
                        /*type=*/ResourceType::kSampledTexture,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kNone,
                        /*slot=*/0,
                        /*sksl=*/"src",
                    }
                }) {}
        ~TestComputeStep2() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    // Normalize the 4x4 invocation indices and sample the source texture using
                    // that.
                    uint2 dstCoord = sk_LocalInvocationID.xy;
                    const float2 dstSizeInv = float2(0.25, 0.25);
                    float2 unormCoord = float2(dstCoord) * dstSizeInv;

                    // Use explicit LOD, as quad derivatives are not available to a compute shader.
                    half4 color = sampleLod(src, unormCoord, 0);
                    textureWrite(dst, dstCoord, color);
                }
            )";
        }

        std::tuple<SkISize, SkColorType> calculateTextureParameters(
                int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0 || index == 1);
            return {{kDstDim, kDstDim}, kRGBA_8888_SkColorType};
        }

        SamplerDesc calculateSamplerParameters(int index, const ResourceDesc&) const override {
            SkASSERT(index == 1);
            // Use the repeat tile mode to sample an infinite checkerboard.
            constexpr SkTileMode kTileModes[2] = {SkTileMode::kRepeat, SkTileMode::kRepeat};
            return {SkFilterMode::kLinear, kTileModes};
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step2;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step1);
    builder.appendStep(&step2);

    sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1);
    if (!dst) {
        ERRORF(reporter, "shared resource at slot 1 is missing");
        return;
    }

    // Record the compute task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    SkBitmap bitmap;
    SkImageInfo imgInfo =
            SkImageInfo::Make(kDstDim, kDstDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
    bitmap.allocPixels(imgInfo);

    SkPixmap pixels;
    bool peekPixelsSuccess = bitmap.peekPixels(&pixels);
    REPORTER_ASSERT(reporter, peekPixelsSuccess);

    bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0);
    REPORTER_ASSERT(reporter, readPixelsSuccess);

    for (uint32_t x = 0; x < kDstDim; ++x) {
        for (uint32_t y = 0; y < kDstDim; ++y) {
            SkColor4f color = pixels.getColor4f(x, y);
            REPORTER_ASSERT(reporter, color.fR > 0.49 && color.fR < 0.51,
                            "At position {%u, %u}, "
                            "expected red channel in range [0.49, 0.51], "
                            "found {%.3f}",
                            x, y, color.fR);
        }
    }
}

// TODO(b/260622403): The shader tested here is identical to
// `resources/sksl/compute/AtomicsOperations.compute`. It would be nice to be able to exercise SkSL
// features like this as part of SkSLTest.cpp instead of as a graphite test.
// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_AtomicOperationsTest,
                                              reporter,
                                              context,
                                              testContext) {
    // This fails on Dawn D3D11, b/315834710
    if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
        return;
    }

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    constexpr uint32_t kWorkgroupCount = 32;
    constexpr uint32_t kWorkgroupSize = 256;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestAtomicOperations",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*slot=*/0,
                        /*sksl=*/"ssbo { atomicUint globalCounter; }",
                    }
                }) {}
        ~TestComputeStep() override = default;

        // A kernel that increments a global (device memory) counter across multiple workgroups.
        // Each workgroup maintains its own independent tally in a workgroup-shared counter which
        // is then added to the global count.
        //
        // This exercises atomic store/load/add and coherent reads and writes over memory in
        // storage and workgroup address spaces.
        std::string computeSkSL() const override {
            return R"(
                workgroup atomicUint localCounter;

                void main() {
                    // Initialize the local counter.
                    if (sk_LocalInvocationID.x == 0) {
                        atomicStore(localCounter, 0);
                    }

                    // Synchronize the threads in the workgroup so they all see the initial value.
                    workgroupBarrier();

                    // All threads increment the counter.
                    atomicAdd(localCounter, 1);

                    // Synchronize the threads again to ensure they have all executed the increment
                    // and the following load reads the same value across all threads in the
                    // workgroup.
                    workgroupBarrier();

                    // Add the workgroup-only tally to the global counter.
                    if (sk_LocalInvocationID.x == 0) {
                        atomicAdd(globalCounter, atomicLoad(localCounter));
                    }
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            SkASSERT(r.fSlot == 0);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return sizeof(uint32_t);
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(kWorkgroupCount, 1, 1);
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            SkASSERT(resourceIndex == 0);
            *static_cast<uint32_t*>(buffer) = 0;
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step);

    BindBufferInfo info = builder.getSharedBufferResource(0);
    if (!info) {
        ERRORF(reporter, "shared resource at slot 0 is missing");
        return;
    }

    // Record the compute pass task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
    const uint32_t result = static_cast<const uint32_t*>(
            map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
    REPORTER_ASSERT(reporter,
                    result == kExpectedCount,
                    "expected '%u', found '%u'",
                    kExpectedCount,
                    result);
}

// TODO(b/260622403): The shader tested here is identical to
// `resources/sksl/compute/AtomicsOperationsOverArrayAndStruct.compute`. It would be nice to be able
// to exercise SkSL features like this as part of SkSLTest.cpp instead of as a graphite test.
// TODO(b/262427430, b/262429132): Enable this test on other backends once they all support
// compute programs.
DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_AtomicOperationsOverArrayAndStructTest,
                                              reporter,
                                              context,
                                              testContext) {
    // This fails on Dawn D3D11, b/315834710
    if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
        return;
    }

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    constexpr uint32_t kWorkgroupCount = 32;
    constexpr uint32_t kWorkgroupSize = 256;

    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestAtomicOperationsOverArrayAndStruct",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,
                        /*policy=*/ResourcePolicy::kMapped,
                        /*slot=*/0,
                        /*sksl=*/"ssbo {\n"
                                 "    atomicUint globalCountsFirstHalf;\n"
                                 "    atomicUint globalCountsSecondHalf;\n"
                                 "}\n"
                    }
                }) {}
        ~TestComputeStep() override = default;

        // Construct a kernel that increments two global (device memory) counters across multiple
        // workgroups. Each workgroup maintains its own independent tallies in workgroup-shared
        // counters which are then added to the global counts.
        //
        // This exercises atomic store/load/add and coherent reads and writes over memory in
        // storage and workgroup address spaces.
        std::string computeSkSL() const override {
            return R"(
                const uint WORKGROUP_SIZE = 256;

                workgroup atomicUint localCounts[2];

                void main() {
                    // Initialize the local counts.
                    if (sk_LocalInvocationID.x == 0) {
                        atomicStore(localCounts[0], 0);
                        atomicStore(localCounts[1], 0);
                    }

                    // Synchronize the threads in the workgroup so they all see the initial value.
                    workgroupBarrier();

                    // Each thread increments one of the local counters based on its invocation
                    // index.
                    uint idx = sk_LocalInvocationID.x < (WORKGROUP_SIZE / 2) ? 0 : 1;
                    atomicAdd(localCounts[idx], 1);

                    // Synchronize the threads again to ensure they have all executed the
                    // increments and the following load reads the same value across all threads
                    // in the workgroup.
                    workgroupBarrier();

                    // Add the workgroup-only tally to the global counter.
                    if (sk_LocalInvocationID.x == 0) {
                        atomicAdd(globalCountsFirstHalf, atomicLoad(localCounts[0]));
                        atomicAdd(globalCountsSecondHalf, atomicLoad(localCounts[1]));
                    }
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            SkASSERT(index == 0);
            SkASSERT(r.fSlot == 0);
            SkASSERT(r.fFlow == DataFlow::kShared);
            return 2 * sizeof(uint32_t);
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(kWorkgroupCount, 1, 1);
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            SkASSERT(resourceIndex == 0);
            uint32_t* data = static_cast<uint32_t*>(buffer);
            data[0] = 0;
            data[1] = 0;
        }
    } step;

    DispatchGroup::Builder builder(recorder.get());
    builder.appendStep(&step);

    BindBufferInfo info = builder.getSharedBufferResource(0);
    if (!info) {
        ERRORF(reporter, "shared resource at slot 0 is missing");
        return;
    }

    // Record the compute pass task.
    ComputeTask::DispatchGroupList groups;
    groups.push_back(builder.finalize());
    recorder->priv().add(ComputeTask::Make(std::move(groups)));

    // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
    auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);

    // Submit the work and wait for it to complete.
    std::unique_ptr<Recording> recording = recorder->snap();
    if (!recording) {
        ERRORF(reporter, "Failed to make recording");
        return;
    }

    InsertRecordingInfo insertInfo;
    insertInfo.fRecording = recording.get();
    context->insertRecording(insertInfo);
    testContext->syncedSubmit(context);

    // Verify the contents of the output buffer.
    constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize / 2;

    const uint32_t* ssboData = static_cast<const uint32_t*>(
            map_buffer(context, testContext, buffer.get(), info.fOffset));
    const uint32_t firstHalfCount = ssboData[0];
    const uint32_t secondHalfCount = ssboData[1];
    REPORTER_ASSERT(reporter,
                    firstHalfCount == kExpectedCount,
                    "expected '%u', found '%u'",
                    kExpectedCount,
                    firstHalfCount);
    REPORTER_ASSERT(reporter,
                    secondHalfCount == kExpectedCount,
                    "expected '%u', found '%u'",
                    kExpectedCount,
                    secondHalfCount);
}

DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearedBuffer,
                                              reporter,
                                              context,
                                              testContext) {
    constexpr uint32_t kProblemSize = 512;

    // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread
    // processes 1 vector at a time.
    constexpr uint32_t kWorkgroupSize = kProblemSize / 4;

    std::unique_ptr<Recorder> recorder = context->makeRecorder();

    // The ComputeStep requests an unmapped buffer that is zero-initialized. It writes the output
    // to a mapped buffer which the test verifies.
    class TestComputeStep : public ComputeStep {
    public:
        TestComputeStep() : ComputeStep(
                /*name=*/"TestClearedBuffer",
                /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
                /*resources=*/{
                    // Zero-initialized input buffer:
                    {
                        // TODO(b/299979165): Declare this binding as read-only.
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kPrivate,
                        /*policy=*/ResourcePolicy::kClear,
                        /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
                    },
                    // Output buffer:
                    {
                        /*type=*/ResourceType::kStorageBuffer,
                        /*flow=*/DataFlow::kShared,  // shared to allow us to access it from the
                                                     // Builder
                        /*policy=*/ResourcePolicy::kMapped,  // mappable for read-back
                        /*slot=*/0,
                        /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
                    }
                }) {}
        ~TestComputeStep() override = default;

        std::string computeSkSL() const override {
            return R"(
                void main() {
                    out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
                }
            )";
        }

        size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
            return sizeof(uint32_t) * kProblemSize;
        }

        void prepareStorageBuffer(int resourceIndex,
                                  const ResourceDesc& r,
                                  void* buffer,
                                  size_t bufferSize) const override {
            // Should receive this call only for the mapped buffer.
            SkASSERT(resourceIndex == 1);
        }

        WorkgroupSize calculateGlobalDispatchSize() const override {
            return WorkgroupSize(1, 1, 1);
        }
    } step;

1824 DispatchGroup::Builder builder(recorder.get());
1825 if (!builder.appendStep(&step)) {
1826 ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
1827 return;
1828 }
1829
1830 // The output buffer should have been placed in the right output slot.
1831 BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
1832 if (!outputInfo) {
1833 ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
1834 return;
1835 }
1836
1837 // Record the compute task
1838 ComputeTask::DispatchGroupList groups;
1839 groups.push_back(builder.finalize());
1840 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1841
1842 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1843 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer);
1844
1845 // Submit the work and wait for it to complete.
1846 std::unique_ptr<Recording> recording = recorder->snap();
1847 if (!recording) {
1848 ERRORF(reporter, "Failed to make recording");
1849 return;
1850 }
1851
1852 InsertRecordingInfo insertInfo;
1853 insertInfo.fRecording = recording.get();
1854 context->insertRecording(insertInfo);
1855 testContext->syncedSubmit(context);
1856
1857 // Verify the contents of the output buffer.
1858 uint32_t* outData = static_cast<uint32_t*>(
1859 map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset));
1860 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1861 for (unsigned int i = 0; i < kProblemSize; ++i) {
1862 const uint32_t found = outData[i];
1863 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1864 }
1865}
1866
1867DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearOrdering,
1868 reporter,
1869 context,
1870 testContext) {
1871 // Initiate two independent DispatchGroups operating on the same buffer. The first group
1872 // writes garbage to the buffer; the second group clears it and copies its contents to an
1873 // output buffer. This test validates that the reads, writes, and clear occur in the expected order.
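// Editorial sketch of the expected command order once recorded:
//
//   dispatch(FillWithGarbage)   // buffer := 0xFE... via the first group
//   clear(buffer)               // buffer := 0, from ClearBuffer::kYes below
//   dispatch(CopyBuffer)        // output := buffer, via the second group
//
// The all-zeros verification at the end only passes if the clear lands
// between the two dispatches.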
1874 constexpr uint32_t kWorkgroupSize = 64;
1875
1876 // Initialize buffer with non-zero data.
1877 class FillWithGarbage : public ComputeStep {
1878 public:
1879 FillWithGarbage() : ComputeStep(
1880 /*name=*/"FillWithGarbage",
1881 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1882 /*resources=*/{
1883 {
1884 /*type=*/ResourceType::kStorageBuffer,
1885 /*flow=*/DataFlow::kShared,
1886 /*policy=*/ResourcePolicy::kNone,
1887 /*slot=*/0,
1888 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1889 }
1890 }) {}
1891 ~FillWithGarbage() override = default;
1892
1893 std::string computeSkSL() const override {
1894 return R"(
1895 void main() {
1896 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
1897 }
1898 )";
1899 }
1900 } garbageStep;
1901
1902 // Second stage just copies the data to a destination buffer. This is only to verify that this
1903 // stage, issued in a separate DispatchGroup, observes the clear.
1904 class CopyBuffer : public ComputeStep {
1905 public:
1906 CopyBuffer() : ComputeStep(
1907 /*name=*/"CopyBuffer",
1908 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1909 /*resources=*/{
1910 {
1911 /*type=*/ResourceType::kStorageBuffer,
1912 /*flow=*/DataFlow::kShared,
1913 /*policy=*/ResourcePolicy::kNone,
1914 /*slot=*/0,
1915 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
1916 },
1917 {
1918 /*type=*/ResourceType::kStorageBuffer,
1919 /*flow=*/DataFlow::kShared,
1920 /*policy=*/ResourcePolicy::kNone,
1921 /*slot=*/1,
1922 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
1923 }
1924 }) {}
1925 ~CopyBuffer() override = default;
1926
1927 std::string computeSkSL() const override {
1928 return R"(
1929 void main() {
1930 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
1931 }
1932 )";
1933 }
1934 } copyStep;
1935
1936 std::unique_ptr<Recorder> recorder = context->makeRecorder();
1937 DispatchGroup::Builder builder(recorder.get());
1938
1939 constexpr size_t kElementCount = 4 * kWorkgroupSize;
1940 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
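// (Editorial note: 64 invocations x 4 lanes per uint4 = 256 uint32 elements,
// so each buffer is 1024 bytes.)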
1941 auto input = recorder->priv().drawBufferManager()->getStorage(kBufferSize);
1942 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
1943
1944 ComputeTask::DispatchGroupList groups;
1945
1946 // First group.
1947 builder.assignSharedBuffer({input, kBufferSize}, 0);
1948 builder.appendStep(&garbageStep, {{1, 1, 1}});
1949 groups.push_back(builder.finalize());
1950
1951 // Second group.
1952 builder.reset();
1953 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
1954 builder.assignSharedBuffer({output, kBufferSize}, 1);
1955 builder.appendStep(&copyStep, {{1, 1, 1}});
1956 groups.push_back(builder.finalize());
1957
1958 recorder->priv().add(ComputeTask::Make(std::move(groups)));
1959 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
1960 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
1961
1962 // Submit the work and wait for it to complete.
1963 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
1964 if (!recording) {
1965 ERRORF(reporter, "Failed to make recording");
1966 return;
1967 }
1968
1969 // Verify the contents of the output buffer.
1970 uint32_t* outData = static_cast<uint32_t*>(
1971 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
1972 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
1973 for (unsigned int i = 0; i < kElementCount; ++i) {
1974 const uint32_t found = outData[i];
1975 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
1976 }
1977}
1978
1979DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearOrderingScratchBuffers,
1980 reporter,
1981 context,
1982 testContext) {
1983 // This test is the same as the ClearOrdering test, except the two stages access the shared
1984 // buffer through a recycled ScratchBuffer. This primarily tests ScratchBuffer reuse.
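// Editorial sketch of the reuse being tested: the first group's ScratchBuffer
// returns to the pool before the second group allocates one of the same size,
// so both groups plausibly receive the same underlying GPU buffer; the
// ClearBuffer::kYes assignment in the second group is then what guarantees
// the copy stage reads zeros rather than recycled garbage.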
1985 constexpr uint32_t kWorkgroupSize = 64;
1986
1987 // Initialize buffer with non-zero data.
1988 class FillWithGarbage : public ComputeStep {
1989 public:
1990 FillWithGarbage() : ComputeStep(
1991 /*name=*/"FillWithGarbage",
1992 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
1993 /*resources=*/{
1994 {
1995 /*type=*/ResourceType::kStorageBuffer,
1996 /*flow=*/DataFlow::kShared,
1997 /*policy=*/ResourcePolicy::kNone,
1998 /*slot=*/0,
1999 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
2000 }
2001 }) {}
2002 ~FillWithGarbage() override = default;
2003
2004 std::string computeSkSL() const override {
2005 return R"(
2006 void main() {
2007 out_data[sk_GlobalInvocationID.x] = uint4(0xFE);
2008 }
2009 )";
2010 }
2011 } garbageStep;
2012
2013 // Second stage just copies the data to a destination buffer. This is only to verify that this
2014 // stage (issued in a separate DispatchGroup) observes the clear.
2015 class CopyBuffer : public ComputeStep {
2016 public:
2017 CopyBuffer() : ComputeStep(
2018 /*name=*/"CopyBuffer",
2019 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2020 /*resources=*/{
2021 {
2022 /*type=*/ResourceType::kStorageBuffer,
2023 /*flow=*/DataFlow::kShared,
2024 /*policy=*/ResourcePolicy::kNone,
2025 /*slot=*/0,
2026 /*sksl=*/"inputBlock { uint4 in_data[]; }\n",
2027 },
2028 {
2029 /*type=*/ResourceType::kStorageBuffer,
2030 /*flow=*/DataFlow::kShared,
2031 /*policy=*/ResourcePolicy::kNone,
2032 /*slot=*/1,
2033 /*sksl=*/"outputBlock { uint4 out_data[]; }\n",
2034 }
2035 }) {}
2036 ~CopyBuffer() override = default;
2037
2038 std::string computeSkSL() const override {
2039 return R"(
2040 void main() {
2041 out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
2042 }
2043 )";
2044 }
2045 } copyStep;
2046
2047 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2048 DispatchGroup::Builder builder(recorder.get());
2049
2050 constexpr size_t kElementCount = 4 * kWorkgroupSize;
2051 constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount;
2052 auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize);
2053
2054 ComputeTask::DispatchGroupList groups;
2055
2056 // First group.
2057 {
2058 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2059 auto input = scratch.suballocate(kBufferSize);
2060 builder.assignSharedBuffer({input, kBufferSize}, 0);
2061
2062 // `scratch` returns to the scratch buffer pool when it goes out of scope
2063 }
2064 builder.appendStep(&garbageStep, {{1, 1, 1}});
2065 groups.push_back(builder.finalize());
2066
2067 // Second group.
2068 builder.reset();
2069 {
2070 auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize);
2071 auto input = scratch.suballocate(kBufferSize);
2072 builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes);
2073 }
2074 builder.assignSharedBuffer({output, kBufferSize}, 1);
2075 builder.appendStep(&copyStep, {{1, 1, 1}});
2076 groups.push_back(builder.finalize());
2077
2078 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2079 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2080 auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer);
2081
2082 // Submit the work and wait for it to complete.
2083 std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get());
2084 if (!recording) {
2085 ERRORF(reporter, "Failed to make recording");
2086 return;
2087 }
2088
2089 // Verify the contents of the output buffer.
2090 uint32_t* outData = static_cast<uint32_t*>(
2091 map_buffer(context, testContext, outputBuffer.get(), output.fOffset));
2092 SkASSERT(outputBuffer->isMapped() && outData != nullptr);
2093 for (unsigned int i = 0; i < kElementCount; ++i) {
2094 const uint32_t found = outData[i];
2095 REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
2096 }
2097}
2098
2099DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_IndirectDispatch,
2100 reporter,
2101 context,
2102 testContext) {
2103 // This fails on Dawn D3D11, b/315834710
2104 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2105 return;
2106 }
2107
2108 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2109
2110 constexpr uint32_t kWorkgroupCount = 32;
2111 constexpr uint32_t kWorkgroupSize = 64;
2112
2113 // `IndirectStep` populates a buffer with the global workgroup count for `CountStep`.
2114 // `CountStep` is recorded using `DispatchGroup::appendStepIndirect()` and its workgroups get
2115 // dispatched according to the values computed by `IndirectStep` on the GPU.
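// Editorial sketch of the indirect argument layout assumed by this test; the
// struct is illustrative only (it mirrors Metal's
// MTLDispatchThreadgroupsIndirectArguments and the arguments consumed by
// WebGPU's dispatchWorkgroupsIndirect) and is not declared anywhere in this
// file:
//
//   struct IndirectDispatchArgs {
//       uint32_t workgroupCountX;  // IndirectStep writes kWorkgroupCount
//       uint32_t workgroupCountY;  // IndirectStep writes 1
//       uint32_t workgroupCountZ;  // IndirectStep writes 1
//   };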
2116 class IndirectStep : public ComputeStep {
2117 public:
2118 IndirectStep()
2119 : ComputeStep(
2120 /*name=*/"TestIndirectDispatch_IndirectStep",
2121 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2122 /*resources=*/
2123 {{
2124 /*type=*/ResourceType::kIndirectBuffer,
2125 /*flow=*/DataFlow::kShared,
2126 /*policy=*/ResourcePolicy::kClear,
2127 /*slot=*/0,
2128 // TODO(armansito): Ideally the SSBO would have a single member of
2129 // struct type `IndirectDispatchArgs`. SkSL modules don't support
2130 // struct declarations, so this is currently not possible.
2131 /*sksl=*/"ssbo { uint indirect[]; }",
2132 }}) {}
2133 ~IndirectStep() override = default;
2134
2135 // Kernel that writes a workgroup count of `kWorkgroupCount` into the argument buffer
2136 // consumed by the indirect dispatch.
2137 std::string computeSkSL() const override {
2138 return R"(
2139 // This needs to match `kWorkgroupCount` declared above.
2140 const uint kWorkgroupCount = 32;
2141
2142 void main() {
2143 if (sk_LocalInvocationID.x == 0) {
2144 indirect[0] = kWorkgroupCount;
2145 indirect[1] = 1;
2146 indirect[2] = 1;
2147 }
2148 }
2149 )";
2150 }
2151
2152 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2153 SkASSERT(index == 0);
2154 SkASSERT(r.fSlot == 0);
2155 SkASSERT(r.fFlow == DataFlow::kShared);
2156 return kIndirectDispatchArgumentSize;
2157 }
2158
2159 WorkgroupSize calculateGlobalDispatchSize() const override {
2160 return WorkgroupSize(1, 1, 1);
2161 }
2162 } indirectStep;
2163
2164 class CountStep : public ComputeStep {
2165 public:
2166 CountStep()
2167 : ComputeStep(
2168 /*name=*/"TestIndirectDispatch_CountStep",
2169 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2170 /*resources=*/
2171 {{
2172 /*type=*/ResourceType::kStorageBuffer,
2173 /*flow=*/DataFlow::kShared,
2174 /*policy=*/ResourcePolicy::kMapped,
2175 /*slot=*/1,
2176 /*sksl=*/"ssbo { atomicUint globalCounter; }",
2177 }}) {}
2178 ~CountStep() override = default;
2179
2180 std::string computeSkSL() const override {
2181 return R"(
2182 workgroup atomicUint localCounter;
2183
2184 void main() {
2185 // Initialize the local counter.
2186 if (sk_LocalInvocationID.x == 0) {
2187 atomicStore(localCounter, 0);
2188 }
2189
2190 // Synchronize the threads in the workgroup so they all see the initial value.
2191 workgroupBarrier();
2192
2193 // All threads increment the counter.
2194 atomicAdd(localCounter, 1);
2195
2196 // Synchronize the threads again to ensure they have all executed the increment
2197 // and the following load reads the same value across all threads in the
2198 // workgroup.
2199 workgroupBarrier();
2200
2201 // Add the workgroup-only tally to the global counter.
2202 if (sk_LocalInvocationID.x == 0) {
2203 atomicAdd(globalCounter, atomicLoad(localCounter));
2204 }
2205 }
2206 )";
2207 }
2208
2209 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2210 SkASSERT(index == 0);
2211 SkASSERT(r.fSlot == 1);
2212 SkASSERT(r.fFlow == DataFlow::kShared);
2213 return sizeof(uint32_t);
2214 }
2215
2216 void prepareStorageBuffer(int resourceIndex,
2217 const ResourceDesc& r,
2218 void* buffer,
2219 size_t bufferSize) const override {
2220 SkASSERT(resourceIndex == 0);
2221 *static_cast<uint32_t*>(buffer) = 0;
2222 }
2223 } countStep;
2224
2225 DispatchGroup::Builder builder(recorder.get());
2226 builder.appendStep(&indirectStep);
2227 BindBufferInfo indirectBufferInfo = builder.getSharedBufferResource(0);
2228 if (!indirectBufferInfo) {
2229 ERRORF(reporter, "Shared resource at slot 0 is missing");
2230 return;
2231 }
2232 builder.appendStepIndirect(&countStep, {indirectBufferInfo, kIndirectDispatchArgumentSize});
2233
2234 BindBufferInfo info = builder.getSharedBufferResource(1);
2235 if (!info) {
2236 ERRORF(reporter, "Shared resource at slot 1 is missing");
2237 return;
2238 }
2239
2240 // Record the compute pass task.
2241 ComputeTask::DispatchGroupList groups;
2242 groups.push_back(builder.finalize());
2243 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2244
2245 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2246 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2247
2248 // Submit the work and wait for it to complete.
2249 std::unique_ptr<Recording> recording = recorder->snap();
2250 if (!recording) {
2251 ERRORF(reporter, "Failed to make recording");
2252 return;
2253 }
2254
2255 InsertRecordingInfo insertInfo;
2256 insertInfo.fRecording = recording.get();
2257 context->insertRecording(insertInfo);
2258 testContext->syncedSubmit(context);
2259
2260 // Verify the contents of the output buffer.
2261 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
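// (Editorial note: 32 workgroups x 64 invocations per workgroup = 2048.)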
2262 const uint32_t result = static_cast<const uint32_t*>(
2263 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2264 REPORTER_ASSERT(reporter,
2265 result == kExpectedCount,
2266 "expected '%u', found '%u'",
2267 kExpectedCount,
2268 result);
2269}
2270
2271DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_NativeShaderSourceMetal,
2272 reporter,
2273 context,
2274 testContext) {
2275 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2276
2277 constexpr uint32_t kWorkgroupCount = 32;
2278 constexpr uint32_t kWorkgroupSize = 1024;
2279
2280 class TestComputeStep : public ComputeStep {
2281 public:
2282 TestComputeStep() : ComputeStep(
2283 /*name=*/"TestAtomicOperationsMetal",
2284 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2285 /*resources=*/{
2286 {
2287 /*type=*/ResourceType::kStorageBuffer,
2288 /*flow=*/DataFlow::kShared,
2289 /*policy=*/ResourcePolicy::kMapped,
2290 /*slot=*/0,
2291 }
2292 },
2293 /*workgroupBuffers=*/{},
2294 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2295 ~TestComputeStep() override = default;
2296
2297 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2298 SkASSERT(format == NativeShaderFormat::kMSL);
2299 static constexpr std::string_view kSource = R"(
2300 #include <metal_stdlib>
2301
2302 using namespace metal;
2303
2304 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2305 device atomic_uint& globalCounter [[buffer(0)]]) {
2306 threadgroup atomic_uint localCounter;
2307
2308 // Initialize the local counter.
2309 if (localId.x == 0u) {
2310 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2311 }
2312
2313 // Synchronize the threads in the workgroup so they all see the initial value.
2314 threadgroup_barrier(mem_flags::mem_threadgroup);
2315
2316 // All threads increment the counter.
2317 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2318
2319 // Synchronize the threads again to ensure they have all executed the increment
2320 // and the following load reads the same value across all threads in the
2321 // workgroup.
2322 threadgroup_barrier(mem_flags::mem_threadgroup);
2323
2324 // Add the workgroup-only tally to the global counter.
2325 if (localId.x == 0u) {
2326 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2327 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2328 }
2329 }
2330 )";
2331 return {kSource, "atomicCount"};
2332 }
2333
2334 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2335 SkASSERT(index == 0);
2336 SkASSERT(r.fSlot == 0);
2337 SkASSERT(r.fFlow == DataFlow::kShared);
2338 return sizeof(uint32_t);
2339 }
2340
2341 WorkgroupSize calculateGlobalDispatchSize() const override {
2342 return WorkgroupSize(kWorkgroupCount, 1, 1);
2343 }
2344
2345 void prepareStorageBuffer(int resourceIndex,
2346 const ResourceDesc& r,
2347 void* buffer,
2348 size_t bufferSize) const override {
2349 SkASSERT(resourceIndex == 0);
2350 *static_cast<uint32_t*>(buffer) = 0;
2351 }
2352 } step;
2353
2354 DispatchGroup::Builder builder(recorder.get());
2355 builder.appendStep(&step);
2356
2357 BindBufferInfo info = builder.getSharedBufferResource(0);
2358 if (!info) {
2359 ERRORF(reporter, "shared resource at slot 0 is missing");
2360 return;
2361 }
2362
2363 // Record the compute pass task.
2364 ComputeTask::DispatchGroupList groups;
2365 groups.push_back(builder.finalize());
2366 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2367
2368 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2369 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2370
2371 // Submit the work and wait for it to complete.
2372 std::unique_ptr<Recording> recording = recorder->snap();
2373 if (!recording) {
2374 ERRORF(reporter, "Failed to make recording");
2375 return;
2376 }
2377
2378 InsertRecordingInfo insertInfo;
2379 insertInfo.fRecording = recording.get();
2380 context->insertRecording(insertInfo);
2381 testContext->syncedSubmit(context);
2382
2383 // Verify the contents of the output buffer.
2384 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2385 const uint32_t result = static_cast<const uint32_t*>(
2386 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2387 REPORTER_ASSERT(reporter,
2388 result == kExpectedCount,
2389 "expected '%u', found '%u'",
2390 kExpectedCount,
2391 result);
2392}
2393
2394DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_WorkgroupBufferDescMetal,
2395 reporter,
2396 context,
2397 testContext) {
2398 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2399
2400 constexpr uint32_t kWorkgroupCount = 32;
2401 constexpr uint32_t kWorkgroupSize = 1024;
2402
2403 class TestComputeStep : public ComputeStep {
2404 public:
2405 TestComputeStep() : ComputeStep(
2406 /*name=*/"TestAtomicOperationsMetal",
2407 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2408 /*resources=*/{
2409 {
2410 /*type=*/ResourceType::kStorageBuffer,
2411 /*flow=*/DataFlow::kShared,
2412 /*policy=*/ResourcePolicy::kMapped,
2413 /*slot=*/0,
2414 }
2415 },
2416 /*workgroupBuffers=*/{
2417 {
2418 /*size=*/sizeof(uint32_t),
2419 /*index=*/0u,
2420 }
2421 },
2422 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2423 ~TestComputeStep() override = default;
2424
2425 // This is the same MSL kernel as in Compute_NativeShaderSourceMetal, except `localCounter`
2426 // is an entry-point parameter instead of a local variable. This forces the workgroup
2427 // binding to be encoded explicitly in the command encoder.
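// Editorial sketch (metal-cpp style; the encoder variable is assumed, not
// code from this test) of what the explicit encoding amounts to: a
// [[threadgroup(n)]] argument has no storage until the encoder is told its
// size, roughly
//
//   computeEncoder->setThreadgroupMemoryLength(sizeof(uint32_t), /*index=*/0u);
//
// and the WorkgroupBufferDesc entry passed to ComputeStep above is what is
// expected to drive that call.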
2428 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2429 SkASSERT(format == NativeShaderFormat::kMSL);
2430 static constexpr std::string_view kSource = R"(
2431 #include <metal_stdlib>
2432
2433 using namespace metal;
2434
2435 kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]],
2436 device atomic_uint& globalCounter [[buffer(0)]],
2437 threadgroup atomic_uint& localCounter [[threadgroup(0)]]) {
2438 // Initialize the local counter.
2439 if (localId.x == 0u) {
2440 atomic_store_explicit(&localCounter, 0u, memory_order_relaxed);
2441 }
2442
2443 // Synchronize the threads in the workgroup so they all see the initial value.
2444 threadgroup_barrier(mem_flags::mem_threadgroup);
2445
2446 // All threads increment the counter.
2447 atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed);
2448
2449 // Synchronize the threads again to ensure they have all executed the increment
2450 // and the following load reads the same value across all threads in the
2451 // workgroup.
2452 threadgroup_barrier(mem_flags::mem_threadgroup);
2453
2454 // Add the workgroup-only tally to the global counter.
2455 if (localId.x == 0u) {
2456 uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed);
2457 atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed);
2458 }
2459 }
2460 )";
2461 return {kSource, "atomicCount"};
2462 }
2463
2464 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2465 SkASSERT(index == 0);
2466 SkASSERT(r.fSlot == 0);
2467 SkASSERT(r.fFlow == DataFlow::kShared);
2468 return sizeof(uint32_t);
2469 }
2470
2471 WorkgroupSize calculateGlobalDispatchSize() const override {
2472 return WorkgroupSize(kWorkgroupCount, 1, 1);
2473 }
2474
2475 void prepareStorageBuffer(int resourceIndex,
2476 const ResourceDesc& r,
2477 void* buffer,
2478 size_t bufferSize) const override {
2479 SkASSERT(resourceIndex == 0);
2480 *static_cast<uint32_t*>(buffer) = 0;
2481 }
2482 } step;
2483
2484 DispatchGroup::Builder builder(recorder.get());
2485 builder.appendStep(&step);
2486
2487 BindBufferInfo info = builder.getSharedBufferResource(0);
2488 if (!info) {
2489 ERRORF(reporter, "shared resource at slot 0 is missing");
2490 return;
2491 }
2492
2493 // Record the compute pass task.
2494 ComputeTask::DispatchGroupList groups;
2495 groups.push_back(builder.finalize());
2496 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2497
2498 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2499 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2500
2501 // Submit the work and wait for it to complete.
2502 std::unique_ptr<Recording> recording = recorder->snap();
2503 if (!recording) {
2504 ERRORF(reporter, "Failed to make recording");
2505 return;
2506 }
2507
2508 InsertRecordingInfo insertInfo;
2509 insertInfo.fRecording = recording.get();
2510 context->insertRecording(insertInfo);
2511 testContext->syncedSubmit(context);
2512
2513 // Verify the contents of the output buffer.
2514 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2515 const uint32_t result = static_cast<const uint32_t*>(
2516 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2517 REPORTER_ASSERT(reporter,
2518 result == kExpectedCount,
2519 "expected '%u', found '%u'",
2520 kExpectedCount,
2521 result);
2522}
2523
2524DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT(Compute_NativeShaderSourceWGSL, reporter, context, testContext) {
2525 // This fails on Dawn D3D11, b/315834710
2526 if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) {
2527 return;
2528 }
2529
2530 std::unique_ptr<Recorder> recorder = context->makeRecorder();
2531
2532 constexpr uint32_t kWorkgroupCount = 32;
2533 constexpr uint32_t kWorkgroupSize = 256; // The WebGPU default workgroup size limit is 256
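// (Editorial note: 256 is WebGPU's default maxComputeInvocationsPerWorkgroup
// limit; larger workgroups would require requesting higher device limits.)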
2534
2535 class TestComputeStep : public ComputeStep {
2536 public:
2537 TestComputeStep() : ComputeStep(
2538 /*name=*/"TestAtomicOperationsWGSL",
2539 /*localDispatchSize=*/{kWorkgroupSize, 1, 1},
2540 /*resources=*/{
2541 {
2542 /*type=*/ResourceType::kStorageBuffer,
2543 /*flow=*/DataFlow::kShared,
2544 /*policy=*/ResourcePolicy::kMapped,
2545 /*slot=*/0,
2546 }
2547 },
2548 /*workgroupBuffers=*/{},
2549 /*baseFlags=*/Flags::kSupportsNativeShader) {}
2550 ~TestComputeStep() override = default;
2551
2552 NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
2553 SkASSERT(format == NativeShaderFormat::kWGSL);
2554 static constexpr std::string_view kSource = R"(
2555 @group(0) @binding(0) var<storage, read_write> globalCounter: atomic<u32>;
2556
2557 var<workgroup> localCounter: atomic<u32>;
2558
2559 @compute @workgroup_size(256)
2560 fn atomicCount(@builtin(local_invocation_id) localId: vec3u) {
2561 // Initialize the local counter.
2562 if localId.x == 0u {
2563 atomicStore(&localCounter, 0u);
2564 }
2565
2566 // Synchronize the threads in the workgroup so they all see the initial value.
2567 workgroupBarrier();
2568
2569 // All threads increment the counter.
2570 atomicAdd(&localCounter, 1u);
2571
2572 // Synchronize the threads again to ensure they have all executed the increment
2573 // and the following load reads the same value across all threads in the
2574 // workgroup.
2575 workgroupBarrier();
2576
2577 // Add the workgroup-only tally to the global counter.
2578 if localId.x == 0u {
2579 let tally = atomicLoad(&localCounter);
2580 atomicAdd(&globalCounter, tally);
2581 }
2582 }
2583 )";
2584 return {kSource, "atomicCount"};
2585 }
2586
2587 size_t calculateBufferSize(int index, const ResourceDesc& r) const override {
2588 SkASSERT(index == 0);
2589 SkASSERT(r.fSlot == 0);
2590 SkASSERT(r.fFlow == DataFlow::kShared);
2591 return sizeof(uint32_t);
2592 }
2593
2594 WorkgroupSize calculateGlobalDispatchSize() const override {
2595 return WorkgroupSize(kWorkgroupCount, 1, 1);
2596 }
2597
2598 void prepareStorageBuffer(int resourceIndex,
2599 const ResourceDesc& r,
2600 void* buffer,
2601 size_t bufferSize) const override {
2602 SkASSERT(resourceIndex == 0);
2603 *static_cast<uint32_t*>(buffer) = 0;
2604 }
2605 } step;
2606
2607 DispatchGroup::Builder builder(recorder.get());
2608 builder.appendStep(&step);
2609
2610 BindBufferInfo info = builder.getSharedBufferResource(0);
2611 if (!info) {
2612 ERRORF(reporter, "shared resource at slot 0 is missing");
2613 return;
2614 }
2615
2616 // Record the compute pass task.
2617 ComputeTask::DispatchGroupList groups;
2618 groups.push_back(builder.finalize());
2619 recorder->priv().add(ComputeTask::Make(std::move(groups)));
2620
2621 // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
2622 auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer);
2623
2624 // Submit the work and wait for it to complete.
2625 std::unique_ptr<Recording> recording = recorder->snap();
2626 if (!recording) {
2627 ERRORF(reporter, "Failed to make recording");
2628 return;
2629 }
2630
2631 InsertRecordingInfo insertInfo;
2632 insertInfo.fRecording = recording.get();
2633 context->insertRecording(insertInfo);
2634 testContext->syncedSubmit(context);
2635
2636 // Verify the contents of the output buffer.
2637 constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize;
2638 const uint32_t result = static_cast<const uint32_t*>(
2639 map_buffer(context, testContext, buffer.get(), info.fOffset))[0];
2640 REPORTER_ASSERT(reporter,
2641 result == kExpectedCount,
2642 "expected '%u', found '%u'",
2643 kExpectedCount,
2644 result);
2645}