Flutter Engine
The Flutter Engine
gpu_tracer_vk.cc
Go to the documentation of this file.
1// Copyright 2013 The Flutter Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
7#include <memory>
8#include <optional>
9#include <thread>
10#include <utility>
11
12#include "fml/logging.h"
13#include "fml/trace_event.h"
18
19#include "vulkan/vulkan.hpp"
20
21namespace impeller {
22
23static constexpr uint32_t kPoolSize = 128u;
24
25GPUTracerVK::GPUTracerVK(std::weak_ptr<ContextVK> context,
26 bool enable_gpu_tracing)
27 : context_(std::move(context)) {
28 if (!enable_gpu_tracing) {
29 return;
30 }
31 timestamp_period_ = context_.lock()
32 ->GetDeviceHolder()
33 ->GetPhysicalDevice()
34 .getProperties()
35 .limits.timestampPeriod;
36 if (timestamp_period_ <= 0) {
37 // The device does not support timestamp queries.
38 return;
39 }
40// Disable tracing in release mode.
41#ifdef IMPELLER_DEBUG
42 enabled_ = true;
43#endif // IMPELLER_DEBUG
44}
45
47 if (!enabled_) {
48 return;
49 }
50 Lock lock(trace_state_mutex_);
51 std::shared_ptr<CommandBuffer> buffer = context.CreateCommandBuffer();
53
54 for (auto i = 0u; i < kTraceStatesSize; i++) {
55 vk::QueryPoolCreateInfo info;
56 info.queryCount = kPoolSize;
57 info.queryType = vk::QueryType::eTimestamp;
58
59 auto [status, pool] = context.GetDevice().createQueryPoolUnique(info);
60 if (status != vk::Result::eSuccess) {
61 VALIDATION_LOG << "Failed to create query pool.";
62 return;
63 }
64 trace_states_[i].query_pool = std::move(pool);
65 buffer_vk.GetEncoder()->GetCommandBuffer().resetQueryPool(
66 trace_states_[i].query_pool.get(), 0, kPoolSize);
67 }
68 if (!context.GetCommandQueue()->Submit({buffer}).ok()) {
69 VALIDATION_LOG << "Failed to reset query pool for trace events.";
70 enabled_ = false;
71 }
72}
73
75 return enabled_;
76}
77
79 if (!enabled_) {
80 return;
81 }
82 FML_DCHECK(!in_frame_);
83 in_frame_ = true;
84 raster_thread_id_ = std::this_thread::get_id();
85}
86
88 in_frame_ = false;
89
90 if (!enabled_) {
91 return;
92 }
93
94 Lock lock(trace_state_mutex_);
95 current_state_ = (current_state_ + 1) % kTraceStatesSize;
96
97 auto& state = trace_states_[current_state_];
98 // If there are still pending buffers on the trace state we're switching to,
99 // that means that a cmd buffer we were relying on to signal this likely
100 // never finished. This shouldn't happen unless there is a bug in the
101 // encoder logic. We set it to zero anyway to prevent a validation error
102 // from becoming a memory leak.
103 FML_DCHECK(state.pending_buffers == 0u);
104 state.pending_buffers = 0;
105 state.current_index = 0;
106}
107
108std::unique_ptr<GPUProbe> GPUTracerVK::CreateGPUProbe() {
109 return std::make_unique<GPUProbe>(weak_from_this());
110}
111
112void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer,
113 GPUProbe& probe) {
114 if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
115 !in_frame_) {
116 return;
117 }
118 Lock lock(trace_state_mutex_);
119 auto& state = trace_states_[current_state_];
120
121 // Reset previously completed queries.
122 if (!states_to_reset_.empty()) {
123 for (auto i = 0u; i < states_to_reset_.size(); i++) {
124 buffer.resetQueryPool(trace_states_[states_to_reset_[i]].query_pool.get(),
125 0, kPoolSize);
126 }
127 states_to_reset_.clear();
128 }
129
130 // We size the query pool to kPoolSize, but Flutter applications can create an
131 // unbounded amount of work per frame. If we encounter this, stop recording
132 // cmds.
133 if (state.current_index >= kPoolSize) {
134 return;
135 }
136
137 buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe,
138 trace_states_[current_state_].query_pool.get(),
139 state.current_index);
140 state.current_index += 1;
141 probe.index_ = current_state_;
142 state.pending_buffers += 1;
143}
144
145void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer,
146 GPUProbe& probe) {
147 if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
148 !in_frame_ || !probe.index_.has_value()) {
149 return;
150 }
151 Lock lock(trace_state_mutex_);
152 GPUTraceState& state = trace_states_[current_state_];
153
154 if (state.current_index >= kPoolSize) {
155 return;
156 }
157
158 buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe,
159 state.query_pool.get(), state.current_index);
160
161 state.current_index += 1;
162}
163
164void GPUTracerVK::OnFenceComplete(size_t frame_index) {
165 if (!enabled_) {
166 return;
167 }
168
169 size_t pending = 0;
170 size_t query_count = 0;
171 vk::QueryPool pool;
172 {
173 Lock lock(trace_state_mutex_);
174 GPUTraceState& state = trace_states_[frame_index];
175
176 FML_DCHECK(state.pending_buffers > 0);
177 state.pending_buffers -= 1;
178 pending = state.pending_buffers;
179 query_count = state.current_index;
180 pool = state.query_pool.get();
181 }
182
183 if (pending == 0) {
184 std::vector<uint64_t> bits(query_count);
185 std::shared_ptr<ContextVK> context = context_.lock();
186 if (!context) {
187 return;
188 }
189
190 auto result = context->GetDevice().getQueryPoolResults(
191 pool, 0, query_count, query_count * sizeof(uint64_t), bits.data(),
192 sizeof(uint64_t), vk::QueryResultFlagBits::e64);
193 // This may return VK_NOT_READY if the query couldn't be completed, or if
194 // there are queries still pending. From local testing, this happens
195 // occassionally on very expensive frames. Its unclear if we can do anything
196 // about this, because by design this should only signal after all cmd
197 // buffers have signaled. Adding VK_QUERY_RESULT_WAIT_BIT to the flags
198 // passed to getQueryPoolResults seems like it would fix this, but actually
199 // seems to result in more stuck query errors. Better to just drop them and
200 // move on.
201 if (result == vk::Result::eSuccess) {
202 uint64_t smallest_timestamp = std::numeric_limits<uint64_t>::max();
203 uint64_t largest_timestamp = 0;
204 for (auto i = 0u; i < bits.size(); i++) {
205 smallest_timestamp = std::min(smallest_timestamp, bits[i]);
206 largest_timestamp = std::max(largest_timestamp, bits[i]);
207 }
208 auto gpu_ms =
209 (((largest_timestamp - smallest_timestamp) * timestamp_period_) /
210 1000000);
211 FML_TRACE_COUNTER("flutter", "GPUTracer",
212 reinterpret_cast<int64_t>(this), // Trace Counter ID
213 "FrameTimeMS", gpu_ms);
214 }
215
216 // Record this query to be reset the next time a command is recorded.
217 Lock lock(trace_state_mutex_);
218 states_to_reset_.push_back(frame_index);
219 }
220}
221
222GPUProbe::GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer)
223 : tracer_(tracer) {}
224
226 if (!index_.has_value()) {
227 return;
228 }
229 auto tracer = tracer_.lock();
230 if (!tracer) {
231 return;
232 }
233 tracer->OnFenceComplete(index_.value());
234}
235
236void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
237 auto tracer = tracer_.lock();
238 if (!tracer) {
239 return;
240 }
241 tracer->RecordCmdBufferStart(buffer, *this);
242}
243
244void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) {
245 auto tracer = tracer_.lock();
246 if (!tracer) {
247 return;
248 }
249 tracer->RecordCmdBufferEnd(buffer, *this);
250}
251
252} // namespace impeller
AutoreleasePool pool
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
Definition: DM.cpp:213
static CommandBufferVK & Cast(CommandBuffer &base)
Definition: backend_cast.h:13
const std::shared_ptr< CommandEncoderVK > & GetEncoder()
const vk::Device & GetDevice() const
Definition: context_vk.cc:513
std::shared_ptr< CommandBuffer > CreateCommandBuffer() const override
Create a new command buffer. Command buffers can be used to encode graphics, blit,...
Definition: context_vk.cc:502
std::shared_ptr< CommandQueue > GetCommandQueue() const override
Return the graphics queue for submitting command buffers.
Definition: context_vk.cc:577
void RecordCmdBufferStart(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record start time.
GPUProbe(const std::weak_ptr< GPUTracerVK > &tracer)
void RecordCmdBufferEnd(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record end time.
void MarkFrameStart()
Signal the start of a frame workload.
void MarkFrameEnd()
Signal the end of a frame workload.
std::unique_ptr< GPUProbe > CreateGPUProbe()
Create a GPUProbe to trace the execution of a command buffer on the GPU.
void InitializeQueryPool(const ContextVK &context)
Initialize the set of query pools.
GPUTracerVK(std::weak_ptr< ContextVK > context, bool enable_gpu_tracing)
bool IsEnabled() const
AtkStateType state
GAsyncResult * result
#define FML_DCHECK(condition)
Definition: logging.h:103
static float max(float r, float g, float b)
Definition: hsl.cpp:49
static float min(float r, float g, float b)
Definition: hsl.cpp:48
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified vm service A custom Dart VM Service port The default is to pick a randomly available open port disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode disable vm service Disable mDNS Dart VM Service publication Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set endless trace buffer
Definition: switches.h:126
static constexpr uint32_t kPoolSize
Definition: ref_ptr.h:256
#define FML_TRACE_COUNTER(category_group, name, counter_id, arg1,...)
Definition: trace_event.h:85
#define VALIDATION_LOG
Definition: validation.h:73