Flutter Engine
 
Loading...
Searching...
No Matches
gpu_tracer_vk.cc
Go to the documentation of this file.
1// Copyright 2013 The Flutter Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
7#include <memory>
8#include <optional>
9#include <thread>
10#include <utility>
11
12#include "fml/logging.h"
13#include "fml/trace_event.h"
17
18namespace impeller {
19
// Number of timestamp queries in each query pool. Used as the queryCount when
// the pools are created, as the range passed to resetQueryPool, and as the
// upper bound on a trace state's current_index.
20static constexpr uint32_t kPoolSize = 128u;
21
22GPUTracerVK::GPUTracerVK(std::weak_ptr<ContextVK> context,
23 bool enable_gpu_tracing)
24 : context_(std::move(context)) {
25 if (!enable_gpu_tracing) {
26 return;
27 }
28 timestamp_period_ = context_.lock()
29 ->GetDeviceHolder()
30 ->GetPhysicalDevice()
31 .getProperties()
32 .limits.timestampPeriod;
33 if (timestamp_period_ <= 0) {
34 // The device does not support timestamp queries.
35 return;
36 }
37// Disable tracing in release mode.
38#ifdef IMPELLER_DEBUG
39 enabled_ = true;
40#endif // IMPELLER_DEBUG
41}
42
44 if (!enabled_) {
45 return;
46 }
47 Lock lock(trace_state_mutex_);
48 std::shared_ptr<CommandBuffer> buffer = context.CreateCommandBuffer();
49 CommandBufferVK& buffer_vk = CommandBufferVK::Cast(*buffer);
50
51 for (auto i = 0u; i < kTraceStatesSize; i++) {
52 vk::QueryPoolCreateInfo info;
53 info.queryCount = kPoolSize;
54 info.queryType = vk::QueryType::eTimestamp;
55
56 auto [status, pool] = context.GetDevice().createQueryPoolUnique(info);
57 if (status != vk::Result::eSuccess) {
58 VALIDATION_LOG << "Failed to create query pool.";
59 return;
60 }
61 trace_states_[i].query_pool = std::move(pool);
62 buffer_vk.GetCommandBuffer().resetQueryPool(
63 trace_states_[i].query_pool.get(), 0, kPoolSize);
64 }
65 if (!context.GetCommandQueue()->Submit({buffer}).ok()) {
66 VALIDATION_LOG << "Failed to reset query pool for trace events.";
67 enabled_ = false;
68 }
69}
70
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering skips from 70 to 72). The body only returns
// the enabled_ flag - presumably an IsEnabled()-style accessor; confirm
// against the original file.
72 return enabled_;
73}
74
76 if (!enabled_) {
77 return;
78 }
79 FML_DCHECK(!in_frame_);
80 in_frame_ = true;
81 raster_thread_id_ = std::this_thread::get_id();
82}
83
85 in_frame_ = false;
86
87 if (!enabled_) {
88 return;
89 }
90
91 Lock lock(trace_state_mutex_);
92 current_state_ = (current_state_ + 1) % kTraceStatesSize;
93
94 auto& state = trace_states_[current_state_];
95 // If there are still pending buffers on the trace state we're switching to,
96 // that means that a cmd buffer we were relying on to signal this likely
97 // never finished. This shouldn't happen unless there is a bug in the
98 // encoder logic. We set it to zero anyway to prevent a validation error
99 // from becoming a memory leak.
100 FML_DCHECK(state.pending_buffers == 0u);
101 state.pending_buffers = 0;
102 state.current_index = 0;
103}
104
105std::unique_ptr<GPUProbe> GPUTracerVK::CreateGPUProbe() {
106 return std::make_unique<GPUProbe>(weak_from_this());
107}
108
109void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer,
110 GPUProbe& probe) {
111 if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
112 !in_frame_) {
113 return;
114 }
115 Lock lock(trace_state_mutex_);
116 auto& state = trace_states_[current_state_];
117
118 // Reset previously completed queries.
119 if (!states_to_reset_.empty()) {
120 for (auto i = 0u; i < states_to_reset_.size(); i++) {
121 buffer.resetQueryPool(trace_states_[states_to_reset_[i]].query_pool.get(),
122 0, kPoolSize);
123 }
124 states_to_reset_.clear();
125 }
126
127 // We size the query pool to kPoolSize, but Flutter applications can create an
128 // unbounded amount of work per frame. If we encounter this, stop recording
129 // cmds.
130 if (state.current_index >= kPoolSize) {
131 return;
132 }
133
134 buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe,
135 trace_states_[current_state_].query_pool.get(),
136 state.current_index);
137 state.current_index += 1;
138 probe.index_ = current_state_;
139 state.pending_buffers += 1;
140}
141
142void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer,
143 GPUProbe& probe) {
144 if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
145 !in_frame_ || !probe.index_.has_value()) {
146 return;
147 }
148 Lock lock(trace_state_mutex_);
149 GPUTraceState& state = trace_states_[current_state_];
150
151 if (state.current_index >= kPoolSize) {
152 return;
153 }
154
155 buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe,
156 state.query_pool.get(), state.current_index);
157
158 state.current_index += 1;
159}
160
161void GPUTracerVK::OnFenceComplete(size_t frame_index) {
162 if (!enabled_) {
163 return;
164 }
165
166 size_t pending = 0;
167 size_t query_count = 0;
168 vk::QueryPool pool;
169 {
170 Lock lock(trace_state_mutex_);
171 GPUTraceState& state = trace_states_[frame_index];
172
173 FML_DCHECK(state.pending_buffers > 0);
174 state.pending_buffers -= 1;
175 pending = state.pending_buffers;
176 query_count = state.current_index;
177 pool = state.query_pool.get();
178 }
179
180 if (pending == 0) {
181 std::vector<uint64_t> bits(query_count);
182 std::shared_ptr<ContextVK> context = context_.lock();
183 if (!context) {
184 return;
185 }
186
187 auto result = context->GetDevice().getQueryPoolResults(
188 pool, 0, query_count, query_count * sizeof(uint64_t), bits.data(),
189 sizeof(uint64_t), vk::QueryResultFlagBits::e64);
190 // This may return VK_NOT_READY if the query couldn't be completed, or if
191 // there are queries still pending. From local testing, this happens
192 // occassionally on very expensive frames. Its unclear if we can do anything
193 // about this, because by design this should only signal after all cmd
194 // buffers have signaled. Adding VK_QUERY_RESULT_WAIT_BIT to the flags
195 // passed to getQueryPoolResults seems like it would fix this, but actually
196 // seems to result in more stuck query errors. Better to just drop them and
197 // move on.
198 if (result == vk::Result::eSuccess) {
199 uint64_t smallest_timestamp = std::numeric_limits<uint64_t>::max();
200 uint64_t largest_timestamp = 0;
201 for (auto i = 0u; i < bits.size(); i++) {
202 smallest_timestamp = std::min(smallest_timestamp, bits[i]);
203 largest_timestamp = std::max(largest_timestamp, bits[i]);
204 }
205 auto gpu_ms =
206 (((largest_timestamp - smallest_timestamp) * timestamp_period_) /
207 1000000);
208 FML_TRACE_COUNTER("flutter", "GPUTracer",
209 reinterpret_cast<int64_t>(this), // Trace Counter ID
210 "FrameTimeMS", gpu_ms);
211 }
212
213 // Record this query to be reset the next time a command is recorded.
214 Lock lock(trace_state_mutex_);
215 states_to_reset_.push_back(frame_index);
216 }
217}
218
219GPUProbe::GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer)
220 : tracer_(tracer) {}
221
// NOTE(review): the header line of this definition (embedded line 222) is
// absent from this listing. The body reads the GPUProbe members index_ and
// tracer_ - presumably this is GPUProbe's destructor; confirm against the
// original file.
//
// If this probe was tagged with a trace-state index and the tracer is still
// alive, report that index to the tracer through OnFenceComplete.
223 if (!index_.has_value()) {
// No index was recorded on this probe, so there is nothing to report.
224 return;
225 }
226 auto tracer = tracer_.lock();
227 if (!tracer) {
// The tracer has already been destroyed.
228 return;
229 }
230 tracer->OnFenceComplete(index_.value());
231}
232
233void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
234 auto tracer = tracer_.lock();
235 if (!tracer) {
236 return;
237 }
238 tracer->RecordCmdBufferStart(buffer, *this);
239}
240
241void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) {
242 auto tracer = tracer_.lock();
243 if (!tracer) {
244 return;
245 }
246 tracer->RecordCmdBufferEnd(buffer, *this);
247}
248
249} // namespace impeller
static CommandBufferVK & Cast(CommandBuffer &base)
vk::CommandBuffer GetCommandBuffer() const
Retrieve the native command buffer from this object.
const vk::Device & GetDevice() const
std::shared_ptr< CommandBuffer > CreateCommandBuffer() const override
Create a new command buffer. Command buffers can be used to encode graphics, blit,...
std::shared_ptr< CommandQueue > GetCommandQueue() const override
Return the graphics queue for submitting command buffers.
void RecordCmdBufferStart(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record start time.
GPUProbe(const std::weak_ptr< GPUTracerVK > &tracer)
void RecordCmdBufferEnd(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record end time.
void MarkFrameStart()
Signal the start of a frame workload.
void MarkFrameEnd()
Signal the end of a frame workload.
std::unique_ptr< GPUProbe > CreateGPUProbe()
Create a GPUProbe to trace the execution of a command buffer on the GPU.
void InitializeQueryPool(const ContextVK &context)
Initialize the set of query pools.
GPUTracerVK(std::weak_ptr< ContextVK > context, bool enable_gpu_tracing)
#define FML_DCHECK(condition)
Definition logging.h:122
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set profile Make the profiler discard new samples once the profiler sample buffer is full When this flag is not the profiler sample buffer is used as a ring buffer
Definition switch_defs.h:98
static constexpr uint32_t kPoolSize
Definition ref_ptr.h:261
#define FML_TRACE_COUNTER(category_group, name, counter_id, arg1,...)
Definition trace_event.h:85
#define VALIDATION_LOG
Definition validation.h:91