Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
gpu_tracer_vk.cc
Go to the documentation of this file.
1// Copyright 2013 The Flutter Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
7#include <memory>
8#include <optional>
9#include <thread>
10#include <utility>
11
12#include "fml/logging.h"
13#include "fml/trace_event.h"
18
19#include "vulkan/vulkan.hpp"
20
21namespace impeller {
22
23static constexpr uint32_t kPoolSize = 128u;
24
25GPUTracerVK::GPUTracerVK(std::weak_ptr<ContextVK> context,
26 bool enable_gpu_tracing)
27 : context_(std::move(context)) {
28 if (!enable_gpu_tracing) {
29 return;
30 }
31 timestamp_period_ = context_.lock()
32 ->GetDeviceHolder()
33 ->GetPhysicalDevice()
34 .getProperties()
35 .limits.timestampPeriod;
36 if (timestamp_period_ <= 0) {
37 // The device does not support timestamp queries.
38 return;
39 }
40// Disable tracing in release mode.
41#ifdef IMPELLER_DEBUG
42 enabled_ = true;
43#endif // IMPELLER_DEBUG
44}
45
47 if (!enabled_) {
48 return;
49 }
50 Lock lock(trace_state_mutex_);
51 std::shared_ptr<CommandBuffer> buffer = context.CreateCommandBuffer();
53
54 for (auto i = 0u; i < kTraceStatesSize; i++) {
55 vk::QueryPoolCreateInfo info;
56 info.queryCount = kPoolSize;
57 info.queryType = vk::QueryType::eTimestamp;
58
59 auto [status, pool] = context.GetDevice().createQueryPoolUnique(info);
60 if (status != vk::Result::eSuccess) {
61 VALIDATION_LOG << "Failed to create query pool.";
62 return;
63 }
64 trace_states_[i].query_pool = std::move(pool);
65 buffer_vk.GetEncoder()->GetCommandBuffer().resetQueryPool(
66 trace_states_[i].query_pool.get(), 0, kPoolSize);
67 }
68 if (!context.GetCommandQueue()->Submit({buffer}).ok()) {
69 VALIDATION_LOG << "Failed to reset query pool for trace events.";
70 enabled_ = false;
71 }
72}
73
75 return enabled_;
76}
77
79 if (!enabled_) {
80 return;
81 }
82 FML_DCHECK(!in_frame_);
83 in_frame_ = true;
84 raster_thread_id_ = std::this_thread::get_id();
85}
86
88 in_frame_ = false;
89
90 if (!enabled_) {
91 return;
92 }
93
94 Lock lock(trace_state_mutex_);
95 current_state_ = (current_state_ + 1) % kTraceStatesSize;
96
97 auto& state = trace_states_[current_state_];
98 // If there are still pending buffers on the trace state we're switching to,
99 // that means that a cmd buffer we were relying on to signal this likely
100 // never finished. This shouldn't happen unless there is a bug in the
101 // encoder logic. We set it to zero anyway to prevent a validation error
102 // from becoming a memory leak.
103 FML_DCHECK(state.pending_buffers == 0u);
104 state.pending_buffers = 0;
105 state.current_index = 0;
106}
107
108std::unique_ptr<GPUProbe> GPUTracerVK::CreateGPUProbe() {
109 return std::make_unique<GPUProbe>(weak_from_this());
110}
111
112void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer,
113 GPUProbe& probe) {
114 if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
115 !in_frame_) {
116 return;
117 }
118 Lock lock(trace_state_mutex_);
119 auto& state = trace_states_[current_state_];
120
121 // Reset previously completed queries.
122 if (!states_to_reset_.empty()) {
123 for (auto i = 0u; i < states_to_reset_.size(); i++) {
124 buffer.resetQueryPool(trace_states_[states_to_reset_[i]].query_pool.get(),
125 0, kPoolSize);
126 }
127 states_to_reset_.clear();
128 }
129
130 // We size the query pool to kPoolSize, but Flutter applications can create an
131 // unbounded amount of work per frame. If we encounter this, stop recording
132 // cmds.
133 if (state.current_index >= kPoolSize) {
134 return;
135 }
136
137 buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe,
138 trace_states_[current_state_].query_pool.get(),
139 state.current_index);
140 state.current_index += 1;
141 probe.index_ = current_state_;
142 state.pending_buffers += 1;
143}
144
145void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer,
146 GPUProbe& probe) {
147 if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
148 !in_frame_ || !probe.index_.has_value()) {
149 return;
150 }
151 Lock lock(trace_state_mutex_);
152 GPUTraceState& state = trace_states_[current_state_];
153
154 if (state.current_index >= kPoolSize) {
155 return;
156 }
157
158 buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe,
159 state.query_pool.get(), state.current_index);
160
161 state.current_index += 1;
162}
163
164void GPUTracerVK::OnFenceComplete(size_t frame_index) {
165 if (!enabled_) {
166 return;
167 }
168
169 size_t pending = 0;
170 size_t query_count = 0;
171 vk::QueryPool pool;
172 {
173 Lock lock(trace_state_mutex_);
174 GPUTraceState& state = trace_states_[frame_index];
175
176 FML_DCHECK(state.pending_buffers > 0);
177 state.pending_buffers -= 1;
178 pending = state.pending_buffers;
179 query_count = state.current_index;
180 pool = state.query_pool.get();
181 }
182
183 if (pending == 0) {
184 std::vector<uint64_t> bits(query_count);
185 std::shared_ptr<ContextVK> context = context_.lock();
186 if (!context) {
187 return;
188 }
189
190 auto result = context->GetDevice().getQueryPoolResults(
191 pool, 0, query_count, query_count * sizeof(uint64_t), bits.data(),
192 sizeof(uint64_t), vk::QueryResultFlagBits::e64);
193 // This may return VK_NOT_READY if the query couldn't be completed, or if
194 // there are queries still pending. From local testing, this happens
195 // occassionally on very expensive frames. Its unclear if we can do anything
196 // about this, because by design this should only signal after all cmd
197 // buffers have signaled. Adding VK_QUERY_RESULT_WAIT_BIT to the flags
198 // passed to getQueryPoolResults seems like it would fix this, but actually
199 // seems to result in more stuck query errors. Better to just drop them and
200 // move on.
201 if (result == vk::Result::eSuccess) {
202 uint64_t smallest_timestamp = std::numeric_limits<uint64_t>::max();
203 uint64_t largest_timestamp = 0;
204 for (auto i = 0u; i < bits.size(); i++) {
205 smallest_timestamp = std::min(smallest_timestamp, bits[i]);
206 largest_timestamp = std::max(largest_timestamp, bits[i]);
207 }
208 auto gpu_ms =
209 (((largest_timestamp - smallest_timestamp) * timestamp_period_) /
210 1000000);
211 FML_TRACE_COUNTER("flutter", "GPUTracer",
212 reinterpret_cast<int64_t>(this), // Trace Counter ID
213 "FrameTimeMS", gpu_ms);
214 }
215
216 // Record this query to be reset the next time a command is recorded.
217 Lock lock(trace_state_mutex_);
218 states_to_reset_.push_back(frame_index);
219 }
220}
221
222GPUProbe::GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer)
223 : tracer_(tracer) {}
224
226 if (!index_.has_value()) {
227 return;
228 }
229 auto tracer = tracer_.lock();
230 if (!tracer) {
231 return;
232 }
233 tracer->OnFenceComplete(index_.value());
234}
235
236void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
237 auto tracer = tracer_.lock();
238 if (!tracer) {
239 return;
240 }
241 tracer->RecordCmdBufferStart(buffer, *this);
242}
243
244void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) {
245 auto tracer = tracer_.lock();
246 if (!tracer) {
247 return;
248 }
249 tracer->RecordCmdBufferEnd(buffer, *this);
250}
251
252} // namespace impeller
AutoreleasePool pool
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
Definition DM.cpp:213
static CommandBufferVK & Cast(CommandBuffer &base)
const std::shared_ptr< CommandEncoderVK > & GetEncoder()
const vk::Device & GetDevice() const
std::shared_ptr< CommandBuffer > CreateCommandBuffer() const override
Create a new command buffer. Command buffers can be used to encode graphics, blit,...
std::shared_ptr< CommandQueue > GetCommandQueue() const override
Return the graphics queue for submitting command buffers.
void RecordCmdBufferStart(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record start time.
GPUProbe(const std::weak_ptr< GPUTracerVK > &tracer)
void RecordCmdBufferEnd(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record end time.
void MarkFrameStart()
Signal the start of a frame workload.
void MarkFrameEnd()
Signal the end of a frame workload.
std::unique_ptr< GPUProbe > CreateGPUProbe()
Create a GPUProbe to trace the execution of a command buffer on the GPU.
void InitializeQueryPool(const ContextVK &context)
Initialize the set of query pools.
GPUTracerVK(std::weak_ptr< ContextVK > context, bool enable_gpu_tracing)
AtkStateType state
static const uint8_t buffer[]
GAsyncResult * result
#define FML_DCHECK(condition)
Definition logging.h:103
static constexpr uint32_t kPoolSize
Definition ref_ptr.h:256
#define FML_TRACE_COUNTER(category_group, name, counter_id, arg1,...)
Definition trace_event.h:85
#define VALIDATION_LOG
Definition validation.h:73