GPUTracerVK::GPUTracerVK(std::weak_ptr<ContextVK> context,
                         bool enable_gpu_tracing)
    : context_(std::move(context)) {
  if (!enable_gpu_tracing) {
    return;
  }
  timestamp_period_ = context_.lock()
                          ->GetPhysicalDevice()
                          .getProperties()
                          .limits.timestampPeriod;
  if (timestamp_period_ <= 0) {
    // The device does not support timestamp queries.
    return;
  }
  enabled_ = true;
}
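// Illustrative note (an addition, not part of the original source):
// timestampPeriod is defined by Vulkan as the number of nanoseconds per
// timestamp tick, so a tick delta converts to wall-clock time as
// `delta_ticks * timestamp_period_` nanoseconds. For example, a device
// reporting timestampPeriod = 52.08 (a 19.2 MHz tick, common on some mobile
// GPUs) turns a delta of 38'400 ticks into roughly 2ms of GPU work.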
void GPUTracerVK::InitializeQueryPool(const ContextVK& context) {
  if (!enabled_) {
    return;
  }
  Lock lock(trace_state_mutex_);
  for (auto i = 0u; i < kTraceStatesSize; i++) {
    vk::QueryPoolCreateInfo info;
    info.queryCount = kPoolSize;
    info.queryType = vk::QueryType::eTimestamp;

    auto [status, pool] = context.GetDevice().createQueryPoolUnique(info);
    if (status != vk::Result::eSuccess) {
      VALIDATION_LOG << "Failed to create query pool.";
      return;
    }
    trace_states_[i].query_pool = std::move(pool);
    context.GetDevice().resetQueryPool(trace_states_[i].query_pool.get(), 0,
                                       kPoolSize);
  }
}
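// A minimal sketch of the per-slot bookkeeping this file relies on. The real
// struct is declared in gpu_tracer_vk.h; the exact declaration below is an
// assumption inferred from how the fields are used here:
//
//   struct GPUTraceState {
//     size_t current_index = 0;    // Next free query slot in the pool.
//     size_t pending_buffers = 0;  // Command buffers awaiting their fence.
//     vk::UniqueQueryPool query_pool;
//   };
//
// One state exists per frame in flight (kTraceStatesSize), so a completed
// frame's results can be read back while a newer frame records into a
// different pool.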
void GPUTracerVK::MarkFrameStart() {
  FML_DCHECK(!in_frame_);
  in_frame_ = true;
  raster_thread_id_ = std::this_thread::get_id();
}
void GPUTracerVK::MarkFrameEnd() {
  if (!enabled_) {
    return;
  }
  Lock lock(trace_state_mutex_);
  current_state_ = (current_state_ + 1) % kTraceStatesSize;

  auto& state = trace_states_[current_state_];
  // Reset the slot for the next frame that will record into it.
  state.pending_buffers = 0;
  state.current_index = 0;
  in_frame_ = false;
}

std::unique_ptr<GPUProbe> GPUTracerVK::CreateGPUProbe() {
  return std::make_unique<GPUProbe>(weak_from_this());
}
void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer,
                                       GPUProbe& probe) {
  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
      !in_frame_) {
    return;
  }
  Lock lock(trace_state_mutex_);
  auto& state = trace_states_[current_state_];

  // Reset any query pools whose results were collected on a prior frame.
  if (!states_to_reset_.empty()) {
    for (auto i = 0u; i < states_to_reset_.size(); i++) {
      buffer.resetQueryPool(
          trace_states_[states_to_reset_[i]].query_pool.get(), 0, kPoolSize);
    }
    states_to_reset_.clear();
  }

  // The pool has a fixed number of slots; if the frame records more command
  // buffers than kPoolSize, stop recording timestamps for this frame.
  if (state.current_index >= kPoolSize) {
    return;
  }

  buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe,
                        trace_states_[current_state_].query_pool.get(),
                        state.current_index);
  state.current_index += 1;
  probe.index_ = current_state_;
  state.pending_buffers += 1;
}
void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer,
                                     GPUProbe& probe) {
  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
      !in_frame_ || !probe.index_.has_value()) {
    return;
  }
  Lock lock(trace_state_mutex_);
  GPUTraceState& state = trace_states_[current_state_];

  if (state.current_index >= kPoolSize) {
    return;
  }

  buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe,
                        state.query_pool.get(), state.current_index);

  state.current_index += 1;
}
void GPUTracerVK::OnFenceComplete(size_t frame_index) {
  if (!enabled_) {
    return;
  }

  size_t pending = 0;
  size_t query_count = 0;
  vk::QueryPool pool;
  {
    Lock lock(trace_state_mutex_);
    GPUTraceState& state = trace_states_[frame_index];

    state.pending_buffers -= 1;
    pending = state.pending_buffers;
    query_count = state.current_index;
    pool = state.query_pool.get();
  }

  if (pending == 0) {
    std::vector<uint64_t> bits(query_count);
    std::shared_ptr<ContextVK> context = context_.lock();
    if (!context) {
      return;
    }
    auto result = context->GetDevice().getQueryPoolResults(
        pool, 0, query_count, query_count * sizeof(uint64_t), bits.data(),
        sizeof(uint64_t), vk::QueryResultFlagBits::e64);
    // This may return eNotReady if the queries have not completed yet, in
    // which case the results for this frame are dropped.
    if (result == vk::Result::eSuccess) {
      uint64_t smallest_timestamp = std::numeric_limits<uint64_t>::max();
      uint64_t largest_timestamp = 0;
      for (auto i = 0u; i < bits.size(); i++) {
        smallest_timestamp = std::min(smallest_timestamp, bits[i]);
        largest_timestamp = std::max(largest_timestamp, bits[i]);
      }
      auto gpu_ms =
          (((largest_timestamp - smallest_timestamp) * timestamp_period_) /
           1000000);
      FML_TRACE_COUNTER("flutter", "GPUTracer",
                        reinterpret_cast<int64_t>(this),  // Trace Counter ID.
                        "FrameTimeMS", gpu_ms);
    }

    // Record this pool to be reset the next time a command buffer is
    // recorded.
    Lock lock(trace_state_mutex_);
    states_to_reset_.push_back(frame_index);
  }
}
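// Worked example (illustrative): with timestamp_period_ = 1.0 and a spread
// of 8'300'000 ticks between the smallest and largest timestamps, gpu_ms =
// (8'300'000 * 1.0) / 1'000'000 ≈ 8.3ms is reported for "FrameTimeMS". Note
// that the metric spans the earliest start to the latest end across all of
// the frame's command buffers, not the sum of per-buffer durations.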
GPUProbe::GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer)
    : tracer_(tracer) {}

GPUProbe::~GPUProbe() {
  if (!index_.has_value()) {
    return;
  }
  auto tracer = tracer_.lock();
  if (!tracer) {
    return;
  }
  tracer->OnFenceComplete(index_.value());
}

void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
  auto tracer = tracer_.lock();
  if (!tracer) {
    return;
  }
  tracer->RecordCmdBufferStart(buffer, *this);
}

void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) {
  auto tracer = tracer_.lock();
  if (!tracer) {
    return;
  }
  tracer->RecordCmdBufferEnd(buffer, *this);
}
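// Usage sketch (illustrative; `tracer` is assumed to be the
// std::shared_ptr<GPUTracerVK> owned by the context, and `cmd` an encoded
// vk::CommandBuffer):
//
//   tracer->MarkFrameStart();
//   auto probe = tracer->CreateGPUProbe();
//   probe->RecordCmdBufferStart(cmd);  // Timestamp at eTopOfPipe.
//   // ... encode render passes into cmd and submit it ...
//   probe->RecordCmdBufferEnd(cmd);    // Timestamp at eBottomOfPipe.
//   tracer->MarkFrameEnd();
//   // Once the submission's fence signals, destroying the probe invokes
//   // OnFenceComplete(), which reads back the pool and emits the counter.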