Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
elf_symbolizer.py
Go to the documentation of this file.
1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import collections
6import datetime
7import logging
8import multiprocessing
9import os
10import posixpath
11import queue
12import re
13import subprocess
14import sys
15import threading
16import time
17
18# addr2line builds a possibly infinite memory cache that can exhaust
19# the computer's memory if allowed to grow for too long. This constant
20# controls how many lookups we do before restarting the process. 4000
21# gives near peak performance without extreme memory usage.
22ADDR2LINE_RECYCLE_LIMIT = 4000
23
24
25class ELFSymbolizer(object):
26 """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer.
27
28 This class is a frontend for addr2line (part of GNU binutils), designed to
29 symbolize batches of large numbers of symbols for a given ELF file. It
30 supports sharding symbolization against many addr2line instances and
31 pipelining of multiple requests per each instance (in order to hide addr2line
32 internals and OS pipe latencies).
33
34 The interface exhibited by this class is a very simple asynchronous interface,
35 which is based on the following three methods:
36 - SymbolizeAsync(): used to request (enqueue) resolution of a given address.
37 - The |callback| method: used to communicate back the symbol information.
38 - Join(): called to conclude the batch to gather the last outstanding results.
39 In essence, before the Join method returns, this class will have issued as
40 many callbacks as the number of SymbolizeAsync() calls. In this regard, note
41 that due to multiprocess sharding, callbacks can be delivered out of order.
42
43 Some background about addr2line:
44 - it is invoked passing the elf path in the cmdline, piping the addresses in
45 its stdin and getting results on its stdout.
46 - it has pretty large response times for the first requests, but it
47 works very well in streaming mode once it has been warmed up.
48 - it doesn't scale by itself (on more cores). However, spawning multiple
49 instances at the same time on the same file is pretty efficient as they
50 keep hitting the pagecache and become mostly CPU bound.
51 - it might hang or crash, mostly for OOM. This class deals with both of these
52 problems.
53
54 Despite the "scary" imports and the multi* words above, (almost) no multi-
55 threading/processing is involved from the python viewpoint. Concurrency
56 here is achieved by spawning several addr2line subprocesses and handling their
57 output pipes asynchronously. Therefore, all the code here (with the exception
58 of the Queue instance in Addr2Line) should be free from mind-blowing
59 thread-safety concerns.
60
61 The multiprocess sharding works as follows:
62 The symbolizer tries to use the lowest number of addr2line instances as
63 possible (with respect of |max_concurrent_jobs|) and enqueue all the requests
64 in a single addr2line instance. For few symbols (i.e. dozens) sharding isn't
65 worth the startup cost.
66 The multiprocess logic kicks in as soon as the queues for the existing
67 instances grow. Specifically, once all the existing instances reach the
68 |max_queue_size| bound, a new addr2line instance is kicked in.
69 In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances
70 have a backlog of |max_queue_size|), back-pressure is applied on the caller by
71 blocking the SymbolizeAsync method.
72
73 This module has been deliberately designed to be dependency free (w.r.t. of
74 other modules in this project), to allow easy reuse in external projects.
75 """
76
77 def __init__(self,
78 elf_file_path,
79 addr2line_path,
80 callback,
81 inlines=False,
82 max_concurrent_jobs=None,
83 addr2line_timeout=30,
84 max_queue_size=50,
85 source_root_path=None,
86 strip_base_path=None):
87 """Args:
88 elf_file_path: path of the elf file to be symbolized.
89 addr2line_path: path of the toolchain's addr2line binary.
90 callback: a callback which will be invoked for each resolved symbol with
91 the two args (sym_info, callback_arg). The former is an instance of
92 |ELFSymbolInfo| and contains the symbol information. The latter is an
93 embedder-provided argument which is passed to SymbolizeAsync().
94 inlines: when True, the ELFSymbolInfo will contain also the details about
95 the outer inlining functions. When False, only the innermost function
96 will be provided.
97 max_concurrent_jobs: Max number of addr2line instances spawned.
98 Parallelize responsibly, addr2line is a memory and I/O monster.
99 max_queue_size: Max number of outstanding requests per addr2line instance.
100 addr2line_timeout: Max time (in seconds) to wait for an addr2line response.
101 After the timeout, the instance will be considered hung and respawned.
102 source_root_path: In some toolchains only the name of the source file is
103 output, without any path information; disambiguation searches
104 through the source directory specified by |source_root_path| argument
105 for files whose name matches, adding the full path information to the
106 output. For example, if the toolchain outputs "unicode.cc" and there
107 is a file called "unicode.cc" located under |source_root_path|/foo,
108 the tool will replace "unicode.cc" with
109 "|source_root_path|/foo/unicode.cc". If there are multiple files with
110 the same name, disambiguation will fail because the tool cannot
111 determine which of the files was the source of the symbol.
112 strip_base_path: Rebases the symbols source paths onto |source_root_path|
113 (i.e. replace |strip_base_path| with |source_root_path|).
114 """
115 assert (os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path
116 self.elf_file_path = elf_file_path
117 self.addr2line_path = addr2line_path
118 self.callback = callback
119 self.inlines = inlines
120 self.max_concurrent_jobs = (max_concurrent_jobs or
121 min(multiprocessing.cpu_count(), 4))
122 self.max_queue_size = max_queue_size
123 self.addr2line_timeout = addr2line_timeout
124 self.requests_counter = 0 # For generating monotonic request IDs.
125 self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst.
126
127 # If necessary, create disambiguation lookup table
128 self.disambiguate = source_root_path is not None
130 self.strip_base_path = strip_base_path
131 if (self.disambiguate):
132 self.source_root_path = os.path.abspath(source_root_path)
134
135 # Create one addr2line instance. More instances will be created on demand
136 # (up to |max_concurrent_jobs|) depending on the rate of the requests.
138
def SymbolizeAsync(self, addr, callback_arg=None):
  """Enqueues |addr| for symbolization on one of the addr2line instances.

  The call normally returns right away. It blocks (to apply back-pressure on
  the caller) only when the pool cannot grow any further and the instance that
  was picked still has a full queue.

  Args:
    addr: address to symbolize; must be an int.
    callback_arg: optional value forwarded to EnqueueRequest() together with
        |addr|.
  """
  assert (isinstance(addr, int))

  # First let every instance consume the output it has already produced and
  # decide whether it wants to be recycled.
  for instance in self._a2l_instances:
    instance.ProcessAllResolvedSymbolsInQueue()
    instance.RecycleIfNecessary()

  queue_limit = self.max_queue_size

  # Candidate: the shortest queue; ties are broken by the smaller
  # |first_request_id|.
  target = min(
      self._a2l_instances,
      key=lambda inst: (inst.queue_size, inst.first_request_id))

  # Even the best candidate is full: grow the pool while there is room.
  if target.queue_size >= queue_limit:
    if len(self._a2l_instances) < self.max_concurrent_jobs:
      target = self._CreateNewA2LInstance()

  # Still full (the pool was already at its maximum): wait for one result
  # before pushing more work.
  if target.queue_size >= queue_limit:
    target.WaitForNextSymbolInQueue()

  target.EnqueueRequest(addr, callback_arg)
180
def Join(self):
  """Blocks until no request is pending, then terminates the instances.

  The addr2line instances are handled one after the other: each one is first
  waited on until idle and then terminated.
  """
  for instance in self._a2l_instances:
    instance.WaitForIdle()
    instance.Terminate()
186
188 assert (len(self._a2l_instances) < self.max_concurrent_jobs)
189 a2l = ELFSymbolizer.Addr2Line(self)
190 self._a2l_instances.append(a2l)
191 return a2l
192
194 """ Non-unique file names will result in None entries"""
195 start_time = time.time()
196 logging.info('Collecting information about available source files...')
197 self.disambiguation_table = {}
198
199 for root, _, filenames in os.walk(self.source_root_path):
200 for f in filenames:
201 self.disambiguation_table[f] = os.path.join(
202 root, f) if (f not in self.disambiguation_table) else None
203 logging.info(
204 'Finished collecting information about '
205 'possible files (took %.1f s).', (time.time() - start_time))
206
207 class Addr2Line(object):
208 """A python wrapper around an addr2line instance.
209
210 The communication with the addr2line process looks as follows:
211 [STDIN] [STDOUT] (from addr2line's viewpoint)
212 > f001111
213 > f002222
214 < Symbol::Name(foo, bar) for f001111
215 < /path/to/source/file.c:line_number
216 > f003333
217 < Symbol::Name2() for f002222
218 < /path/to/source/file.c:line_number
219 < Symbol::Name3() for f003333
220 < /path/to/source/file.c:line_number
221 """
222
223 SYM_ADDR_RE = re.compile(r'([^:]+):(\?|\d+).*')
224
225 def __init__(self, symbolizer):
226 self._symbolizer = symbolizer
227 self._lib_file_name = posixpath.basename(symbolizer.elf_file_path)
228
229 # The request queue (i.e. addresses pushed to addr2line's stdin and not
230 # yet retrieved on stdout)
231 self._request_queue = collections.deque()
232
233 # This is essentially len(self._request_queue). It has been optimized to a
234 # separate field because turned out to be a perf hot-spot.
235 self.queue_size = 0
236
237 # Keep track of the number of symbols a process has processed to
238 # avoid a single process growing too big and using all the memory.
240
241 # Objects required to handle the addr2line subprocess.
242 self._proc = None # Subprocess.Popen(...) instance.
243 self._thread = None # Threading.thread instance.
244 self._out_queue = None # queue.Queue instance (for buffering a2l stdout).
246
def EnqueueRequest(self, addr, callback_arg):
  """Records a pending request for |addr| and writes it to addr2line's stdin.

  Args:
    addr: the address to symbolize.
    callback_arg: value stored alongside the request in the pending queue.
  """
  owner = self._symbolizer
  # The symbolizer-wide counter gives each request a global "age".
  request_id = owner.requests_counter + 1
  owner.requests_counter = request_id

  pending_entry = (addr, callback_arg, request_id)
  self._request_queue.append(pending_entry)
  self.queue_size += 1  # Kept in lockstep with len(self._request_queue).

  self._WriteToA2lStdin(addr)
254
255 def WaitForIdle(self):
256 """Waits until all the pending requests have been symbolized."""
257 while self.queue_size > 0:
259
261 """Waits for the next pending request to be symbolized."""
262 if not self.queue_size:
263 return
264
265 # This outer loop guards against a2l hanging (detecting stdout timeout).
266 while True:
267 start_time = datetime.datetime.now()
268 timeout = datetime.timedelta(
269 seconds=self._symbolizer.addr2line_timeout)
270
271 # The inner loop guards against a2l crashing (checking if it exited).
272 while (datetime.datetime.now() - start_time < timeout):
273 # poll() returns !None if the process exited. a2l should never exit.
274 if self._proc.poll():
275 logging.warning(
276 'addr2line crashed, respawning (lib: %s).' %
277 self._lib_file_name)
279 # TODO(primiano): the best thing to do in this case would be
280 # shrinking the pool size as, very likely, addr2line is crashed
281 # due to low memory (and the respawned one will die again soon).
282
283 try:
284 lines = self._out_queue.get(block=True, timeout=0.25)
285 except queue.Empty:
286 # On timeout (1/4 s.) repeat the inner loop and check if either the
287 # addr2line process did crash or we waited its output for too long.
288 continue
289
290 # In nominal conditions, we get straight to this point.
291 self._ProcessSymbolOutput(lines)
292 return
293
294 # If this point is reached, we waited more than |addr2line_timeout|.
295 logging.warning('Hung addr2line process, respawning (lib: %s).'
296 % self._lib_file_name)
298
300 """Consumes all the addr2line output lines produced (without blocking)."""
301 if not self.queue_size:
302 return
303 while True:
304 try:
305 lines = self._out_queue.get_nowait()
306 except queue.Empty:
307 break
308 self._ProcessSymbolOutput(lines)
309
311 """Restarts the process if it has been used for too long.
312
313 A long running addr2line process will consume excessive amounts
314 of memory without any gain in performance."""
315 if self._processed_symbols_count >= ADDR2LINE_RECYCLE_LIMIT:
317
def Terminate(self):
  """Kills the wrapped addr2line process and forgets about it.

  Per the original author's note, the poller |_thread| is expected to end as
  well once the pipe is broken.
  """
  try:
    proc = self._proc
    proc.kill()
    # communicate() is used in place of wait() so that reaping the child
    # cannot deadlock.
    proc.communicate()
  except Exception:  # Best effort: a failure while terminating is ignored.
    pass
  self._proc = None
329
330 def _WriteToA2lStdin(self, addr):
331 self._proc.stdin.write(('%s\n' % hex(addr)).encode())
332 if self._symbolizer.inlines:
333 # In the case of inlines we output an extra blank line, which causes
334 # addr2line to emit a (??,??:0) tuple that we use as a boundary marker.
335 self._proc.stdin.write('\n')
336 self._proc.stdin.flush()
337
338 def _ProcessSymbolOutput(self, lines):
339 """Parses an addr2line symbol output and triggers the client callback."""
340 (_, callback_arg, _) = self._request_queue.popleft()
341 self.queue_size -= 1
342
343 innermost_sym_info = None
344 sym_info = None
345 for (line1, line2) in lines:
346 prev_sym_info = sym_info
347 name = line1 if not line1.startswith('?') else None
348 source_path = None
349 source_line = None
350 m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2)
351 if m:
352 if not m.group(1).startswith('?'):
353 source_path = m.group(1)
354 if not m.group(2).startswith('?'):
355 source_line = int(m.group(2))
356 else:
357 logging.warning(
358 'Got invalid symbol path from addr2line: %s' % line2)
359
360 # In case disambiguation is on, and needed
361 was_ambiguous = False
362 disambiguated = False
363 if self._symbolizer.disambiguate:
364 if source_path and not posixpath.isabs(source_path):
365 path = self._symbolizer.disambiguation_table.get(
366 source_path)
367 was_ambiguous = True
368 disambiguated = path is not None
369 source_path = path if disambiguated else source_path
370
371 # Use absolute paths (so that paths are consistent, as disambiguation
372 # uses absolute paths)
373 if source_path and not was_ambiguous:
374 source_path = os.path.abspath(source_path)
375
376 if source_path and self._symbolizer.strip_base_path:
377 # Strip the base path
378 source_path = re.sub(
379 '^' + self._symbolizer.strip_base_path,
380 self._symbolizer.source_root_path or '', source_path)
381
382 sym_info = ELFSymbolInfo(name, source_path, source_line,
383 was_ambiguous, disambiguated)
384 if prev_sym_info:
385 prev_sym_info.inlined_by = sym_info
386 if not innermost_sym_info:
387 innermost_sym_info = sym_info
388
390 self._symbolizer.callback(innermost_sym_info, callback_arg)
391
393 if self._proc:
394 self.Terminate()
395
396 # The only reason of existence of this Queue (and the corresponding
397 # Thread below) is the lack of a subprocess.stdout.poll_avail_lines().
398 # Essentially this is a pipe able to extract a couple of lines atomically.
399 self._out_queue = queue.Queue()
400
401 # Start the underlying addr2line process in line buffered mode.
402
403 cmd = [
404 self._symbolizer.addr2line_path, '--functions', '--demangle',
405 '--exe=' + self._symbolizer.elf_file_path
406 ]
407 if self._symbolizer.inlines:
408 cmd += ['--inlines']
409 self._proc = subprocess.Popen(
410 cmd,
411 stdout=subprocess.PIPE,
412 stdin=subprocess.PIPE,
413 stderr=sys.stderr,
414 close_fds=True)
415
416 # Start the poller thread, which simply moves atomically the lines read
417 # from the addr2line's stdout to the |_out_queue|.
418 self._thread = threading.Thread(
419 target=ELFSymbolizer.Addr2Line.StdoutReaderThread,
420 args=(self._proc.stdout, self._out_queue,
421 self._symbolizer.inlines))
422 self._thread.daemon = True # Don't prevent early process exit.
423 self._thread.start()
424
426
427 # Replay the pending requests on the new process (only for the case
428 # of a hung addr2line timing out during the game).
429 for (addr, _, _) in self._request_queue:
430 self._WriteToA2lStdin(addr)
431
432 @staticmethod
433 def StdoutReaderThread(process_pipe, queue, inlines):
434 """The poller thread fn, which moves the addr2line stdout to the |queue|.
435
436 This is the only piece of code not running on the main thread. It merely
437 writes to a Queue, which is thread-safe. In the case of inlines, it
438 detects the ??,??:0 marker and sends the lines atomically, such that the
439 main thread always receives all the lines corresponding to one symbol in
440 one shot."""
441 try:
442 lines_for_one_symbol = []
443 while True:
444 line1 = process_pipe.readline().decode().rstrip('\r\n')
445 line2 = process_pipe.readline().decode().rstrip('\r\n')
446 if not line1 or not line2:
447 break
448 inline_has_more_lines = inlines and (
449 len(lines_for_one_symbol) == 0 or
450 (line1 != '??' and line2 != '??:0'))
451 if not inlines or inline_has_more_lines:
452 lines_for_one_symbol += [(line1, line2)]
453 if inline_has_more_lines:
454 continue
455 queue.put(lines_for_one_symbol)
456 lines_for_one_symbol = []
457 process_pipe.close()
458
459 # Every addr2line processes will die at some point, please die silently.
460 except (IOError, OSError):
461 pass
462
463 @property
465 """Returns the request_id of the oldest pending request in the queue."""
466 return self._request_queue[0][2] if self._request_queue else 0
467
468
class ELFSymbolInfo(object):
  """Symbolization result, handed to each callback as its first argument."""

  def __init__(self,
               name,
               source_path,
               source_line,
               was_ambiguous=False,
               disambiguated=False):
    """Stores the symbol details.

    |name|, |source_path| and |source_line| may each be None (that is the case
    when addr2line replies with '??').
    """
    self.name, self.source_path, self.source_line = (name, source_path,
                                                     source_line)
    # With |inlines|=True this is set to the outer function that inlines the
    # current one, and so on, forming a chain.
    self.inlined_by = None
    self.disambiguated = disambiguated
    self.was_ambiguous = was_ambiguous

  def __str__(self):
    """Formats as 'name [path:line]', using '??' / 0 for missing fields."""
    shown_name = self.name if self.name else '??'
    shown_path = self.source_path if self.source_path else '??'
    shown_line = self.source_line if self.source_line else 0
    return '%s [%s:%d]' % (shown_name, shown_path, shown_line)
static void encode(uint8_t output[16], const uint32_t input[4])
Definition SkMD5.cpp:240
Type::kYUV Type::kRGBA() int(0.7 *637)
__init__(self, name, source_path, source_line, was_ambiguous=False, disambiguated=False)
StdoutReaderThread(process_pipe, queue, inlines)
EnqueueRequest(self, addr, callback_arg)
__init__(self, elf_file_path, addr2line_path, callback, inlines=False, max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50, source_root_path=None, strip_base_path=None)
SymbolizeAsync(self, addr, callback_arg=None)
static void append(char **dst, size_t *count, const char *src, size_t n)
Definition editor.cpp:211
static float min(float r, float g, float b)
Definition hsl.cpp:48
static DecodeResult decode(std::string path)