22ADDR2LINE_RECYCLE_LIMIT = 4000
26 """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer.
28 This class is
a frontend
for addr2line (part of GNU binutils), designed to
29 symbolize batches of large numbers of symbols
for a given ELF file. It
30 supports sharding symbolization against many addr2line instances
and
31 pipelining of multiple requests per each instance (
in order to hide addr2line
32 internals
and OS pipe latencies).
34 The interface exhibited by this
class is
a very simple asynchronous interface,
35 which
is based on the following three methods:
36 -
SymbolizeAsync(): used to request (enqueue) resolution of a given address.
37 - The |callback| method: used to communicate back the symbol information.
38 -
Join(): called to conclude the batch to gather the last outstanding results.
39 In essence, before the Join method returns, this
class will have issued as
40 many callbacks
as the number of
SymbolizeAsync() calls. In this regard, note
41 that due to multiprocess sharding, callbacks can be delivered out of order.
43 Some background about addr2line:
44 - it
is invoked passing the elf path
in the cmdline, piping the addresses
in
45 its stdin
and getting results on its stdout.
46 - it has pretty large response times
for the first requests, but it
47 works very well
in streaming mode once it has been warmed up.
48 - it doesn
't scale by itself (on more cores). However, spawning multiple
49 instances at the same time on the same file is pretty efficient
as they
50 keep hitting the pagecache
and become mostly CPU bound.
51 - it might hang
or crash, mostly
for OOM. This
class deals with both of these
54 Despite the
"scary" imports
and the multi* words above, (almost) no multi-
55 threading/processing
is involved
from the python viewpoint. Concurrency
56 here
is achieved by spawning several addr2line subprocesses
and handling their
57 output pipes asynchronously. Therefore, all the code here (
with the exception
58 of the Queue instance
in Addr2Line) should be free
from mind-blowing
59 thread-safety concerns.
61 The multiprocess sharding works
as follows:
62 The symbolizer tries to use the lowest number of addr2line instances
as
63 possible (
with respect to |max_concurrent_jobs|)
and enqueue all the requests
64 in a single addr2line instance. For few symbols (i.e. dozens) sharding isn
't
65 worth the startup cost.
66 The multiprocess logic kicks in as soon
as the queues
for the existing
67 instances grow. Specifically, once all the existing instances reach the
68 |max_queue_size| bound, a new addr2line instance
is kicked
in.
69 In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances
70 have a backlog of |max_queue_size|), back-pressure
is applied on the caller by
71 blocking the SymbolizeAsync method.
73 This module has been deliberately designed to be dependency free (w.r.t.
74 other modules
in this project), to allow easy reuse
in external projects.
82 max_concurrent_jobs=None,
85 source_root_path=None,
86 strip_base_path=None):
88 elf_file_path: path of the elf file to be symbolized.
89 addr2line_path: path of the toolchain's addr2line binary.
90 callback: a callback which will be invoked for each resolved symbol
with
91 the two args (sym_info, callback_arg). The former
is an instance of
92 |ELFSymbolInfo|
and contains the symbol information. The latter
is an
94 inlines: when
True, the ELFSymbolInfo will contain also the details about
95 the outer inlining functions. When
False, only the innermost function
97 max_concurrent_jobs: Max number of addr2line instances spawned.
98 Parallelize responsibly, addr2line
is a memory
and I/O monster.
99 max_queue_size: Max number of outstanding requests per addr2line instance.
100 addr2line_timeout: Max time (
in seconds) to wait
for an addr2line response.
101 After the timeout, the instance will be considered hung
and respawned.
102 source_root_path: In some toolchains only the name of the source file
is
103 output, without any path information; disambiguation searches
104 through the source directory specified by |source_root_path| argument
105 for files whose name matches, adding the full path information to the
106 output. For example,
if the toolchain outputs
"unicode.cc" and there
107 is a file called
"unicode.cc" located under |source_root_path|/foo,
108 the tool will replace
"unicode.cc" with
109 "|source_root_path|/foo/unicode.cc". If there are multiple files
with
110 the same name, disambiguation will fail because the tool cannot
111 determine which of the files was the source of the symbol.
112 strip_base_path: Rebases the symbols source paths onto |source_root_path|
113 (i.e. replace |strip_base_path|
with |source_root_path|).
115 assert (os.path.isfile(addr2line_path)),
'Cannot find ' + addr2line_path
121 min(multiprocessing.cpu_count(), 4))
140 """Requests symbolization of a given address.
142 This method is not guaranteed to
return immediately. It generally does, but
143 in some scenarios (e.g. all addr2line instances have full queues) it can
144 block to create back-pressure.
147 addr: address to symbolize.
148 callback_arg: optional argument which will be passed to the |callback|.
"""
149 assert (isinstance(addr, int))
154 a2l_to_purge.ProcessAllResolvedSymbolsInQueue()
155 a2l_to_purge.RecycleIfNecessary()
165 def _SortByQueueSizeAndReqID(a2l):
166 return (a2l.queue_size, a2l.first_request_id)
177 a2l.WaitForNextSymbolInQueue()
179 a2l.EnqueueRequest(addr, callback_arg)
182 """Waits for all the outstanding requests to complete and terminates."""
187 def _CreateNewA2LInstance(self):
193 def _CreateDisambiguationTable(self):
194 """ Non-unique file names will result in None entries"""
195 start_time = time.time()
196 logging.info(
'Collecting information about available source files...')
204 'Finished collecting information about '
205 'possible files (took %.1f s).', (time.time() - start_time))
208 """A python wrapper around an addr2line instance.
210 The communication with the addr2line process looks
as follows:
211 [STDIN] [STDOUT] (
from addr2line
's viewpoint)
214 < Symbol::Name(foo, bar) for f001111
215 < /path/to/source/file.c:line_number
217 < Symbol::Name2()
for f002222
218 < /path/to/source/file.c:line_number
219 < Symbol::Name3()
for f003333
220 < /path/to/source/file.c:line_number
223 SYM_ADDR_RE = re.compile(r'([^:]+):(\?|\d+).*')
227 self.
_lib_file_name = posixpath.basename(symbolizer.elf_file_path)
248 """Pushes an address to addr2line's stdin (and keeps track of it)."""
256 """Waits until all the pending requests have been symbolized."""
261 """Waits for the next pending request to be symbolized."""
267 start_time = datetime.datetime.now()
268 timeout = datetime.timedelta(
272 while (datetime.datetime.now() - start_time < timeout):
274 if self.
_proc.poll():
276 'addr2line crashed, respawning (lib: %s).' %
295 logging.warning(
'Hung addr2line process, respawning (lib: %s).'
300 """Consumes all the addr2line output lines produced (without blocking)."""
311 """Restarts the process if it has been used for too long.
313 A long running addr2line process will consume excessive amounts
314 of memory without any gain in performance.
"""
319 """Kills the underlying addr2line process.
321 The poller |_thread| will terminate as well due to the broken pipe.
"""
324 self.
_proc.communicate(
330 def _WriteToA2lStdin(self, addr):
331 self.
_proc.stdin.write((
'%s\n' % hex(addr)).
encode())
335 self.
_proc.stdin.write(
'\n')
336 self.
_proc.stdin.flush()
338 def _ProcessSymbolOutput(self, lines):
339 """Parses an addr2line symbol output and triggers the client callback."""
343 innermost_sym_info =
None
345 for (line1, line2)
in lines:
346 prev_sym_info = sym_info
347 name = line1
if not line1.startswith(
'?')
else None
350 m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2)
352 if not m.group(1).startswith(
'?'):
353 source_path = m.group(1)
354 if not m.group(2).startswith(
'?'):
355 source_line =
int(m.group(2))
358 'Got invalid symbol path from addr2line: %s' % line2)
361 was_ambiguous =
False
362 disambiguated =
False
364 if source_path
and not posixpath.isabs(source_path):
368 disambiguated = path
is not None
369 source_path = path
if disambiguated
else source_path
373 if source_path
and not was_ambiguous:
374 source_path = os.path.abspath(source_path)
376 if source_path
and self.
_symbolizer.strip_base_path:
378 source_path = re.sub(
380 self.
_symbolizer.source_root_path
or '', source_path)
383 was_ambiguous, disambiguated)
385 prev_sym_info.inlined_by = sym_info
386 if not innermost_sym_info:
387 innermost_sym_info = sym_info
392 def _RestartAddr2LineProcess(self):
404 self.
_symbolizer.addr2line_path,
'--functions',
'--demangle',
409 self.
_proc = subprocess.Popen(
411 stdout=subprocess.PIPE,
412 stdin=subprocess.PIPE,
418 self.
_thread = threading.Thread(
419 target=ELFSymbolizer.Addr2Line.StdoutReaderThread,
434 """The poller thread fn, which moves the addr2line stdout to the |queue|.
436 This is the only piece of code
not running on the main thread. It merely
437 writes to a Queue, which
is thread-safe. In the case of inlines, it
438 detects the ??,??:0 marker
and sends the lines atomically, such that the
439 main thread always receives all the lines corresponding to one symbol
in
442 lines_for_one_symbol = []
444 line1 = process_pipe.readline().
decode().rstrip(
'\r\n')
445 line2 = process_pipe.readline().
decode().rstrip(
'\r\n')
446 if not line1
or not line2:
448 inline_has_more_lines = inlines
and (
449 len(lines_for_one_symbol) == 0
or
450 (line1 !=
'??' and line2 !=
'??:0'))
451 if not inlines
or inline_has_more_lines:
452 lines_for_one_symbol += [(line1, line2)]
453 if inline_has_more_lines:
455 queue.put(lines_for_one_symbol)
456 lines_for_one_symbol = []
460 except (IOError, OSError):
465 """Returns the request_id of the oldest pending request in the queue."""
470 """The result of the symbolization passed as first arg. of each callback."""
477 disambiguated=False):
478 """All the fields here can be None (if addr2line replies with '??')."""
for(const auto glyph :glyphs)
static void encode(uint8_t output[16], const uint32_t input[4])
def __init__(self, name, source_path, source_line, was_ambiguous=False, disambiguated=False)
def _RestartAddr2LineProcess(self)
def __init__(self, symbolizer)
def StdoutReaderThread(process_pipe, queue, inlines)
def EnqueueRequest(self, addr, callback_arg)
def _ProcessSymbolOutput(self, lines)
def WaitForNextSymbolInQueue(self)
def first_request_id(self)
def ProcessAllResolvedSymbolsInQueue(self)
def RecycleIfNecessary(self)
def _WriteToA2lStdin(self, addr)
def _CreateNewA2LInstance(self)
def SymbolizeAsync(self, addr, callback_arg=None)
def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50, source_root_path=None, strip_base_path=None)
def _CreateDisambiguationTable(self)
static void append(char **dst, size_t *count, const char *src, size_t n)
static float min(float r, float g, float b)
const myers::Point & get(const myers::Segment &)
static DecodeResult decode(std::string path)