"""Generate a spatial analysis against an arbitrary library.

To use, build the 'binary_size_tool' target. Then run this tool, passing
in the location of the library to be analyzed along with any other options
25import binary_size_utils
# Node dictionary keys used in the JSON consumed by the webapp; keep them
# short to save file size. (NODE_NAME_KEY and NODE_TYPE_KEY are defined
# earlier in the file, outside this chunk.)
NODE_CHILDREN_KEY = 'children'
NODE_SYMBOL_TYPE_KEY = 't'
NODE_SYMBOL_SIZE_KEY = 'value'
NODE_MAX_DEPTH_KEY = 'maxDepth'
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'

# Bucket name for symbols whose source path could not be determined.
NAME_NO_PATH_BUCKET = '(No Path)'

# Limit of how many symbols a single tree node may hold before the
# graphing library becomes unusable; larger buckets get split.
BIG_BUCKET_LIMIT = 3000
def _MkChild(node, name):
  """Returns the child of |node| named |name|, creating it if absent.

  Children live in the dict stored under NODE_CHILDREN_KEY, keyed by name.
  NODE_NAME_KEY is defined earlier in the file (outside this chunk).
  """
  child = node[NODE_CHILDREN_KEY].get(name)
  if child is None:
    child = {NODE_NAME_KEY: name,
             NODE_CHILDREN_KEY: {}}
    node[NODE_CHILDREN_KEY][name] = child
  return child
def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case.

  Mutates |node| in place: if the (No Path) bucket holds more than
  BIG_BUCKET_LIMIT symbols, they are regrouped into numbered
  '(No Path) subgroup N' children of BIG_BUCKET_LIMIT symbols each.
  """
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    count = 0
    for symbol_type, symbol_bucket in old_children.items():
      count += len(symbol_bucket[NODE_CHILDREN_KEY])
    if count > BIG_BUCKET_LIMIT:
      # Too big: throw away the old buckets and refill into subgroups.
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.items():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].items():
          if index % BIG_BUCKET_LIMIT == 0:
            # BUGFIX: use integer division; '/' produces a float on
            # Python 3 (the original '/' worked only by %d truncation).
            group_no = (index // BIG_BUCKET_LIMIT) + 1
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          index += 1
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)
def MakeChildrenDictsIntoLists(node):
  """Recursively converts each node's children dict into a plain list.

  Returns the length of the longest children list found anywhere in the
  tree rooted at |node| (callers use it to warn about oversized sections).
  """
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    for child in node[NODE_CHILDREN_KEY].values():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list
  return largest_list_len
def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of added levels in tree. I.e. returns 2.
  """
  # 'node' is the file node; first step down into its symbol-type bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  node = _MkChild(node, symbol_type)
  assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 'b'  # b for bucket

  # 'node' is now the symbol-type bucket. Make the symbol leaf.
  node = _MkChild(node, symbol_name)
  if NODE_CHILDREN_KEY in node:
    if node[NODE_CHILDREN_KEY]:
      logging.warning(
          'A container node used as symbol for %s.' % symbol_name)
    # Used as a leaf, so the (empty) child dict is dead weight in the JSON.
    del node[NODE_CHILDREN_KEY]
  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree (bucket + symbol).
def MakeCompactTree(symbols, symbol_path_origin_dir):
  """Builds the path/bucket/symbol tree consumed by the web report.

  |symbols| yields (name, type, size, path, address) tuples; paths are
  resolved relative to |symbol_path_origin_dir|.
  """
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  cwd = os.path.abspath(os.getcwd())
  for symbol_name, symbol_type, symbol_size, file_path, _address in symbols:
    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    # Take a path like '/foo/bar/baz' and convert to ['foo', 'bar', 'baz'].
    if file_path and file_path != "??":
      file_path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                               file_path))
      # Let the output structure be relative to $CWD if inside $CWD,
      # otherwise relative to the disk root, to avoid unnecessary
      # click-through levels in the output.
      if file_path.startswith(cwd + os.sep):
        file_path = file_path[len(cwd):]
      if file_path.startswith('/'):
        file_path = file_path[1:]
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    path_parts = file_path.split('/')

    # Walk/create the path nodes down to the file node.
    node = result
    depth = 0
    while len(path_parts) > 0:
      path_part = path_parts.pop(0)
      if len(path_part) == 0:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    # 'node' is now the file node; add the symbol beneath it.
    depth += AddSymbolIntoFileNode(node, symbol_type,
                                   symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if path information was
  # unavailable; split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.' % largest_list_len)
  return result
def DumpCompactTree(symbols, symbol_path_origin_dir, outfile):
  """Writes the compact tree for |symbols| to |outfile| as a JS variable."""
  tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
  with open(outfile, 'w') as out:
    out.write('var tree_data=')
    # Use separators without whitespace to get a smaller file.
    json.dump(tree_root, out, separators=(',', ':'))
  print('Writing %d bytes json' % os.path.getsize(outfile))
def MakeSourceMap(symbols):
  """Aggregates |symbols| by normalized source path.

  Returns a collection of {'path', 'symbol_count', 'size'} records, one
  per distinct normalized path (symbols without a path share one record).
  """
  sources = {}
  for _sym, _symbol_type, size, path, _address in symbols:
    if path:
      key = os.path.normpath(path)
    else:
      key = '[no path]'
    if key not in sources:
      sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
    record = sources[key]
    record['size'] += size
    record['symbol_count'] += 1
  return sources.values()
# Matches nm output lines: address, size, optional type char, symbol name,
# and an optional tab-separated source location.
sNmPattern = re.compile(
    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path):
  """Symbolizes |library| with addr2line and writes an nm-style listing,
  augmented with tab-separated 'path:line' info, to |outfile|.

  Relies on the module-level Progress class and the elf_symbolizer module,
  both defined/imported outside this chunk.
  """
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}  # addr -> symbolizer result
  progress = Progress()

  def map_address_symbol(symbol, addr):
    # Records one symbolizer result, tracking collision/disambiguation
    # statistics. The first mapping for an address wins.
    progress.count += 1
    if addr in address_symbol:
      progress.collisions += 1
    else:
      if symbol.disambiguated:
        progress.disambiguations += 1
        if symbol.was_ambiguous:
          progress.was_ambiguous += 1
      address_symbol[addr] = symbol
    progress_output()

  def progress_output():
    # Prints a one-line progress report, at most about once per second.
    progress_chunk = 100  # TODO confirm chunk size (not visible in fragment)
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        speed = chunk_size / time_spent
        progress_percent = (100.0 * (
            progress.count + progress.skip_count) / nm_output_lines_len)
        disambiguation_percent = 0
        if progress.disambiguations != 0:
          disambiguation_percent = (100.0 * progress.disambiguations /
                                    progress.was_ambiguous)
        sys.stdout.write(
            '\r%.1f%%: Looked up %d symbols (%d collisions, '
            '%d disambiguations where %.1f%% succeeded)'
            ' - %.1f lookups/s.' %
            (progress_percent, progress.count, progress.collisions,
             progress.disambiguations, disambiguation_percent, speed))

  # A set source path is what signals the symbolizer to enable
  # disambiguation, so clear it when disambiguation is disabled.
  if not disambiguate:
    src_path = None
  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs,
                                            source_root_path=src_path)
  user_interrupted = False
  try:
    for binary_line in nm_output_lines:
      line = binary_line.decode()
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up: shortcut.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols.
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue
      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # No point aborting here; we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  print('')

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  symbol_path_origin_dir = os.path.dirname(os.path.abspath(library))

  with open(outfile, 'w') as out:
    for binary_line in nm_output_lines:
      line = binary_line.decode()
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                                  symbol.source_path))
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue
      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))
def RunNm(binary, nm_binary):
  """Runs |nm_binary| over |binary| and returns its raw stdout (bytes).

  Raises Exception with nm's stderr (or stdout if stderr is empty) on a
  non-zero exit code.
  """
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  return process_output
def GetNmSymbols(nm_infile, outfile, library, jobs, verbose, addr2line_binary,
                 nm_binary, disambiguate, src_path):
  """Returns parsed symbols, either from |nm_infile| or by symbolizing
  |library| (dumping the intermediate nm output to |outfile|)."""
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print('Running parallel addr2line, dumping symbols to ' + outfile)
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path)

    nm_infile = outfile

  elif verbose:
    print('Using nm input from ' + nm_infile)

  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))
# Lazily-populated map from numeric pak resource id to a readable name;
# 'inited' records whether the resources.h scan has been done yet.
PAK_RESOURCE_ID_TO_STRING = {"inited": False}
def LoadPakIdsFromResourceFile(filename):
  """Given a file name, it loads everything that looks like a resource id
  into PAK_RESOURCE_ID_TO_STRING."""
  with open(filename) as resource_header:
    for line in resource_header:
      if line.startswith("#define "):
        line_data = line.split()
        if len(line_data) == 3:
          try:
            resource_number = int(line_data[2])
            resource_name = line_data[1]
            PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name
          except ValueError:
            # Not a plain numeric id (e.g. an expression); skip it.
            pass
def GetReadablePakResourceName(pak_file, resource_id):
  """Pak resources have a numeric identifier. It is not helpful when
  trying to locate where footprint is generated. This does its best to
  map the number to a usable string.
  """
  if not PAK_RESOURCE_ID_TO_STRING['inited']:
    # One-time scan: look for grit-generated *resources.h headers next to
    # the pak file and harvest '#define NAME 1234' lines from them.
    PAK_RESOURCE_ID_TO_STRING['inited'] = True
    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
    if os.path.isdir(gen_dir):
      for dirname, _dirs, files in os.walk(gen_dir):
        for filename in files:
          if filename.endswith('resources.h'):
            LoadPakIdsFromResourceFile(os.path.join(dirname, filename))
  return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
                                       'Pak Resource %d' % resource_id)
def AddPakData(symbols, pak_file):
  """Adds pseudo-symbols from a pak file."""
  pak_file = os.path.abspath(pak_file)
  with open(pak_file, 'rb') as pak:
    data = pak.read()

  PAK_FILE_VERSION = 4  # TODO confirm; original constant line not visible.
  # Two uint32s (version, entry count) plus one uint8 (text encoding).
  HEADER_LENGTH = 2 * 4 + 1
  # Each index entry is a uint16 resource id and a uint32 offset.
  INDEX_ENTRY_SIZE = 2 + 4
  version, num_entries, _encoding = struct.unpack('<IIB',
                                                  data[:HEADER_LENGTH])
  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
                                       'version (%d) in %s. Only '
                                       'support version %d' %
                                       (version, pak_file, PAK_FILE_VERSION))
  if num_entries > 0:
    # Consume the index; each entry's size is the gap to the next offset.
    data = data[HEADER_LENGTH:]
    for _ in range(num_entries):
      resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
      data = data[INDEX_ENTRY_SIZE:]
      _next_id, next_offset = struct.unpack('<HI',
                                            data[:INDEX_ENTRY_SIZE])
      resource_size = next_offset - offset
      symbol_name = GetReadablePakResourceName(pak_file, resource_id)
      symbol_path = pak_file
      symbol_type = 'd'  # Data. Approximation.
      symbol_size = resource_size
      symbols.append((symbol_name, symbol_type, symbol_size, symbol_path))
483def _find_in_system_path(binary):
484 """Locate the full path to binary in the system path or return None
486 system_path = os.environ["PATH"].split(os.pathsep)
487 for path
in system_path:
488 binary_path = os.path.join(path, binary)
489 if os.path.isfile(binary_path):
def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output.
  """
  # Decode subprocess output so the str regexes below can be applied.
  tool_output = subprocess.check_output(
      [addr2line_binary, '--version']).decode()
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+).(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  # binutils >= 2.23 understands DWARF4.
  supports_dwarf4 = major > 2 or major == 2 and minor > 22

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(
      ['readelf', '--debug-dump=info', '--dwarf-depth=1', library]).decode()
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print(
        'The supplied tools only support DWARF2 debug data but the binary\n'
        + 'uses DWARF%d. Update the tools or compile the binary\n' % version
        + 'with -gdwarf-2.')
    sys.exit(1)
# NOTE(review): The remainder of this chunk is the body of main() plus the
# __main__ guard; the 'def main():' line itself is not visible here. The
# text is garbled -- original file line numbers are fused into the code and
# many statements are split across lines or missing entirely -- so only
# review comments are added below; the code bytes are left untouched.
527 usage =
"""%prog [options]
529 Runs a spatial analysis on a given library, looking up the source locations
530 of its symbols and calculating how much space each directory, source file,
531 and so on
is taking. The result
is a report that can be used to pinpoint
532 sources of large portions of the binary, etceteras.
534 Under normal circumstances, you only need to
pass two arguments, thusly:
536 %prog --library /path/to/library --destdir /path/to/output
538 In this mode, the program will dump the symbols
from the specified library
539 and map those symbols back to source locations, producing a web-based
540 report
in the specified output directory.
542 Other options are available via
'--help'.
# Option registration: the parser.add_option(...) call heads and most option
# name/metavar arguments are missing from this fragment; only the help
# strings survive.
544 parser = optparse.OptionParser(usage=usage)
548 help=
'if specified, use nm input from <path> instead of '
549 'generating it. Note that source locations should be '
550 'present in the file; i.e., no addr2line symbol lookups '
551 'will be performed when this option is specified. '
552 'Mutually exclusive with --library.')
556 help=
'write output to the specified directory. An HTML '
557 'report is generated here along with supporting files; '
558 'any existing report will be overwritten.')
562 help=
'if specified, process symbols in the library at '
563 'the specified path. Mutually exclusive with --nm-in.')
567 help=
'if specified, includes the contents of the '
568 'specified *.pak file in the output.')
571 help=
'use the specified nm binary to analyze library. '
572 'This is to be used when the nm in the path is not for '
573 'the right architecture or of the right version.')
575 '--addr2line-binary',
576 help=
'use the specified addr2line binary to analyze '
577 'library. This is to be used when the addr2line in '
578 'the path is not for the right architecture or '
579 'of the right version.')
583 help=
'number of jobs to use for the parallel '
584 'addr2line processing pool; defaults to 1. More '
585 'jobs greatly improve throughput but eat RAM like '
586 'popcorn, and take several gigabytes each. Start low '
587 'and ramp this number up until your machine begins to '
588 'struggle with RAM. '
589 'This argument is only valid when using --library.')
595 help=
'be verbose, printing lots of status information.')
599 help=
'(deprecated) No-op. nm.out is stored in --destdir.')
603 help=
'do not keep the nm output file. This file is useful '
604 'if you want to see the fully processed nm output after '
605 'the symbols have been mapped to source locations, or if '
606 'you plan to run explain_binary_size_delta.py. By default '
607 'the file \'nm.out\' is placed alongside the generated '
608 'report. The nm.out file is only created when using '
611 '--disable-disambiguation',
613 help=
'disables the disambiguation process altogether,'
614 ' NOTE: this may, depending on your toolchain, produce'
615 ' output with some symbols at the top layer if addr2line'
616 ' could not get the entire source path.')
620 help=
'the path to the source code of the output binary, '
621 'default set to current directory. Used in the'
622 ' disambiguation process.')
625 dest=
'check_support',
628 help=
'Check that the version of the available tools is sufficient to '
629 'read the data from the library given by --library')
630 parser.add_option(
'--no-check-support',
631 action=
'store_false',
632 dest=
'check_support')
633 opts, _args = parser.parse_args()
# Argument validation: exactly one of --library / --nm-in must be given.
635 if ((
not opts.library)
and
636 (
not opts.nm_in))
or (opts.library
and opts.nm_in):
637 parser.error(
'exactly one of --library or --nm-in is required')
639 print(
'WARNING: --nm-out is deprecated and has no effect.',
643 print(
'WARNING: --jobs has no effect when used with --nm-in',
646 parser.error(
'--destdir is a required argument')
# Default job count: capped between 2 and 4; the work is I/O- and
# memory-bound, so more processes do not help.
652 opts.jobs =
max(2,
min(4, multiprocessing.cpu_count()))
654 if opts.addr2line_binary:
655 assert os.path.isfile(opts.addr2line_binary)
656 addr2line_binary = opts.addr2line_binary
658 addr2line_binary = _find_in_system_path(
'addr2line')
659 assert addr2line_binary,
'Unable to find addr2line in the path. '\
660 'Use --addr2line-binary to specify location.'
663 assert os.path.isfile(opts.nm_binary)
664 nm_binary = opts.nm_binary
666 nm_binary = _find_in_system_path(
'nm')
667 assert nm_binary,
'Unable to find nm in the path. Use --nm-binary '\
668 'to specify location.'
# NOTE(review): BUG -- "'Could not find ' % opts.pak" has no %s
# placeholder, so evaluating this assert message raises TypeError.
# Should read 'Could not find %s' % opts.pak.
671 assert os.path.isfile(opts.pak),
'Could not find ' % opts.pak
673 print(
'addr2line: %s' % addr2line_binary)
674 print(
'nm: %s' % nm_binary)
676 if opts.library
and opts.check_support:
# Output directory setup: copy the d3 library and report template
# alongside the generated data.
680 if not os.path.exists(opts.destdir):
681 os.makedirs(opts.destdir, 0o755)
682 nm_out = os.path.join(opts.destdir,
'nm.out')
689 data_js_file_name = os.path.join(opts.destdir,
'data.js')
690 d3_out = os.path.join(opts.destdir,
'd3')
691 if not os.path.exists(d3_out):
692 os.makedirs(d3_out, 0o755)
693 d3_src = os.path.join(os.path.dirname(__file__),
'..',
'..',
'd3',
'src')
694 template_src = os.path.join(os.path.dirname(__file__),
'template')
695 shutil.copy(os.path.join(d3_src,
'LICENSE'), d3_out)
696 shutil.copy(os.path.join(d3_src,
'd3.js'), d3_out)
697 shutil.copy(os.path.join(template_src,
'index.html'), opts.destdir)
698 shutil.copy(os.path.join(template_src,
'D3SymbolTreeMap.js'), opts.destdir)
# NOTE(review): 'opts.verbose is True' and
# 'opts.disable_disambiguation is None' are identity checks; plain
# truthiness tests would be the idiomatic form.
701 symbols =
GetNmSymbols(opts.nm_in, nm_out, opts.library, opts.jobs,
702 opts.verbose
is True, addr2line_binary, nm_binary,
703 opts.disable_disambiguation
is None,
710 symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
713 symbol_path_origin_dir = os.path.abspath(os.getcwd())
716 print(
'Report saved to ' + opts.destdir +
'/index.html')
719if __name__ ==
'__main__':
static float max(float r, float g, float b)
static float min(float r, float g, float b)
const myers::Point & get(const myers::Segment &)
def CheckDebugFormatSupport(library, addr2line_binary)
def AddPakData(symbols, pak_file)
def GetReadablePakResourceName(pak_file, resource_id)
def MakeSourceMap(symbols)
def RunNm(binary, nm_binary)
def DumpCompactTree(symbols, symbol_path_origin_dir, outfile)
def SplitNoPathBucket(node)
def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs, disambiguate, src_path)
def MakeCompactTree(symbols, symbol_path_origin_dir)
def LoadPakIdsFromResourceFile(filename)
def GetNmSymbols(nm_infile, outfile, library, jobs, verbose, addr2line_binary, nm_binary, disambiguate, src_path)
def MakeChildrenDictsIntoLists(node)
def print(*args, **kwargs)
static DecodeResult decode(std::string path)