Flutter Engine
ab.py
#!/usr/bin/python
# encoding: utf-8

# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be found
# in the LICENSE file.
#
# This is an A/B test utility script used by calmbench.py
#
# For each bench, we get a distribution of min_ms measurements from nanobench.
# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution.
# If the range (1/3 quantile, 2/3 quantile) is completely disjoint between A
# and B, we report that as a regression.
#
# The more measurements we have for a bench, the more accurate our quantiles
# are. However, taking more measurements is time consuming. Hence we'll prune
# out benches and only take more measurements for benches whose current
# quantile ranges are disjoint.
#
# P.S. This script was mechanically translated from a Ruby script, so it may
# be ugly...

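# A worked example of the disjointness test (illustrative only; the sample
# values are hypothetical, and FACTOR / DIFF_T refer to the defaults defined
# later in this file):
#
#   >>> a = [100, 102, 104, 106, 108, 110]  # sorted min_ms samples for A
#   >>> b = [115, 117, 119, 121, 123, 125]  # sorted min_ms samples for B
#   >>> i = max(0, (len(a) - 1) / FACTOR)   # FACTOR = 3, so i = 1 (int div)
#   >>> a[i], a[-i - 1]                     # A's (1/3, 2/3) quantile range
#   (102, 108)
#   >>> b[i], b[-i - 1]                     # B's (1/3, 2/3) quantile range
#   (117, 123)
#   >>> a[-i - 1] < DIFF_T * b[i]           # 108 < 0.99 * 117: disjoint,
#   True                                    # so this bench is a suspect
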
from __future__ import print_function
import re
import os
import sys
import time
import json
import subprocess
import shlex
import multiprocessing
import traceback
from argparse import ArgumentParser
from multiprocessing import Process
from threading import Thread
from threading import Lock
from pdb import set_trace


HELP = """
\033[31mPlease call calmbench.py to drive this script if you're not doing so.
This script is not supposed to be used by itself. (At least, it's not easy to
use by itself. The calmbench bots may use this script directly.)
\033[0m
"""

FACTOR = 3    # lower/upper quantile factor
DIFF_T = 0.99 # different enough threshold
TERM = 10     # terminate after this no. of iterations without suspect changes
MAXTRY = 30   # max number of nanobench tries to narrow down suspects

UNITS = "ns µs ms s".split()


timesLock = Lock()
timesA = {}
timesB = {}


def parse_args():
  parser = ArgumentParser(description=HELP)

  parser.add_argument('outdir', type=str, help="output directory")
  parser.add_argument('a', type=str, help="name of A")
  parser.add_argument('b', type=str, help="name of B")
  parser.add_argument('nano_a', type=str, help="path to A's nanobench binary")
  parser.add_argument('nano_b', type=str, help="path to B's nanobench binary")
  parser.add_argument('arg_a', type=str, help="args for A's nanobench run")
  parser.add_argument('arg_b', type=str, help="args for B's nanobench run")
  parser.add_argument('repeat', type=int, help="number of initial runs")
  parser.add_argument('skip_b', type=str, help=("whether to skip running B"
                                                " ('true' or 'false')"))
  parser.add_argument('config', type=str, help="nanobench config")
  parser.add_argument('threads', type=int, help="number of threads to run")
  parser.add_argument('noinit', type=str, help=("whether to skip the initial"
                                                " runs ('true' or 'false')"))

  parser.add_argument('--concise', dest='concise', action="store_true",
      help="If set, no verbose thread info will be printed.")
  parser.set_defaults(concise=False)

  # Additional args for bots
  BHELP = "bot specific options"
  parser.add_argument('--githash', type=str, default="", help=BHELP)
  parser.add_argument('--keys', type=str, default=[], nargs='+', help=BHELP)

  args = parser.parse_args()
  args.skip_b = args.skip_b == "true"
  args.noinit = args.noinit == "true"

  if args.threads == -1:
    args.threads = 1
    if args.config in ["8888", "565"]:  # multi-thread for CPU only
      args.threads = max(1, multiprocessing.cpu_count() / 2)

  return args


def append_dict_sorted_array(dict_array, key, value):
  if key not in dict_array:
    dict_array[key] = []
  dict_array[key].append(value)
  dict_array[key].sort()


def add_time(args, name, bench, t, unit):
  # Normalize to nanoseconds: UNITS is ordered (ns, µs, ms, s), so each step
  # up the list multiplies by 1000.
  normalized_t = t * 1000 ** UNITS.index(unit)
  if name.startswith(args.a):
    append_dict_sorted_array(timesA, bench, normalized_t)
  else:
    append_dict_sorted_array(timesB, bench, normalized_t)


def append_times_from_file(args, name, filename):
  with open(filename) as f:
    lines = f.readlines()
  for line in lines:
    items = line.split()
    if len(items) > 10:
      bench = items[10]
      matches = re.search(r"([+-]?\d*\.?\d+)(s|ms|µs|ns)", items[3])
      if (not matches or items[9] != args.config):
        continue
      time_num = matches.group(1)
      time_unit = matches.group(2)
      add_time(args, name, bench, float(time_num), time_unit)


class ThreadWithException(Thread):
  def __init__(self, target):
    super(ThreadWithException, self).__init__(target = target)
    self.exception = None

  def run(self):
    try:
      self._Thread__target(*self._Thread__args, **self._Thread__kwargs)
    except BaseException as e:
      self.exception = e

  def join(self, timeout=None):
    super(ThreadWithException, self).join(timeout)


class ThreadRunner:
  """Simplest and stupidest threaded executor."""
  def __init__(self, args):
    self.concise = args.concise
    self.threads = []

  def add(self, args, fn):
    if len(self.threads) >= args.threads:
      self.wait()
    t = ThreadWithException(target = fn)
    t.daemon = True
    self.threads.append(t)
    t.start()

  def wait(self):
    def spin():
      i = 0
      spinners = [". ", ".. ", "..."]
      while len(self.threads) > 0:
        timesLock.acquire()
        sys.stderr.write(
            "\r" + spinners[i % len(spinners)] +
            " (%d threads running)" % len(self.threads) +
            "     \r"  # trailing spaces erase leftover characters
        )
        timesLock.release()
        time.sleep(0.5)
        i += 1

    if not self.concise:
      ts = Thread(target = spin)
      ts.start()

    for t in self.threads:
      t.join()

    exceptions = []
    for t in self.threads:
      if t.exception:
        exceptions.append(t.exception)

    self.threads = []

    if not self.concise:
      ts.join()

    if len(exceptions):
      for exc in exceptions:
        print(exc)
      raise exceptions[0]


def split_arg(arg):
  raw = shlex.split(arg)
  result = []
  for r in raw:
    if '~' in r:
      result.append(os.path.expanduser(r))
    else:
      result.append(r)
  return result


def run(args, threadRunner, name, nano, arg, i):
  def task():
    file_i = "%s/%s.out%d" % (args.outdir, name, i)

    should_run = not args.noinit and not (name == args.b and args.skip_b)
    if i <= 0:
      should_run = True  # always run for suspects

    if should_run:
      if i > 0:
        timesLock.acquire()
        print("Init run %d for %s..." % (i, name))
        timesLock.release()
      subprocess.check_call(["touch", file_i])
      with open(file_i, 'w') as f:
        subprocess.check_call([nano] + split_arg(arg) +
                              ["--config", args.config], stderr=f, stdout=f)

    timesLock.acquire()
    append_times_from_file(args, name, file_i)
    timesLock.release()

  threadRunner.add(args, task)


def init_run(args):
  threadRunner = ThreadRunner(args)
  for i in range(1, max(args.repeat, args.threads / 2) + 1):
    run(args, threadRunner, args.a, args.nano_a, args.arg_a, i)
    run(args, threadRunner, args.b, args.nano_b, args.arg_b, i)
  threadRunner.wait()


def get_lower_upper(values):
  i = max(0, (len(values) - 1) / FACTOR)
  return values[i], values[-i - 1]


def different_enough(lower1, upper2):
  return upper2 < DIFF_T * lower1


# TODO(liyuqian): we use this hacky criterion mainly because I didn't have
# time to study more rigorous statistical tests. We should adopt a more
# rigorous test in the future (see the commented sketch after this function).
def get_suspects():
  suspects = []
  for bench in timesA.keys():
    if bench not in timesB:
      continue
    lowerA, upperA = get_lower_upper(timesA[bench])
    lowerB, upperB = get_lower_upper(timesB[bench])
    if different_enough(lowerA, upperB) or different_enough(lowerB, upperA):
      suspects.append(bench)
  return suspects

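# One possible more rigorous replacement for the quantile heuristic above
# (a sketch only, not called anywhere; it assumes a reasonably recent scipy
# is available, which this script does not otherwise require):
#
#   from scipy.stats import mannwhitneyu
#
#   def significantly_different(samples_a, samples_b, alpha=0.05):
#     # Two-sided Mann-Whitney U test on the raw min_ms samples; treats the
#     # two sample sets as significantly different when p < alpha.
#     _, p = mannwhitneyu(samples_a, samples_b, alternative='two-sided')
#     return p < alpha
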

def process_bench_pattern(s):
  if ".skp" in s:  # skp benches won't match their exact names...
    return "^\"" + s[0:(s.index(".skp") + 3)] + "\""
  else:
    return "^\"" + s + "\"$"


def suspects_arg(suspects):
  patterns = map(process_bench_pattern, suspects)
  return " --match " + (" ".join(patterns))


def median(array):
  return array[len(array) / 2]


def regression(bench):
  a = median(timesA[bench])
  b = median(timesB[bench])
  if (a == 0):  # bad bench, just return no regression
    return 1
  return b / a


def percentage(x):
  return (x - 1) * 100


def format_r(r):
  return ('%6.2f' % percentage(r)) + "%"


def normalize_r(r):
  if r > 1.0:
    return r - 1.0
  else:
    return 1.0 - 1/r

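# Worked example for normalize_r (hypothetical values, added for clarity):
# r = 1.25 (B's median is 25% larger than A's) maps to +0.25, while the
# symmetric speedup r = 0.8 maps to 1 - 1/0.8 = -0.25, so slowdowns and
# speedups of equal magnitude get signed regressions of equal size.
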
def test():
  args = parse_args()

  init_run(args)
  last_unchanged_iter = 0
  last_suspect_number = -1
  tryCnt = 0
  it = 0
  while tryCnt < MAXTRY:
    it += 1
    suspects = get_suspects()
    if len(suspects) != last_suspect_number:
      last_suspect_number = len(suspects)
      last_unchanged_iter = it
    if (len(suspects) == 0 or it - last_unchanged_iter >= TERM):
      break

    print("Number of suspects at iteration %d: %d" % (it, len(suspects)))
    threadRunner = ThreadRunner(args)
    for j in range(1, max(1, args.threads / 2) + 1):
      run(args, threadRunner, args.a, args.nano_a,
          args.arg_a + suspects_arg(suspects), -j)
      run(args, threadRunner, args.b, args.nano_b,
          args.arg_b + suspects_arg(suspects), -j)
    tryCnt += 1
    threadRunner.wait()

  suspects = get_suspects()
  if len(suspects) == 0:
    print(("%s and %s do not seem to have significant " +
           "performance differences.") % (args.a, args.b))
  else:
    suspects.sort(key = regression)
    print("%s (compared to %s) is likely" % (args.a, args.b))
    for suspect in suspects:
      r = regression(suspect)
      if r < 1:
        print("\033[31m  %s slower in %s\033[0m" % (format_r(1/r), suspect))
      else:
        print("\033[32m  %s faster in %s\033[0m" % (format_r(r), suspect))

  with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f:
    results = {}
    for bench in timesA:
      r = regression(bench) if bench in suspects else 1.0
      results[bench] = {
        args.config: {
          "signed_regression": normalize_r(r),
          "lower_quantile_ms": get_lower_upper(timesA[bench])[0] * 1e-6,
          "upper_quantile_ms": get_lower_upper(timesA[bench])[1] * 1e-6,
          "options": {
            # TODO(liyuqian): let ab.py call nanobench with --outResultsFile
            # so nanobench can generate the same json used by the perf bots.
            # Currently, we cannot guarantee that bench is the name (e.g.,
            # bench may have additional resolution information appended after
            # the name).
            "name": bench
          }
        }
      }

    output = {"results": results}
    if args.githash:
      output["gitHash"] = args.githash
    if args.keys:
      keys = {}
      for i in range(len(args.keys) / 2):
        keys[args.keys[i * 2]] = args.keys[i * 2 + 1]
      output["key"] = keys
    f.write(json.dumps(output, indent=4))
    print(("\033[36mJSON results available in %s\033[0m" % f.name))

  with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out:
    out.write(("bench, significant?, raw regression, " +
               "%(A)s quantile (ns), %(B)s quantile (ns), " +
               "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b})
    for bench in suspects + timesA.keys():
      if (bench not in timesA or bench not in timesB):
        continue
      ta = timesA[bench]
      tb = timesB[bench]
      out.write(
          "%s, %s, %f, " % (bench, bench in suspects, regression(bench)) +
          ' '.join(map(str, get_lower_upper(ta))) + ", " +
          ' '.join(map(str, get_lower_upper(tb))) + ", " +
          ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb))))
      )
    print(("\033[36m" +
           "Compared %d benches. " +
           "%d of them seem to be significantly different." +
           "\033[0m") %
          (len([x for x in timesA if x in timesB]), len(suspects)))
    print("\033[36mPlease see detailed bench results in %s\033[0m" % out.name)


if __name__ == "__main__":
  try:
    test()
  except Exception as e:
    print(e)
    print(HELP)
    traceback.print_exc()
    raise e