d8/dd5/ab_8py_source.html

#!/usr/bin/python

# encoding: utf-8


# Copyright 2017 Google Inc.

#

# Use of this source code is governed by a BSD-style license that can be found

# in the LICENSE file.

#

# This is an A/B test utility script used by calmbench.py

#

# For each bench, we get a distribution of min_ms measurements from nanobench.

# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution.

# If range (1/3 quantile, 2/3 quantile) is completely disjoint between A and B,

# we report that as a regression.

#

# The more measurements we have for a bench, the more accurate our quantiles

# are. However, taking more measurements is time consuming. Hence we'll prune

# out benches and only take more measurements for benches whose current quantile

# ranges are disjoint.

#

# P.S. The current script is a brute-force translation of a Ruby script, so it
# may be ugly...


from __future__ import print_function

import re

import os

import sys

import time

import json

import subprocess

import shlex

import multiprocessing

import traceback

from argparse import ArgumentParser

from multiprocessing import Process

from threading import Thread

from threading import Lock

from pdb import set_trace


# Banner used as the ArgumentParser description in parse_args() and printed by
# the __main__ error handler. The \033[31m ... \033[0m pair are ANSI color
# escape sequences wrapping the message.
HELP = """

\033[31mPlease call calmbench.py to drive this script if you're not doing so.

This script is not supposed to be used by itself. (At least, it's not easy to

use by itself. The calmbench bots may use this script directly.)

\033[0m

"""


# Tunables for the quantile comparison and the suspect-narrowing loop.
FACTOR = 3      # lower/upper quantile factor
DIFF_T = 0.99   # different enough threshold
TERM = 10       # terminate after this no. of iterations without suspect changes
MAXTRY = 30     # max number of nanobench tries to narrow down suspects

# Recognized time units; add_time() uses the position in this list as the
# power of 1000 applied to a measurement, so "ns" is the base unit.
UNITS = ["ns", "µs", "ms", "s"]

# Shared measurement state: bench name -> sorted list of normalized times.
# timesLock is held by worker threads while they update these dicts or print.
timesLock = Lock()
timesA = dict()
timesB = dict()


def parse_args():
  """Parse the command line of this script.

  All main arguments are positional (they are normally supplied by
  calmbench.py); only --concise, --githash and --keys are optional.

  Returns:
    The argparse namespace with two post-processing steps applied:
    `skip_b` and `noinit` are converted from the strings 'true'/'false' to
    booleans, and a `threads` value of -1 is resolved to a concrete count.
  """
  parser = ArgumentParser(description=HELP)

  parser.add_argument('outdir', type=str, help="output directory")
  parser.add_argument('a', type=str, help="name of A")
  parser.add_argument('b', type=str, help="name of B")
  parser.add_argument('nano_a', type=str, help="path to A's nanobench binary")
  parser.add_argument('nano_b', type=str, help="path to B's nanobench binary")
  parser.add_argument('arg_a', type=str, help="args for A's nanobench run")
  parser.add_argument('arg_b', type=str, help="args for B's nanobench run")
  parser.add_argument('repeat', type=int, help="number of initial runs")
  parser.add_argument('skip_b', type=str, help=("whether to skip running B"
                                                " ('true' or 'false')"))
  parser.add_argument('config', type=str, help="nanobench config")
  parser.add_argument('threads', type=int, help="number of threads to run")
  # The original help text here was a copy of skip_b's. run() shows what the
  # flag really does: it suppresses the initial runs so that the existing
  # output files are parsed instead.
  parser.add_argument('noinit', type=str, help=("whether to skip the initial"
                                                " runs and reuse existing"
                                                " output files"
                                                " ('true' or 'false')"))

  parser.add_argument('--concise', dest='concise', action="store_true",
      help="If set, no verbose thread info will be printed.")
  parser.set_defaults(concise=False)

  # Additional args for bots
  BHELP = "bot specific options"
  parser.add_argument('--githash', type=str, default="", help=BHELP)
  parser.add_argument('--keys', type=str, default=[], nargs='+', help=BHELP)

  args = parser.parse_args()
  args.skip_b = args.skip_b == "true"
  args.noinit = args.noinit == "true"

  if args.threads == -1:
    args.threads = 1
    if args.config in ["8888", "565"]: # multi-thread for CPU only
      # Floor division keeps the thread count an int on Python 3 as well;
      # it is later compared against len() and used to size range() loops.
      args.threads = max(1, multiprocessing.cpu_count() // 2)

  return args


def append_dict_sorted_array(dict_array, key, value):
  """Add `value` to the list stored under `key`, keeping that list sorted.

  The list is created on first use. `dict_array` is modified in place and
  nothing is returned.
  """
  bucket = dict_array.setdefault(key, [])
  bucket.append(value)
  bucket.sort()


def add_time(args, name, bench, t, unit):
  """Record one measurement for `bench` under A or B.

  Args:
    args: parsed arguments; `args.a` and `args.b` are the two run names.
    name: name of the run the measurement came from.
    bench: bench name used as the dictionary key.
    t: measured time as a number, expressed in `unit`.
    unit: one of the entries of UNITS.

  Raises:
    ValueError: if `unit` is not listed in UNITS.
  """
  # Scale to the first unit in UNITS: each later unit is 1000x the previous.
  normalized_t = t * 1000 ** UNITS.index(unit)

  # A bare prefix test misfiles B's samples under A whenever B's name starts
  # with A's name (e.g. a="master", b="master2"), so an exact match with B is
  # excluded first. Other names that merely start with A's name still go to A,
  # as before.
  belongs_to_a = name == args.a or (
      name != args.b and name.startswith(args.a))

  target = timesA if belongs_to_a else timesB
  append_dict_sorted_array(target, bench, normalized_t)


# A whole time token: optional sign, decimal number, optional exponent, unit.
# The pattern is a raw string with an escaped dot and is anchored at both ends
# so that tokens such as "1x5ms" are rejected instead of being partially
# matched. The exponent is accepted because float() understands it.
_BENCH_TIME_RE = re.compile(
    r"^([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)(s|ms|µs|ns)$")


def append_times_from_file(args, name, filename):
  """Parse a nanobench output file and record every matching measurement.

  A line is used only when it has more than 10 whitespace-separated columns,
  column 9 equals `args.config` and column 3 is a well-formed time token.
  Column 10 is taken as the bench name. (Presumably column 3 is nanobench's
  min time, per the header comment of this file -- confirm against the
  nanobench output format.)

  Args:
    args: parsed arguments; `args.config` selects the lines to keep.
    name: run name forwarded to add_time().
    filename: path of the nanobench output file to read.
  """
  with open(filename) as f:
    for line in f:
      items = line.split()
      if len(items) <= 10 or items[9] != args.config:
        continue
      matches = _BENCH_TIME_RE.match(items[3])
      if not matches:
        continue
      time_num = float(matches.group(1))
      time_unit = matches.group(2)
      add_time(args, name, items[10], time_num, time_unit)


class ThreadWithException(Thread):
  """Thread that captures an exception raised by its target.

  After the thread has finished, `exception` holds the exception instance
  raised by the target, or None when the target returned normally.
  """

  def __init__(self, target):
    super(ThreadWithException, self).__init__(target = target)
    # Keep our own reference to the callable. The name-mangled
    # `_Thread__target` attribute only exists on Python 2, so relying on it
    # makes run() fail with AttributeError on Python 3.
    self._task = target
    self.exception = None

  def run(self):
    try:
      self._task()
    except BaseException as e:
      # Deliberately broad: the exception is handed over to whoever joins
      # this thread (see the `exception` attribute) instead of being lost.
      self.exception = e

  def join(self, timeout=None):
    # Plain pass-through to Thread.join().
    super(ThreadWithException, self).join(timeout)


class ThreadRunner:
  """Simplest possible threaded executor.

  Tasks are started immediately by add(). Once `args.threads` tasks are
  pending, the next add() first waits for ALL of them to finish, so work
  proceeds in batches.
  """

  def __init__(self, args):
    self.concise = args.concise
    self.threads = []

  def add(self, args, fn):
    """Start `fn` on a new daemon thread, draining the batch first if full."""
    if len(self.threads) >= args.threads:
      self.wait()
    t = ThreadWithException(target = fn)
    t.daemon = True
    self.threads.append(t)
    t.start()

  def wait(self):
    """Join every pending thread, then re-raise the first captured exception.

    Unless `concise` is set, a progress spinner is written to stderr while
    waiting. All captured exceptions are printed before the first is raised.
    """
    def spin():
      i = 0
      spinners = [".  ", ".. ", "..."]
      while len(self.threads) > 0:
        # The lock is shared with the worker tasks so that spinner output
        # does not interleave with theirs; `with` releases it even if the
        # write fails.
        with timesLock:
          sys.stderr.write(
              "\r" + spinners[i % len(spinners)] +
              " (%d threads running)" % len(self.threads) +
              "           \r" # spaces for erasing characters
          )
        time.sleep(0.5)
        i += 1

    spinner = None
    if not self.concise:
      spinner = Thread(target = spin)
      spinner.start()

    try:
      for t in self.threads:
        t.join()
      exceptions = [t.exception for t in self.threads if t.exception]
    finally:
      # Emptying the list is what stops the spinner loop. Doing it in a
      # `finally` keeps an interrupted join from leaving the spinner running
      # forever.
      self.threads = []
      if spinner is not None:
        spinner.join()

    if exceptions:
      for exc in exceptions:
        print(exc)
      raise exceptions[0]


def split_arg(arg):
  """Split a shell-style argument string into a list of tokens.

  Tokens are produced by shlex.split(); any token containing '~' is passed
  through os.path.expanduser().
  """
  return [os.path.expanduser(token) if '~' in token else token
          for token in shlex.split(arg)]


def run(args, threadRunner, name, nano, arg, i):
  """Schedule one nanobench run and the parsing of its output.

  Args:
    args: parsed arguments (outdir, config, noinit, skip_b, b are read).
    threadRunner: executor whose add(args, fn) receives the task.
    name: run name; used in the output file name and to attribute the times.
    nano: path of the nanobench binary to execute.
    arg: extra nanobench arguments as one shell-style string.
    i: run index. Positive values are initial runs, which may be skipped via
       `noinit` / `skip_b`; values <= 0 are suspect re-runs and always
       execute.
  """
  def task():
    file_i = "%s/%s.out%d" % (args.outdir, name, i)

    # Suspect re-runs (i <= 0) always execute. Initial runs are skipped when
    # `noinit` is set, or when this is B and `skip_b` is set.
    should_run = i <= 0 or (
        not args.noinit and not (name == args.b and args.skip_b))

    if should_run:
      if i > 0:
        with timesLock:
          print("Init run %d for %s..." % (i, name))
      # Opening with 'w' already creates or truncates the file, so the
      # separate `touch` subprocess was redundant and is dropped.
      with open(file_i, 'w') as f:
        subprocess.check_call([nano] + split_arg(arg) +
                              ["--config", args.config], stderr=f, stdout=f)

    # The shared time tables are updated under the lock. `with` guarantees
    # the lock is released even if parsing raises.
    with timesLock:
      append_times_from_file(args, name, file_i)

  threadRunner.add(args, task)


def init_run(args):
  """Schedule the initial A and B runs and wait for all of them.

  The number of rounds is the larger of `args.repeat` and half the thread
  count; each round schedules one run of A and one run of B.
  """
  threadRunner = ThreadRunner(args)
  # Floor division: range() rejects the float that `/` yields on Python 3.
  rounds = max(args.repeat, args.threads // 2)
  for i in range(1, rounds + 1):
    run(args, threadRunner, args.a, args.nano_a, args.arg_a, i)
    run(args, threadRunner, args.b, args.nano_b, args.arg_b, i)
  threadRunner.wait()


def get_lower_upper(values):
  """Return the (lower, upper) quantile pair of a sorted, non-empty list.

  The lower value sits (len - 1) // FACTOR positions from the start and the
  upper value the same distance from the end.

  Raises:
    IndexError: if `values` is empty.
  """
  # Floor division keeps the index an int; `/` yields a float on Python 3,
  # which cannot be used to index a list.
  i = max(0, (len(values) - 1) // FACTOR)
  return values[i], values[-i - 1]


def different_enough(lower1, upper2):
  """Tell whether `upper2` lies below `lower1` scaled by DIFF_T."""
  cutoff = DIFF_T * lower1
  return upper2 < cutoff


# TODO(liyuqian): we use this hacky criterion mainly because I didn't have
# time to study more rigorous statistical tests. We should adopt a more
# rigorous test in the future.

def get_suspects():
  """List the benches whose A and B quantile ranges do not overlap.

  Only benches present in both timesA and timesB are considered. A bench is
  flagged when either side's upper quantile is "different enough" below the
  other side's lower quantile.
  """
  flagged = []
  for bench, a_times in timesA.items():
    if bench in timesB:
      lowerA, upperA = get_lower_upper(a_times)
      lowerB, upperB = get_lower_upper(timesB[bench])
      b_below_a = different_enough(lowerA, upperB)
      if b_below_a or different_enough(lowerB, upperA):
        flagged.append(bench)
  return flagged


def process_bench_pattern(s):
  """Turn a bench name into a quoted pattern for nanobench's --match option.

  Names containing ".skp" are cut right after the ".skp" and left
  open-ended, because skp benches won't match their exact names. All other
  names become an exact pattern terminated with "$".
  """
  skp = ".skp"
  if skp in s: # skp bench won't match their exact names...
    # Keep the full ".skp". The previous `+ 3` dropped the trailing "p",
    # which looks like an off-by-one from translating an inclusive Ruby
    # range into an exclusive Python slice.
    return "^\"" + s[:s.index(skp) + len(skp)] + "\""
  return "^\"" + s + "\"$"


def suspects_arg(suspects):
  """Build the " --match ..." argument suffix selecting the given benches."""
  patterns = [process_bench_pattern(bench) for bench in suspects]
  return " --match " + " ".join(patterns)


def median(array):
  """Return the middle element of a sorted, non-empty sequence.

  For an even number of elements the upper of the two middle elements is
  returned; no averaging is done.

  Raises:
    IndexError: if `array` is empty.
  """
  # Floor division: `/` yields a float on Python 3, which is not a valid
  # list index.
  return array[len(array) // 2]


def regression(bench):
  """Return the ratio of B's median time to A's median time for `bench`.

  When A's median is 0 the bench is treated as bad and 1 (no regression) is
  returned.
  """
  median_a = median(timesA[bench])
  median_b = median(timesB[bench])
  # bad bench, just return no regression
  return 1 if median_a == 0 else median_b / median_a


def percentage(x):
  """Convert a ratio into a percentage change relative to 1."""
  return 100 * (x - 1)


def format_r(r):
  """Format ratio `r` as a fixed-width percentage string ending in '%'."""
  number = '%6.2f' % percentage(r)
  return number + "%"


def normalize_r(r):
  """Map ratio `r` to a signed value centered on 0.

  Ratios above 1 map to r - 1; all other ratios map to 1 - 1/r, so that for
  example 2.0 gives 1.0 and 0.5 gives -1.0.
  """
  if not r > 1.0:
    return 1.0 - 1/r
  return r - 1.0


def test():
  """Run the whole A/B comparison and write the reports.

  Steps:
    1. Parse arguments and perform the initial runs.
    2. Repeatedly re-run only the suspect benches until there are no
       suspects, the number of suspects has not changed for TERM iterations,
       or MAXTRY re-run rounds have been scheduled.
    3. Print a summary and write bench_<a>_<b>.json and bench_<a>_<b>.csv
       into the output directory.
  """
  args = parse_args()

  init_run(args)
  last_unchanged_iter = 0
  last_suspect_number = -1
  tryCnt = 0
  it = 0
  while tryCnt < MAXTRY:
    it += 1
    suspects = get_suspects()
    if len(suspects) != last_suspect_number:
      last_suspect_number = len(suspects)
      last_unchanged_iter = it
    if (len(suspects) == 0 or it - last_unchanged_iter >= TERM):
      break

    print("Number of suspects at iteration %d: %d" % (it, len(suspects)))
    threadRunner = ThreadRunner(args)
    match_arg = suspects_arg(suspects)
    # Floor division: range() rejects the float that `/` yields on Python 3.
    for j in range(1, max(1, args.threads // 2) + 1):
      # Non-positive run indices mark suspect re-runs (see run()).
      run(args, threadRunner, args.a, args.nano_a,
          args.arg_a + match_arg, -j)
      run(args, threadRunner, args.b, args.nano_b,
          args.arg_b + match_arg, -j)
      tryCnt += 1
    threadRunner.wait()

  suspects = get_suspects()
  if len(suspects) == 0:
    print(("%s and %s does not seem to have significant " + \
           "performance differences.") % (args.a, args.b))
  else:
    suspects.sort(key = regression)
    print("%s (compared to %s) is likely" % (args.a, args.b))
    for suspect in suspects:
      r = regression(suspect)
      if r < 1:
        print("\033[31m  %s slower in %s\033[0m" % (format_r(1/r), suspect))
      else:
        print("\033[32m  %s faster in %s\033[0m" % (format_r(r), suspect))

  # Set for O(1) membership tests; `suspects` itself keeps the sorted order.
  suspect_set = set(suspects)

  with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f:
    results = {}
    for bench in timesA:
      r = regression(bench) if bench in suspect_set else 1.0
      lower, upper = get_lower_upper(timesA[bench])
      results[bench] = {
        args.config: {
          "signed_regression": normalize_r(r),
          "lower_quantile_ms": lower * 1e-6,
          "upper_quantile_ms": upper * 1e-6,
          "options": {
            # TODO(liyuqian): let ab.py call nanobench with --outResultsFile so
            # nanobench could generate the json for us that's exactly the same
            # as that being used by perf bots. Currently, we cannot guarantee
            # that bench is the name (e.g., bench may have additional resolution
            # information appended after name).
            "name": bench
          }
        }
      }

    output = {"results": results}
    if args.githash:
      output["gitHash"] = args.githash
    if args.keys:
      # --keys is a flat "k1 v1 k2 v2 ..." list; a trailing unpaired item is
      # ignored.
      output["key"] = dict(zip(args.keys[0::2], args.keys[1::2]))
    f.write(json.dumps(output, indent=4))
    print(("\033[36mJSON results available in %s\033[0m" % f.name))

  with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out:
    out.write(("bench, significant?, raw regresion, " +
                   "%(A)s quantile (ns), %(B)s quantile (ns), " +
                   "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b})
    # Suspects are listed first, then every bench of A (suspects therefore
    # appear twice). list() is needed because dict.keys() is a view on
    # Python 3 and cannot be concatenated to a list.
    for bench in suspects + list(timesA.keys()):
      if (bench not in timesA or bench not in timesB):
        continue
      ta = timesA[bench]
      tb = timesB[bench]
      out.write(
          "%s, %s, %f, " % (bench, bench in suspect_set, regression(bench)) +
          ' '.join(map(str, get_lower_upper(ta))) + ", " +
          ' '.join(map(str, get_lower_upper(tb))) + ", " +
          ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb))))
      )
    print(("\033[36m" +
           "Compared %d benches. " +
           "%d of them seem to be significantly differrent." +
           "\033[0m") %
           (len([x for x in timesA if x in timesB]), len(suspects)))
    print("\033[36mPlease see detailed bench results in %s\033[0m" % out.name)


if __name__ == "__main__":
  try:
    test()
  except Exception as e:
    # Show the error, the usage banner and the traceback, then let the
    # exception propagate so the process still fails.
    print(e)
    print(HELP)
    traceback.print_exc()
    # A bare `raise` re-raises with the original traceback intact; `raise e`
    # would restart the traceback from this line on Python 2.
    raise

sort
static std::vector< SkPDFIndirectReference > sort(const THashSet< SkPDFIndirectReference > &src)
Definition: SkPDFDevice.cpp:1061

ab.ThreadRunner
Definition: ab.py:145

ab.ThreadRunner.concise
concise
Definition: ab.py:148

ab.ThreadRunner.__init__
def __init__(self, args)
Definition: ab.py:147

ab.ThreadRunner.threads
threads
Definition: ab.py:149

ab.ThreadRunner.wait
def wait(self)
Definition: ab.py:159

ab.ThreadRunner.add
def add(self, args, fn)
Definition: ab.py:151

ab.ThreadWithException
Definition: ab.py:130

ab.ThreadWithException.__init__
def __init__(self, target)
Definition: ab.py:131

ab.ThreadWithException.join
def join(self, timeout=None)
Definition: ab.py:141

ab.ThreadWithException.run
def run(self)
Definition: ab.py:135

ab.ThreadWithException.exception
exception
Definition: ab.py:133

append
static void append(char **dst, size_t *count, const char *src, size_t n)
Definition: editor.cpp:211

max
static float max(float r, float g, float b)
Definition: hsl.cpp:49

Thread

ab.append_dict_sorted_array
def append_dict_sorted_array(dict_array, key, value)
Definition: ab.py:100

ab.suspects_arg
def suspects_arg(suspects)
Definition: ab.py:272

ab.parse_args
def parse_args()
Definition: ab.py:62

ab.get_lower_upper
def get_lower_upper(values)
Definition: ab.py:241

ab.normalize_r
def normalize_r(r)
Definition: ab.py:297

ab.percentage
def percentage(x)
Definition: ab.py:289

ab.get_suspects
def get_suspects()
Definition: ab.py:253

ab.different_enough
def different_enough(lower1, upper2)
Definition: ab.py:246

ab.init_run
def init_run(args)
Definition: ab.py:233

ab.test
def test()
Definition: ab.py:304

ab.regression
def regression(bench)
Definition: ab.py:281

ab.format_r
def format_r(r)
Definition: ab.py:293

ab.run
def run(args, threadRunner, name, nano, arg, i)
Definition: ab.py:208

ab.append_times_from_file
def append_times_from_file(args, name, filename)
Definition: ab.py:115

ab.add_time
def add_time(args, name, bench, t, unit)
Definition: ab.py:107

ab.process_bench_pattern
def process_bench_pattern(s)
Definition: ab.py:265

ab.split_arg
def split_arg(arg)
Definition: ab.py:197

ab.median
def median(array)
Definition: ab.py:277

gn.find_headers.len
len
Definition: find_headers.py:30

run_tests.print
def print(*args, **kwargs)
Definition: run_tests.py:49

run
Definition: run.py:1

skvx::map
SI auto map(std::index_sequence< I... >, Fn &&fn, const Args &... args) -> skvx::Vec< sizeof...(I), decltype(fn(args[0]...))>
Definition: SkVx.h:680

test

tools.skpbench.skpbench.float
float
Definition: skpbench.py:42

join
static SkString join(const CommandLineFlags::StringArray &)
Definition: skpbench.cpp:741

spin
static sk_sp< SkColorFilter > spin(sk_sp< SkColorFilter > cf)
Definition: workingspace.cpp:82