#!/usr/bin/env python
#
# Copyright 2015 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This script is used to analyze GCTracer's NVP output."""

from argparse import ArgumentParser
from copy import deepcopy
from gc_nvp_common import split_nvp
from math import ceil,log
from sys import stdin


class LinearBucket:
  """Bucket trait that maps values onto fixed-width buckets.

  Bucket i covers the half-open range
  [i * granularity, (i + 1) * granularity).
  """

  def __init__(self, granularity):
    """Create a trait whose buckets are `granularity` wide.

    Raises:
      ValueError: if `granularity` is zero or negative.
    """
    if granularity <= 0:
      raise ValueError(
          "granularity must be positive, got {0}".format(granularity))
    self.granularity = granularity

  def value_to_bucket(self, value):
    """Return the index of the bucket that contains `value`."""
    quotient = value / self.granularity
    bucket = int(quotient)
    # int() truncates toward zero. Step down for negative non-integers so
    # that e.g. -3 with granularity 5 lands in bucket -1 ([-5, 0)) instead
    # of bucket 0, whose reported range is [0, 5).
    if bucket > quotient:
      bucket -= 1
    return bucket

  def bucket_to_range(self, bucket):
    """Return the (inclusive lower, exclusive upper) bounds of `bucket`."""
    lower = bucket * self.granularity
    return (lower, lower + self.granularity)


class Log2Bucket:
  """Bucket trait whose bucket widths double from one bucket to the next.

  Bucket 0 covers [0, P) where P is the largest power of two that is not
  greater than `start`; bucket i > 0 covers [P * 2**(i-1), P * 2**i).
  """

  def __init__(self, start):
    """Create a trait whose first bucket ends at (about) `start`.

    Raises:
      ValueError: if `start` is zero or negative (it has no logarithm).
    """
    if start <= 0:
      raise ValueError(
          "initial bucket size must be positive, got {0}".format(start))
    # Exponent offset: bucket index = floor(log2(value)) - self.start.
    self.start = self._floor_log2(start) - 1

  @staticmethod
  def _floor_log2(value):
    """Return the largest integer e with 2 ** e <= value (value > 0)."""
    exponent = int(log(value, 2))
    # log(value, 2) is a floating-point quotient and int() truncates toward
    # zero, so the estimate can be off by one. Correct it against the exact
    # powers of two.
    if 2 ** (exponent + 1) <= value:
      exponent += 1
    elif 2 ** exponent > value:
      exponent -= 1
    return exponent

  def value_to_bucket(self, value):
    """Return the index of the bucket that contains `value`.

    Zero and negative values have no logarithm; they are counted in
    bucket 0, as is everything below the first bucket boundary.
    """
    if value <= 0:
      return 0
    return max(self._floor_log2(value) - self.start, 0)

  def bucket_to_range(self, bucket):
    """Return the (inclusive lower, exclusive upper) bounds of `bucket`."""
    if bucket == 0:
      # The first bucket collects everything below the first boundary.
      return (0, 2 ** (self.start + 1))
    exponent = bucket + self.start
    return (2 ** exponent, 2 ** (exponent + 1))


class Histogram:
  """Counts values per bucket and renders the counts as text.

  `bucket_trait` supplies value_to_bucket(value) and bucket_to_range(bucket);
  `fill_empty` selects whether buckets without any value are printed.
  """

  def __init__(self, bucket_trait, fill_empty):
    self.histogram = {}  # bucket index -> number of values seen
    self.fill_empty = fill_empty
    self.bucket_trait = bucket_trait

  def add(self, key):
    """Count the value `key` in the bucket chosen by the bucket trait."""
    index = self.bucket_trait.value_to_bucket(key)
    self.histogram[index] = self.histogram.get(index, 0) + 1

  def __str__(self):
    """Return one "  [min,max[: count" line per printed bucket.

    Buckets are listed in ascending order starting at bucket 0 (or at the
    lowest populated bucket if that one is negative) up to the highest
    populated bucket. An empty histogram renders as an empty string.
    """
    if not self.histogram:
      return ""
    # min()/max() work on dict keys on both Python 2 and 3, unlike
    # keys().sort(), which only exists on Python 2 lists.
    first = min(0, min(self.histogram))
    last = max(self.histogram)
    ret = []
    for i in range(first, last + 1):
      count = self.histogram.get(i, 0)
      if count == 0 and not self.fill_empty:
        continue
      (min_value, max_value) = self.bucket_trait.bucket_to_range(i)
      ret.append("  [{0},{1}[: {2}".format(
        str(min_value), str(max_value), count))
    return "\n".join(ret)


class Category:
  """Collects the values of one NVP key and renders summary statistics.

  `key` is the NVP key to look up in each entry, `histogram` is an optional
  object with an add(value) method (or None), `csv` selects the one-line CSV
  rendering, and `percentiles` is a list of percentiles to report.
  """

  def __init__(self, key, histogram, csv, percentiles):
    self.key = key
    self.values = []
    self.histogram = histogram
    self.csv = csv
    self.percentiles = percentiles

  def process_entry(self, entry):
    """Record entry[self.key] as a float if the entry has this key.

    Raises:
      ValueError: if the stored value cannot be converted to float.
    """
    if self.key not in entry:
      return
    value = float(entry[self.key])
    self.values.append(value)
    if self.histogram:
      self.histogram.add(value)

  def min(self):
    """Return the smallest recorded value (ValueError if there is none)."""
    return min(self.values)

  def max(self):
    """Return the largest recorded value (ValueError if there is none)."""
    return max(self.values)

  def avg(self):
    """Return the arithmetic mean of the values, or 0.0 without values."""
    if not self.values:
      return 0.0
    return sum(self.values) / len(self.values)

  def empty(self):
    """Return True if no value has been recorded."""
    return len(self.values) == 0

  def _compute_percentiles(self):
    """Return one "  P%: value" line per requested percentile.

    Percentiles below 0 report the minimum and percentiles above 100 the
    maximum, instead of indexing outside (or wrapping around) the sorted
    values.
    """
    if not self.values:
      return []
    sorted_values = sorted(self.values)
    last = len(sorted_values) - 1
    ret = []
    for percentile in self.percentiles:
      # 100.0 keeps this a true division on Python 2 for int percentiles.
      index = int(ceil(last * percentile / 100.0))
      index = min(max(index, 0), last)
      ret.append("  {0}%: {1}".format(percentile, sorted_values[index]))
    return ret

  def __str__(self):
    """Render either "key,len,min,max,avg" (csv) or a multi-line report."""
    if self.csv:
      # min()/max() raise ValueError when no value was recorded; main()
      # filters empty categories before printing.
      fields = [self.key, len(self.values),
                self.min(), self.max(), self.avg()]
      return ",".join([str(x) for x in fields])

    ret = [self.key]
    ret.append("  len: {0}".format(len(self.values)))
    if self.values:
      ret.append("  min: {0}".format(self.min()))
      ret.append("  max: {0}".format(self.max()))
      ret.append("  avg: {0}".format(self.avg()))
      if self.histogram:
        ret.append(str(self.histogram))
      if self.percentiles:
        ret.append("\n".join(self._compute_percentiles()))
    return "\n".join(ret)

  def __repr__(self):
    return "<Category: {0}>".format(self.key)


def make_key_func(cmp_metric):
  """Return a sort key that calls the method named `cmp_metric` on its argument.

  The returned callable looks the method up by name on the object it is
  given, invokes it without arguments and returns the result.
  """
  return lambda candidate: getattr(candidate, cmp_metric)()

# Command-line entry point.

def _build_arg_parser():
  """Build the ArgumentParser describing this script's command line."""
  parser = ArgumentParser(description="Process GCTracer's NVP output")
  parser.add_argument('keys', metavar='KEY', type=str, nargs='+',
                      help='the keys of NVPs to process')
  # The valued options keep nargs='?' for backward compatibility. const
  # mirrors the default so that a bare flag (e.g. "--rank" without a value)
  # selects the default instead of storing None.
  parser.add_argument('--histogram-type', metavar='<linear|log2>',
                      type=str, nargs='?', default="linear", const="linear",
                      choices=["linear", "log2"],
                      help='histogram type to use (default: linear)')
  linear_group = parser.add_argument_group('linear histogram specific')
  linear_group.add_argument('--linear-histogram-granularity',
                            metavar='GRANULARITY', type=int, nargs='?',
                            default=5, const=5,
                            help='histogram granularity (default: 5)')
  log2_group = parser.add_argument_group('log2 histogram specific')
  log2_group.add_argument('--log2-histogram-init-bucket', metavar='START',
                          type=int, nargs='?', default=64, const=64,
                          help='initial bucket size (default: 64)')
  parser.add_argument('--histogram-omit-empty-buckets',
                      dest='histogram_omit_empty',
                      action='store_true',
                      help='omit empty histogram buckets')
  parser.add_argument('--no-histogram', dest='histogram',
                      action='store_false', help='do not print histogram')
  parser.set_defaults(histogram=True)
  parser.set_defaults(histogram_omit_empty=False)
  # The rank value is used as a method name on Category, so restrict it to
  # the metrics that exist.
  parser.add_argument('--rank', metavar='<no|min|max|avg>',
                      type=str, nargs='?',
                      default="no", const="no",
                      choices=["no", "min", "max", "avg"],
                      help="rank keys by metric (default: no)")
  parser.add_argument('--csv', dest='csv',
                      action='store_true', help='provide output as csv')
  parser.add_argument('--percentiles', dest='percentiles',
                      type=str, default="",
                      help='comma separated list of percentiles')
  return parser


def _parse_percentiles(spec):
  """Parse a comma separated percentile list into a list of floats.

  Empty items are skipped silently (the default spec is the empty string).
  Items that are not numbers or lie outside [0, 100] are skipped with a
  warning on stderr.
  """
  import sys  # Local import: only needed for the warnings below.

  percentiles = []
  for token in spec.split(','):
    token = token.strip()
    if not token:
      continue
    try:
      percentile = float(token)
    except ValueError:
      sys.stderr.write(
          "Ignoring invalid percentile: {0}\n".format(token))
      continue
    if not 0 <= percentile <= 100:
      sys.stderr.write(
          "Ignoring out-of-range percentile: {0}\n".format(token))
      continue
    percentiles.append(percentile)
  return percentiles


def main():
  """Read NVP lines from stdin and print statistics for the requested keys.

  One Category is created per KEY argument; every stdin line is split with
  split_nvp and offered to all categories. Categories that never saw a
  value are dropped, the rest are optionally ranked and then printed.
  """
  parser = _build_arg_parser()
  args = parser.parse_args()

  histogram = None
  if args.histogram:
    if args.histogram_type == "log2":
      if args.log2_histogram_init_bucket < 1:
        parser.error("--log2-histogram-init-bucket must be at least 1")
      bucket_trait = Log2Bucket(args.log2_histogram_init_bucket)
    else:
      if args.linear_histogram_granularity < 1:
        parser.error("--linear-histogram-granularity must be at least 1")
      bucket_trait = LinearBucket(args.linear_histogram_granularity)
    histogram = Histogram(bucket_trait, not args.histogram_omit_empty)

  percentiles = _parse_percentiles(args.percentiles)

  # Every category gets its own copy of the histogram so that counts are
  # not shared between keys.
  categories = [ Category(key, deepcopy(histogram), args.csv, percentiles)
                 for key in args.keys ]

  # readline() returns '' only at end of input.
  for line in iter(stdin.readline, ''):
    obj = split_nvp(line)
    for category in categories:
      category.process_entry(obj)

  # Filter out empty categories.
  categories = [x for x in categories if not x.empty()]

  if args.rank != "no":
    categories = sorted(categories, key=make_key_func(args.rank), reverse=True)

  for category in categories:
    print(category)


# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()