Hash :
2f4a7518
Author :
Date :
2019-08-16T14:09:13
Refactor perf tests to fix metric/story swapping Refactors the perf tests to fix the issue of metric and story being swapped, which causes issues when trying to convert to histograms. Specifically, does the following: 1. Rolls the version of src/tests/perf_tests/third_party/perf/ to Chromium 476dae823269c8d05b544271af97ad1adb0db8ee 2. Switch to using PerfResultReporter instead of PrintResult directly. 3. Split RenderTestParams::suffix into backend and story; backend is used as part of the metric, while story is used as the story. 4. Remove the "average" metric that was being automatically reported by ANGLEPerfTest, as reported results are automatically averaged. 5. Update the reported metric to more clearly distinguish between test, backend, and metric. It is now name_backend.metric. e.g. DrawCallPerf_vulkan.wall_time. Bug: chromium:923564,chromium:924618 Change-Id: I00cc191407052f23df57dbfa53b6fb088fc26960 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1762360 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jonah Ryan-Davis <jonahr@google.com>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
#!/usr/bin/python2
#
# Copyright 2015 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# perf_test_runner.py:
# Helper script for running and analyzing perftest results. Runs the
# selected test 'max_experiments' times, printing out the running mean
# and coefficient of variation of the population after every result.
#
import glob
import subprocess
import sys
import os
import re
# Parent of the directory that contains this script (presumably the root of
# the checkout -- confirm against the repository layout).
base_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))

# Look for a [Rr]elease build. The pattern is anchored at base_path so the
# search does not depend on the current working directory; the matches are
# absolute paths, which os.path.join() later keeps unchanged.
perftests_paths = glob.glob(os.path.join(base_path, 'out', '*elease*'))

# Name of the metric extracted from the test output on every experiment.
metric = 'wall_time'

# Number of times the test binary is run after calibration.
max_experiments = 10

binary_name = 'angle_perftests'
if sys.platform == 'win32':
    binary_name += '.exe'

# Every score collected so far, across all experiments.
scores = []
# Thanks to http://stackoverflow.com/a/27758326
def mean(data):
    """Compute the arithmetic mean of a sized collection of numbers.

    Args:
        data: a collection supporting len() and sum().

    Returns:
        The mean as a float.

    Raises:
        ValueError: if data is empty.
    """
    count = len(data)
    if count == 0:
        raise ValueError('mean requires at least one data point')
    # Both operands are converted to float so the division is never an
    # integer division, regardless of the interpreter version.
    total = float(sum(data))
    return total / float(count)
def sum_of_square_deviations(data, c):
    """Accumulate the squared distance of every item in data from c.

    Args:
        data: iterable of values convertible to float.
        c: the center value the deviations are measured from.

    Returns:
        The sum of (float(x) - c) ** 2 over all x in data; 0 for empty data.
    """
    total = 0
    for value in data:
        deviation = float(value) - c
        total += deviation**2
    return total
def coefficient_of_variation(data):
    """Return the population coefficient of variation of data.

    This is the population standard deviation divided by the mean.

    Args:
        data: a sized collection of numbers.

    Returns:
        Population standard deviation of data divided by its mean.

    Raises:
        ValueError: if data holds fewer than two values.
    """
    count = len(data)
    if count <= 1:
        raise ValueError('variance requires at least two data points')
    center = mean(data)
    # Population variance: divide by the full count, not count - 1.
    population_variance = sum_of_square_deviations(data, center) / count
    population_stddev = population_variance**0.5
    return population_stddev / center
def truncated_list(data, n):
    """Return data sorted ascending with the n lowest and n highest values removed.

    Args:
        data: iterable of mutually comparable values.
        n: number of items to drop from each end of the sorted sequence.

    Returns:
        A new sorted list with len(data) - 2 * n items. With n == 0 the whole
        sorted list is returned.

    Raises:
        ValueError: if n is negative or data has fewer than 2 * n items.
    """
    if n < 0:
        raise ValueError('truncation size must be non-negative')
    ordered = sorted(data)
    if len(ordered) < n * 2:
        raise ValueError('list not large enough to truncate')
    # Use an explicit end index: a "[n:-n]" slice is empty when n == 0
    # because -0 == 0, which would drop every element instead of none.
    return ordered[n:len(ordered) - n]
def truncated_mean(data, n):
    """Return the mean of data after truncation; n is the truncation size.

    The truncation is done by truncated_list(data, n) and the average by
    mean(); errors raised by either helper propagate to the caller.
    """
    trimmed = truncated_list(data, n)
    return mean(trimmed)
def truncated_cov(data, n):
    """Return the coefficient of variation of data after truncation.

    n is the truncation size passed to truncated_list(); the result of that
    call is handed to coefficient_of_variation(). Errors raised by either
    helper propagate to the caller.
    """
    trimmed = truncated_list(data, n)
    return coefficient_of_variation(trimmed)
# Find the most recently modified test binary among the candidate build
# directories.
newest_binary = None
newest_mtime = None

for path in perftests_paths:
    binary_path = os.path.join(base_path, path, binary_name)
    if os.path.exists(binary_path):
        binary_mtime = os.path.getmtime(binary_path)
        # The None check comes first so the mtime comparison only runs once
        # a previous candidate has been recorded.
        if (newest_binary is None) or (binary_mtime > newest_mtime):
            newest_binary = binary_path
            newest_mtime = binary_mtime

perftests_path = newest_binary

# Without a binary there is nothing to run; report it and stop with status 1.
if perftests_path is None or not os.path.exists(perftests_path):
    print('Cannot find Release %s!' % binary_name)
    sys.exit(1)

# Default test to run, chosen per platform.
if sys.platform == 'win32':
    test_name = 'DrawCallPerfBenchmark.Run/d3d11_null'
else:
    test_name = 'DrawCallPerfBenchmark.Run/gl'

# The first command-line argument, when given, overrides the default test.
if len(sys.argv) >= 2:
    test_name = sys.argv[1]

print('Using test executable: ' + perftests_path)
print('Test name: ' + test_name)
def get_results(metric, extra_args=None):
    """Run the selected perf test once and return the values reported for metric.

    The binary at perftests_path is launched with a --gtest_filter for
    test_name plus extra_args, and its standard output is scanned for result
    lines.

    Args:
        metric: name of the metric to extract, e.g. 'wall_time'. It is
            inserted into a regular expression as-is.
        extra_args: optional sequence of additional command-line arguments
            for the test binary.

    Returns:
        A list of floats, one per matching result line, in output order.

    Exits the process with status 3 if the output reports that more than one
    test ran, and with status 1 if no value for metric was found.
    """
    # A None default avoids sharing one mutable list between calls.
    command = [perftests_path, '--gtest_filter=' + test_name]
    if extra_args is not None:
        command += list(extra_args)

    # universal_newlines=True makes communicate() return text instead of
    # bytes on Python 3, so the str patterns below work on Python 2 and 3.
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True)
    output, err = process.communicate()

    # The statistics assume a single test; refuse a filter matching several.
    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        print("Found more than one test result in output:")
        print(output)
        sys.exit(3)

    # Results are reported in the format:
    # name_backend.metric: story= value units.
    pattern = r'\.' + metric + r':.*= ([0-9.]+)'
    m = re.findall(pattern, output)
    if not m:
        print("Did not find the metric '%s' in the test output:" % metric)
        print(output)
        sys.exit(1)

    return [float(value) for value in m]
# Calibrate the number of steps. get_results() returns floats; convert to an
# int so the "--steps" argument below is passed as e.g. "1000" rather than
# "1000.0" (or exponent notation for very large values).
steps = int(get_results("steps", ["--calibration"])[0])
print("running with %d steps." % steps)

# Loop 'max_experiments' times, running the tests.
for experiment in range(max_experiments):
    experiment_scores = get_results(metric, ["--steps", str(steps)])

    for score in experiment_scores:
        sys.stdout.write("%s: %.2f" % (metric, score))
        scores.append(score)

        # The coefficient of variation needs at least two data points.
        if (len(scores) > 1):
            sys.stdout.write(", mean: %.2f" % mean(scores))
            sys.stdout.write(", variation: %.2f%%" % (coefficient_of_variation(scores) * 100.0))

        # From eight scores on, also report statistics with one eighth of
        # the samples (rounded down) trimmed from each end.
        if (len(scores) > 7):
            truncation_n = len(scores) >> 3
            sys.stdout.write(", truncated mean: %.2f" % truncated_mean(scores, truncation_n))
            sys.stdout.write(", variation: %.2f%%" % (truncated_cov(scores, truncation_n) * 100.0))

        # Terminate the line assembled by the sys.stdout.write() calls above.
        print("")