diff --git a/wlauto/core/execution.py b/wlauto/core/execution.py
index 5abadc87..25554db4 100644
--- a/wlauto/core/execution.py
+++ b/wlauto/core/execution.py
@@ -192,6 +192,9 @@ class ExecutionContext(object):
         self.current_job = None
         self.output_directory = self.run_output_directory
 
+    def add_metric(self, *args, **kwargs):
+        self.result.add_metric(*args, **kwargs)
+
     def add_artifact(self, name, path, kind, *args, **kwargs):
         if self.current_job is None:
             self.add_run_artifact(name, path, kind, *args, **kwargs)
diff --git a/wlauto/core/result.py b/wlauto/core/result.py
index 58df6212..38a052b0 100644
--- a/wlauto/core/result.py
+++ b/wlauto/core/result.py
@@ -261,8 +261,8 @@ class IterationResult(object):
         self.metrics = []
         self.artifacts = []
 
-    def add_metric(self, name, value, units=None, lower_is_better=False):
-        self.metrics.append(Metric(name, value, units, lower_is_better))
+    def add_metric(self, name, value, units=None, lower_is_better=False, classifiers=None):
+        self.metrics.append(Metric(name, value, units, lower_is_better, classifiers))
 
     def has_metric(self, name):
         for metric in self.metrics:
@@ -300,14 +300,18 @@ class Metric(object):
                   has no units (e.g. it's a count or a standardised score).
     :param lower_is_better: Boolean flag indicating where lower values are
                             better than higher ones. Defaults to False.
+    :param classifiers: A set of key-value pairs to further classify this metric
+                        beyond current iteration (e.g. this can be used to identify
+                        sub-tests).
 
     """
 
-    def __init__(self, name, value, units=None, lower_is_better=False):
+    def __init__(self, name, value, units=None, lower_is_better=False, classifiers=None):
         self.name = name
         self.value = numeric(value)
         self.units = units
         self.lower_is_better = lower_is_better
+        self.classifiers = classifiers or {}
 
     def to_dict(self):
         return self.__dict__
diff --git a/wlauto/result_processors/standard.py b/wlauto/result_processors/standard.py
index f0f5c8cd..6ff058e4 100644
--- a/wlauto/result_processors/standard.py
+++ b/wlauto/result_processors/standard.py
@@ -24,7 +24,9 @@ import os
 import csv
 import json
 
-from wlauto import ResultProcessor, settings
+from wlauto import ResultProcessor, Parameter
+from wlauto.exceptions import ConfigError
+from wlauto.utils.types import list_of_strings
 
 
 class StandardProcessor(ResultProcessor):
@@ -63,15 +65,50 @@ class CsvReportProcessor(ResultProcessor):
 
     name = 'csv'
 
+    parameters = [
+        Parameter('use_all_classifiers', kind=bool, default=False,
+                  description="""
+                  If set to ``True``, this will add a column for every classifier
+                  that features in at least one collected metric.
+
+                  .. note:: This cannot be ``True`` if ``extra_columns`` is set.
+
+                  """),
+        Parameter('extra_columns', kind=list_of_strings,
+                  description="""
+                  List of classifiers to use as columns.
+
+                  .. note:: This cannot be set if ``use_all_classifiers`` is ``True``.
+
+                  """),
+    ]
+
+    def validate(self):
+        if self.use_all_classifiers and self.extra_columns:
+            raise ConfigError('extra_columns cannot be specified when use_all_classifiers is True')
+
     def process_run_result(self, result, context):
-        outfile = os.path.join(settings.output_directory, 'results.csv')
+        if self.use_all_classifiers:
+            classifiers = set([])
+            for ir in result.iteration_results:
+                for metric in ir.metrics:
+                    classifiers.update(metric.classifiers.keys())
+            extra_columns = list(classifiers)
+        elif self.extra_columns:
+            extra_columns = self.extra_columns
+        else:
+            extra_columns = []
+
+        outfile = os.path.join(context.run_output_directory, 'results.csv')
         with open(outfile, 'wb') as wfh:
             writer = csv.writer(wfh)
-            writer.writerow(['id', 'workload', 'iteration', 'metric', 'value', 'units'])
-            for result in result.iteration_results:
-                for metric in result.metrics:
-                    row = [result.id, result.spec.label, result.iteration,
-                           metric.name, str(metric.value), metric.units or '']
+            writer.writerow(['id', 'workload', 'iteration', 'metric', ] +
+                            extra_columns + ['value', 'units'])
+            for ir in result.iteration_results:
+                for metric in ir.metrics:
+                    row = ([ir.id, ir.spec.label, ir.iteration, metric.name] +
+                           [str(metric.classifiers.get(c) or '') for c in extra_columns] +
+                           [str(metric.value), metric.units or ''])
                     writer.writerow(row)
         context.add_artifact('run_result_csv', 'results.csv', 'export')
 
@@ -86,7 +123,7 @@ class JsonReportProcessor(ResultProcessor):
     name = 'json'
 
     def process_run_result(self, result, context):
-        outfile = os.path.join(settings.output_directory, 'results.json')
+        outfile = os.path.join(context.run_output_directory, 'results.json')
         with open(outfile, 'wb') as wfh:
             output = []
             for result in result.iteration_results:
@@ -111,7 +148,7 @@ class SummaryCsvProcessor(ResultProcessor):
     name = 'summary_csv'
 
     def process_run_result(self, result, context):
-        outfile = os.path.join(settings.output_directory, 'summary.csv')
+        outfile = os.path.join(context.run_output_directory, 'summary.csv')
         with open(outfile, 'wb') as wfh:
             writer = csv.writer(wfh)
             writer.writerow(['id', 'workload', 'iteration', 'metric', 'value', 'units'])
diff --git a/wlauto/workloads/telemetry/__init__.py b/wlauto/workloads/telemetry/__init__.py
index 1135de59..8d718c7d 100644
--- a/wlauto/workloads/telemetry/__init__.py
+++ b/wlauto/workloads/telemetry/__init__.py
@@ -125,7 +125,7 @@ class Telemetry(Workload):
             raise WorkloadError('Unexected error from run_benchmark: {}'.format(ret))
         if self.extract_fps and 'trace' not in self.run_benchmark_params:
             raise ConfigError('"trace" profiler must be enabled in order to extract FPS for Telemetry')
-        self._resovlve_run_benchmark_path()
+        self._resolve_run_benchmark_path()
 
     def setup(self, context):
         self.raw_output = None
@@ -133,7 +133,7 @@ class Telemetry(Workload):
 
     def run(self, context):
         self.logger.debug(self.command)
-        self.raw_output, _ = check_output(self.command, shell=True, timeout=self.run_timeout, ignore=1)
+        self.raw_output, _ = check_output(self.command, shell=True, timeout=self.run_timeout, ignore=range(256))
 
     def update_result(self, context):  # pylint: disable=too-many-locals
         if not self.raw_output:
@@ -158,10 +158,12 @@ class Telemetry(Workload):
                 context.result.add_metric(name_template.format('sd'), result.std,
                                           result.units, lower_is_better=True)
                 writer.writerows(result.rows)
-        context.add_artifact('telemetry', csv_outfile, kind='data')
 
-        for kind, values in averages.iteritems():
-            context.result.add_metric(kind, special_average(values), lower_is_better=True)
+                for i, value in enumerate(result.values, 1):
+                    context.add_metric(result.kind, value, units=result.units,
+                                       classifiers={'url': result.url, 'time': i})
+
+        context.add_artifact('telemetry', csv_outfile, kind='data')
 
         for idx, artifact in enumerate(artifacts):
             if is_zipfile(artifact):
@@ -199,10 +201,10 @@ class Telemetry(Workload):
                                                          device_opts,
                                                          self.run_benchmark_params)
 
-    def _resovlve_run_benchmark_path(self):
+    def _resolve_run_benchmark_path(self):
         # pylint: disable=access-member-before-definition
         if self.run_benchmark_path:
-            if not os.path.exists(self.run_bencmark_path):
+            if not os.path.exists(self.run_benchmark_path):
                 raise ConfigError('run_benchmark path "{}" does not exist'.format(self.run_benchmark_path))
         else:
            self.run_benchmark_path = os.path.join(self.dependencies_directory, 'telemetry', 'run_benchmark')
@@ -291,21 +293,6 @@ def parse_telemetry_results(filepath):
     return results, artifacts
 
 
-def special_average(values):
-    """Overall score calculation. Tries to accound for large differences
-    between different pages."""
-    negs = [v < 0 for v in values]
-    abs_logs = [(av and math.log(av, 10) or av)
-                for av in map(abs, values)]
-    signed_logs = []
-    for lv, n in zip(abs_logs, negs):
-        if n:
-            signed_logs.append(-lv)
-        else:
-            signed_logs.append(lv)
-    return get_meansd(signed_logs)[0]
-
-
 if __name__ == '__main__':
     import sys
     from pprint import pprint
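
For readers following the classifiers plumbing above, here is a minimal standalone sketch of the pattern: a thin ExecutionContext.add_metric pass-through plus the classifiers-or-empty-dict default on the metric object. The FakeMetric, FakeResult and FakeContext classes are hypothetical stand-ins, not wlauto code, so the snippet runs on its own.

# Standalone illustration only -- not part of the patch. The classes below are
# hypothetical stand-ins mimicking the pass-through and default added above.
class FakeMetric(object):
    def __init__(self, name, value, units=None, lower_is_better=False, classifiers=None):
        self.name = name
        self.value = value
        self.units = units
        self.lower_is_better = lower_is_better
        self.classifiers = classifiers or {}  # never None, so .get() is always safe


class FakeResult(object):
    def __init__(self):
        self.metrics = []

    def add_metric(self, *args, **kwargs):
        self.metrics.append(FakeMetric(*args, **kwargs))


class FakeContext(object):
    def __init__(self):
        self.result = FakeResult()

    def add_metric(self, *args, **kwargs):  # thin pass-through to the current result
        self.result.add_metric(*args, **kwargs)


ctx = FakeContext()
ctx.add_metric('frame_time', 16.7, units='ms',
               classifiers={'url': 'http://example.com', 'time': 1})
assert ctx.result.metrics[0].classifiers['url'] == 'http://example.com'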
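
A second standalone sketch shows how classifier keys turn into extra CSV columns, mirroring the use_all_classifiers behaviour added to the csv result processor. The SampleMetric namedtuple and the sample data are hypothetical; the column-expansion logic is the part being illustrated.

# Standalone illustration only -- not part of the patch. Collect every
# classifier key seen on any metric and emit one column per key.
import csv
import sys
from collections import namedtuple

SampleMetric = namedtuple('SampleMetric', 'name value units classifiers')

metrics = [
    SampleMetric('frame_time', 16.7, 'ms', {'url': 'http://example.com', 'time': 1}),
    SampleMetric('frame_time', 17.1, 'ms', {'url': 'http://example.com', 'time': 2}),
    SampleMetric('score', 42, None, {}),
]

# Union of classifier keys across all metrics (the use_all_classifiers case).
extra_columns = sorted({key for m in metrics for key in m.classifiers})

writer = csv.writer(sys.stdout)
writer.writerow(['metric'] + extra_columns + ['value', 'units'])
for m in metrics:
    writer.writerow([m.name] +
                    [str(m.classifiers.get(c) or '') for c in extra_columns] +
                    [str(m.value), m.units or ''])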