diff --git a/wlauto/core/configuration.py b/wlauto/core/configuration.py index 5adbea0a..3b3208b3 100644 --- a/wlauto/core/configuration.py +++ b/wlauto/core/configuration.py @@ -243,6 +243,13 @@ class RebootPolicy(object): else: return cmp(self.policy, other) + def to_pod(self): + return self.policy + + @staticmethod + def from_pod(pod): + return RebootPolicy(pod) + class RunConfigurationItem(object): """ diff --git a/wlauto/exceptions.py b/wlauto/exceptions.py index 36f3050a..0ca445d9 100644 --- a/wlauto/exceptions.py +++ b/wlauto/exceptions.py @@ -141,3 +141,20 @@ class WorkerThreadError(WAError): message = 'Exception of type {} occured on thread {}:\n'.format(orig_name, thread) message += '{}\n{}: {}'.format(get_traceback(self.exc_info), orig_name, orig) super(WorkerThreadError, self).__init__(message) + + +class SerializerSyntaxError(Exception): + """ + Error loading a serialized structure from/to a file handle. + """ + + def __init__(self, message, line=None, column=None): + super(SerializerSyntaxError, self).__init__(message) + self.line = line + self.column = column + + def __str__(self): + linestring = ' on line {}'.format(self.line) if self.line else '' + colstring = ' in column {}'.format(self.column) if self.column else '' + message = 'Syntax Error{}: {}' + return message.format(''.join([linestring, colstring]), self.message) diff --git a/wlauto/result_processors/json_rp.py b/wlauto/result_processors/json_rp.py new file mode 100644 index 00000000..22de698a --- /dev/null +++ b/wlauto/result_processors/json_rp.py @@ -0,0 +1,122 @@ +# Copyright 2014-2015 ARM Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +#pylint: disable=E1101,W0201 +import os +from base64 import b64encode + +from wlauto import ResultProcessor, Parameter +from wlauto.utils.serializer import json +from wlauto.utils.misc import istextfile +from wlauto.utils.types import list_of_strings +from wlauto.exceptions import ResultProcessorError + + +class JsonReportProcessor(ResultProcessor): + + name = 'json' + description = """ + Produces a JSON file with WA config, results etc. + + + This includes embedded artifacts either as text or base64 + + """ + + parameters = [ + Parameter("ignore_artifact_types", kind=list_of_strings, + default=['export', 'raw'], + description="""A list of which artifact types to be ignored, + and thus not embedded in the JSON""") + ] + final = {} + + def initialize(self, context): + self.final = context.run_info.to_dict() + del self.final['workload_specs'] + + wa_adapter = self.final['device'] + self.final['device'] = {} + self.final['device']['props'] = self.final['device_properties'] + self.final['device']['wa_adapter'] = wa_adapter + del self.final['device_properties'] + + self.final['output_directory'] = os.path.abspath(context.output_directory) + self.final['artifacts'] = [] + self.final['workloads'] = context.config.to_dict()['workload_specs'] + for workload in self.final['workloads']: + workload['name'] = workload['workload_name'] + del workload['workload_name'] + workload['results'] = [] + + def export_iteration_result(self, result, context): + r = {} + r['iteration'] = context.current_iteration + r['status'] = result.status + r['events'] = [e.to_dict() for 
e in result.events] + r['metrics'] = [] + for m in result.metrics: + md = m.to_dict() + md['is_summary'] = m.name in context.workload.summary_metrics + r['metrics'].append(md) + iteration_artefacts = [self.embed_artifact(context, a) for a in context.iteration_artifacts] + r['artifacts'] = [e for e in iteration_artefacts if e is not None] + for workload in self.final['workloads']: + if workload['id'] == context.spec.id: + workload.update(r) + break + else: + raise ResultProcessorError("No workload spec with matching id found") + + def export_run_result(self, result, context): + run_artifacts = [self.embed_artifact(context, a) for a in context.run_artifacts] + self.logger.debug('Generating results bundle...') + run_stats = { + 'status': result.status, + 'events': [e.to_dict() for e in result.events], + 'end_time': context.run_info.end_time, + 'duration': context.run_info.duration.total_seconds(), + 'artifacts': [e for e in run_artifacts if e is not None], + } + self.final.update(run_stats) + json_path = os.path.join(os.path.abspath(context.output_directory), "run.json") + with open(json_path, 'w') as json_file: + json.dump(self.final, json_file) + + def embed_artifact(self, context, artifact): + artifact_path = os.path.join(context.output_directory, artifact.path) + + if not os.path.exists(artifact_path): + self.logger.debug('Artifact {} has not been generated'.format(artifact_path)) + return + elif artifact.kind in self.ignore_artifact_types: + self.logger.debug('Ignoring {} artifact {}'.format(artifact.kind, artifact_path)) + return + else: + self.logger.debug('Uploading artifact {}'.format(artifact_path)) + entry = artifact.to_dict() + path = os.path.join(os.path.abspath(context.output_directory), entry['path']) + if istextfile(open(path)): + entry['encoding'] = "text" + entry['content'] = open(path).read() + else: + entry['encoding'] = "base64" + entry['content'] = b64encode(open(path).read()) + + del entry['path'] + del entry['level'] + del entry['mandatory'] + 
return entry diff --git a/wlauto/result_processors/standard.py b/wlauto/result_processors/standard.py index 08b8785e..254fb21b 100644 --- a/wlauto/result_processors/standard.py +++ b/wlauto/result_processors/standard.py @@ -22,7 +22,6 @@ text files in various formats. """ import os import csv -import json from wlauto import ResultProcessor, Parameter from wlauto.exceptions import ConfigError @@ -124,32 +123,6 @@ class CsvReportProcessor(ResultProcessor): writer.writerow(row) -class JsonReportProcessor(ResultProcessor): - """ - Creates a ``results.json`` in the output directory containing results for - all iterations in JSON format. - - """ - - name = 'json' - - def process_run_result(self, result, context): - outfile = os.path.join(context.run_output_directory, 'results.json') - with open(outfile, 'wb') as wfh: - output = [] - for result in result.iteration_results: - output.append({ - 'id': result.id, - 'workload': result.workload.name, - 'iteration': result.iteration, - 'metrics': [dict([(k, v) for k, v in m.__dict__.iteritems() - if not k.startswith('_')]) - for m in result.metrics], - }) - json.dump(output, wfh, indent=4) - context.add_artifact('run_result_json', 'results.json', 'export') - - class SummaryCsvProcessor(ResultProcessor): """ Similar to csv result processor, but only contains workloads' summary metrics. diff --git a/wlauto/utils/misc.py b/wlauto/utils/misc.py index 48efc96c..76a54443 100644 --- a/wlauto/utils/misc.py +++ b/wlauto/utils/misc.py @@ -815,3 +815,27 @@ def sha256(path, chunk=2048): def urljoin(*parts): return '/'.join(p.rstrip('/') for p in parts) + + +# From: http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/ +def istextfile(fileobj, blocksize=512): + """ Uses heuristics to guess whether the given file is text or binary, + by reading a single block of bytes from the file. 
+ If more than 30% of the chars in the block are non-text, or there + are NUL ('\x00') bytes in the block, assume this is a binary file. + """ + _text_characters = (b''.join(chr(i) for i in range(32, 127)) + + b'\n\r\t\f\b') + + block = fileobj.read(blocksize) + if b'\x00' in block: + # Files with null bytes are binary + return False + elif not block: + # An empty file is considered a valid text file + return True + + # Use translate's 'deletechars' argument to efficiently remove all + # occurrences of _text_characters from the block + nontext = block.translate(None, _text_characters) + return float(len(nontext)) / len(block) <= 0.30 diff --git a/wlauto/utils/serializer.py b/wlauto/utils/serializer.py new file mode 100644 index 00000000..d267cd19 --- /dev/null +++ b/wlauto/utils/serializer.py @@ -0,0 +1,243 @@ +""" +This module contains wrappers for Python serialization modules for +common formats that make it easier to serialize/deserialize WA +Plain Old Data structures (serializable WA classes implement +``to_pod()``/``from_pod()`` methods for converting between POD +structures and Python class instances). + +The modifications to standard serialization procedures are: + + - mappings are deserialized as ``OrderedDict``\ 's rather than standard + Python ``dict``\ 's. This allows for cleaner syntax in certain parts + of WA configuration (e.g. values to be written to files can be specified + as a dict, and they will be written in the order specified in the config). + - regular expressions are automatically encoded/decoded. This allows for + configuration values to be transparently specified as strings or regexes + in the POD config. + +This module exports the "wrapped" versions of serialization libraries, +and this should be imported and used instead of importing the libraries +directly. i.e. 
:: + + from wa.utils.serializer import yaml + pod = yaml.load(fh) + +instead of :: + + import yaml + pod = yaml.load(fh) + +It's also possible to use the serializer directly:: + + from wa.utils import serializer + pod = serializer.load(fh) + +This can also be used to ``dump()`` POD structures. By default, +``dump()`` will produce JSON, but ``fmt`` parameter may be used to +specify an alternative format (``yaml`` or ``python``). ``load()`` will +use the file extension to guess the format, but ``fmt`` may also be used +to specify it explicitly. + +""" +# pylint: disable=unused-argument + +import os +import re +import json as _json +from collections import OrderedDict +from datetime import datetime + +import yaml as _yaml +import dateutil.parser + +from wlauto.exceptions import SerializerSyntaxError +from wlauto.utils.types import regex_type +from wlauto.utils.misc import isiterable + + +__all__ = [ + 'json', + 'yaml', + 'read_pod', + 'dump', + 'load', +] + + +class WAJSONEncoder(_json.JSONEncoder): + + def default(self, obj): # pylint: disable=method-hidden + if hasattr(obj, 'to_pod'): + return obj.to_pod() + elif isinstance(obj, regex_type): + return 'REGEX:{}:{}'.format(obj.flags, obj.pattern) + elif isinstance(obj, datetime): + return 'DATET:{}'.format(obj.isoformat()) + else: + return _json.JSONEncoder.default(self, obj) + + +class WAJSONDecoder(_json.JSONDecoder): + + def decode(self, s, **kwargs): + d = _json.JSONDecoder.decode(self, s, **kwargs) + + def try_parse_object(v): + if isinstance(v, basestring) and v.startswith('REGEX:'): + _, flags, pattern = v.split(':', 2) + return re.compile(pattern, int(flags or 0)) + elif isinstance(v, basestring) and v.startswith('DATET:'): + _, pattern = v.split(':', 1) + return dateutil.parser.parse(pattern) + else: + return v + + def load_objects(d): + pairs = [] + for k, v in d.iteritems(): + if hasattr(v, 'iteritems'): + pairs.append((k, load_objects(v))) + elif isiterable(v): + pairs.append((k, [try_parse_object(i) for i 
in v])) + else: + pairs.append((k, try_parse_object(v))) + return OrderedDict(pairs) + + return load_objects(d) + + +class json(object): + + @staticmethod + def dump(o, wfh, indent=4, *args, **kwargs): + return _json.dump(o, wfh, cls=WAJSONEncoder, indent=indent, *args, **kwargs) + + @staticmethod + def load(fh, *args, **kwargs): + try: + return _json.load(fh, cls=WAJSONDecoder, object_pairs_hook=OrderedDict, *args, **kwargs) + except ValueError as e: + raise SerializerSyntaxError(e.message) + + @staticmethod + def loads(s, *args, **kwargs): + try: + return _json.loads(s, cls=WAJSONDecoder, object_pairs_hook=OrderedDict, *args, **kwargs) + except ValueError as e: + raise SerializerSyntaxError(e.message) + + +_mapping_tag = _yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG +_regex_tag = u'tag:wa:regex' + + +def _wa_dict_representer(dumper, data): + return dumper.represent_mapping(_mapping_tag, data.iteritems()) + + +def _wa_regex_representer(dumper, data): + text = '{}:{}'.format(data.flags, data.pattern) + return dumper.represent_scalar(_regex_tag, text) + + +def _wa_dict_constructor(loader, node): + pairs = loader.construct_pairs(node) + seen_keys = set() + for k, _ in pairs: + if k in seen_keys: + raise ValueError('Duplicate entry: {}'.format(k)) + seen_keys.add(k) + return OrderedDict(pairs) + + +def _wa_regex_constructor(loader, node): + value = loader.construct_scalar(node) + flags, pattern = value.split(':', 1) + return re.compile(pattern, int(flags or 0)) + + +_yaml.add_representer(OrderedDict, _wa_dict_representer) +_yaml.add_representer(regex_type, _wa_regex_representer) +_yaml.add_constructor(_mapping_tag, _wa_dict_constructor) +_yaml.add_constructor(_regex_tag, _wa_regex_constructor) + + +class yaml(object): + + @staticmethod + def dump(o, wfh, *args, **kwargs): + return _yaml.dump(o, wfh, *args, **kwargs) + + @staticmethod + def load(fh, *args, **kwargs): + try: + return _yaml.load(fh, *args, **kwargs) + except _yaml.YAMLError as e: + lineno = None + if 
hasattr(e, 'problem_mark'): + lineno = e.problem_mark.line # pylint: disable=no-member + raise SerializerSyntaxError(e.message, lineno) + + loads = load + + +class python(object): + + @staticmethod + def dump(o, wfh, *args, **kwargs): + raise NotImplementedError() + + @classmethod + def load(cls, fh, *args, **kwargs): + return cls.loads(fh.read()) + + @staticmethod + def loads(s, *args, **kwargs): + pod = {} + try: + exec s in pod # pylint: disable=exec-used + except SyntaxError as e: + raise SerializerSyntaxError(e.message, e.lineno) + for k in pod.keys(): + if k.startswith('__'): + del pod[k] + return pod + + +def read_pod(source, fmt=None): + if isinstance(source, basestring): + with open(source) as fh: + return _read_pod(fh, fmt) + elif hasattr(source, 'read') and (hasattr(source, 'name') or fmt): + return _read_pod(source, fmt) + else: + message = 'source must be a path or an open file handle; got {}' + raise ValueError(message.format(type(source))) + + +def dump(o, wfh, fmt='json', *args, **kwargs): + serializer = {'yaml': yaml, + 'json': json, + 'python': python, + 'py': python, + }.get(fmt) + if serializer is None: + raise ValueError('Unknown serialization format: "{}"'.format(fmt)) + serializer.dump(o, wfh, *args, **kwargs) + + +def load(s, fmt='json', *args, **kwargs): + return read_pod(s, fmt=fmt) + + +def _read_pod(fh, fmt=None): + if fmt is None: + fmt = os.path.splitext(fh.name)[1].lower().strip('.') + if fmt == 'yaml': + return yaml.load(fh) + elif fmt == 'json': + return json.load(fh) + elif fmt == 'py': + return python.load(fh) + else: + raise ValueError('Unknown format "{}": {}'.format(fmt, getattr(fh, 'name', '')))