mirror of https://github.com/ARM-software/workload-automation.git synced 2025-09-01 19:02:31 +01:00

Initial commit of open source Workload Automation.

Sergei Trofimov
2015-03-10 13:09:31 +00:00
commit a747ec7e4c
412 changed files with 41401 additions and 0 deletions


@@ -0,0 +1,16 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


@@ -0,0 +1,375 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import csv
import re
from wlauto import ResultProcessor, settings, instrumentation
from wlauto.exceptions import ConfigError, ResultProcessorError
class DVFS(ResultProcessor):
name = 'dvfs'
description = """
Reports DVFS state residency data from ftrace power events.
This generates a ``dvfs.csv`` in the top-level results directory that,
for each workload iteration, reports the percentage of time each CPU core
spent in each of the DVFS frequency states (P-states), as well as the
percentage of time spent idle, during the execution of the workload.
.. note:: The ``trace-cmd`` instrument *MUST* be enabled in the instrumentation,
and at least ``'power*'`` events must be enabled.
"""
def __init__(self, **kwargs):
super(DVFS, self).__init__(**kwargs)
self.device = None
self.infile = None
self.outfile = None
self.current_cluster = None
self.currentstates_of_clusters = []
self.current_frequency_of_clusters = []
self.timestamp = []
self.state_time_map = {} # hold state at timestamp
self.cpuid_time_map = {} # hold cpuid at timestamp
self.cpu_freq_time_spent = {}
self.cpuids_of_clusters = []
self.power_state = [0, 1, 2, 3]
self.UNKNOWNSTATE = 4294967295
self.multiply_factor = None
self.corename_of_clusters = []
self.numberofcores_in_cluster = []
self.minimum_frequency_cluster = []
self.idlestate_description = {}
def validate(self):
if not instrumentation.instrument_is_installed('trace-cmd'):
raise ConfigError('"dvfs" works only if "trace_cmd" in enabled in instrumentation')
def initialize(self, context): # pylint: disable=R0912
self.device = context.device
if not self.device.core_names:
message = 'Device does not specify its core types (core_names/core_clusters not set in device_config).'
raise ResultProcessorError(message)
number_of_clusters = max(self.device.core_clusters) + 1
# On IKS devices, the actual number of cores is double
# what device.number_of_cores reports.
if self.device.scheduler == 'iks':
self.multiply_factor = 2
elif self.device.scheduler == 'unknown':
# Device doesn't specify its scheduler type. It could be IKS, in
# which case reported values would be wrong, so error out.
message = ('The device does not specify its scheduler type. If you are '
'using a generic device interface, please make sure to set the '
'"scheduler" parameter in the device config.')
raise ResultProcessorError(message)
else:
self.multiply_factor = 1
# Separate out the cores in each cluster.
# This is a list of lists of cores, one list per cluster.
listof_cores_clusters = []
for cluster in range(number_of_clusters):
listof_cores_clusters.append([core for core in self.device.core_clusters if core == cluster])
# Extract the minimum frequency of each cluster and
# the idle power states with their descriptive names.
#
total_cores = 0
current_cores = 0
for cluster, cores_list in enumerate(listof_cores_clusters):
self.corename_of_clusters.append(self.device.core_names[total_cores])
if self.device.scheduler != 'iks':
self.idlestate_description.update(self.device.get_cpuidle_states(total_cores))
else:
self.idlestate_description.update(self.device.get_cpuidle_states())
total_cores += len(cores_list)
self.numberofcores_in_cluster.append(len(cores_list))
for i in range(current_cores, total_cores):
if i in self.device.active_cpus:
self.minimum_frequency_cluster.append(int(self.device.get_cpu_min_frequency("cpu{}".format(i))))
break
current_cores = total_cores
length_frequency_cluster = len(self.minimum_frequency_cluster)
if length_frequency_cluster != number_of_clusters:
diff = number_of_clusters - length_frequency_cluster
offline_value = -1
for i in range(diff):
if self.device.scheduler != 'iks':
self.minimum_frequency_cluster.append(offline_value)
else:
self.minimum_frequency_cluster.append(self.device.iks_switch_frequency)
def process_iteration_result(self, result, context):
"""
Parse trace.txt for each iteration, calculate DVFS state/frequency residency,
dump the result to csv, and flush the data for the next iteration.
"""
self.infile = os.path.join(context.output_directory, 'trace.txt')
if os.path.isfile(self.infile):
self.logger.debug('Running result_processor "dvfs"')
self.outfile = os.path.join(settings.output_directory, 'dvfs.csv')
self.flush_parse_initialize()
self.calculate()
self.percentage()
self.generate_csv(context)
self.logger.debug('Completed result_processor "dvfs"')
else:
self.logger.debug('trace.txt not found.')
def flush_parse_initialize(self):
"""
Store the state and cpu_id for each timestamp from trace.txt, and flush all
values before the next iteration.
"""
self.current_cluster = 0
self.current_frequency_of_clusters = []
self.timestamp = []
self.currentstates_of_clusters = []
self.state_time_map = {}
self.cpuid_time_map = {}
self.cpu_freq_time_spent = {}
self.cpuids_of_clusters = []
self.parse() # Parse trace.txt generated from trace-cmd instrumentation
# Initialize the state of each core in each cluster, and the frequency of
# each cluster to its minimum frequency.
# cpu_ids are assigned per cluster: for IKS devices, cpu_ids repeat across
# clusters; for other devices they increment across clusters.
count = 0
for cluster, cores_number in enumerate(self.numberofcores_in_cluster):
self.currentstates_of_clusters.append([-1 for dummy in range(cores_number)])
self.current_frequency_of_clusters.append(self.minimum_frequency_cluster[cluster])
if self.device.scheduler == 'iks':
self.cpuids_of_clusters.append([j for j in range(cores_number)])
else:
self.cpuids_of_clusters.append(range(count, count + cores_number))
count += cores_number
# Initialize the time spent in each state/frequency for each core.
for i in range(self.device.number_of_cores * self.multiply_factor):
self.cpu_freq_time_spent["cpu{}".format(i)] = {}
for j in self.unique_freq():
self.cpu_freq_time_spent["cpu{}".format(i)][j] = 0
# A -1 state is added to account for time spent offline
offline_value = -1
self.cpu_freq_time_spent["cpu{}".format(i)][offline_value] = 0
if 0 not in self.unique_freq():
self.cpu_freq_time_spent["cpu{}".format(i)][0] = 0
def update_cluster_freq(self, state, cpu_id):
""" Update the cluster frequency and current cluster"""
# For IKS devices a cluster change is only possible when the frequency
# changes; for other devices it is determined by cpu_id.
if self.device.scheduler != 'iks':
self.current_cluster = self.get_cluster(cpu_id, state)
if self.get_state_name(state) == "freqstate":
self.current_cluster = self.get_cluster(cpu_id, state)
self.current_frequency_of_clusters[self.current_cluster] = state
def get_cluster(self, cpu_id, state):
# For IKS, if the current state is greater than or equal to the switch
# frequency then we are on the second cluster, otherwise the first.
# For other devices, look up which cluster the current cpu_id belongs to.
if self.device.scheduler == 'iks':
return 1 if state >= self.device.iks_switch_frequency else 0
else:
for cluster, cpuids_list in enumerate(self.cpuids_of_clusters):
if cpu_id in cpuids_list:
return cluster
def get_cluster_freq(self):
return self.current_frequency_of_clusters[self.current_cluster]
def update_state(self, state, cpu_id): # pylint: disable=R0912
"""
Update the state of each core in every cluster.
This is done for each timestamp.
"""
POWERDOWN = 2
offline_value = -1
# If the state is an unknown state, set the state of the current cpu_id
# to the current cluster's frequency.
# If it is a power state, set the state to that power state.
if self.get_state_name(state) in ["unknownstate", "powerstate"]:
for i in range(len(self.cpuids_of_clusters[self.current_cluster])):
if cpu_id == self.cpuids_of_clusters[self.current_cluster][i]:
if self.get_state_name(state) == "unknownstate":
self.currentstates_of_clusters[self.current_cluster][i] = self.current_frequency_of_clusters[self.current_cluster]
elif self.get_state_name(state) == "powerstate":
self.currentstates_of_clusters[self.current_cluster][i] = state
# If the state is a frequency state, update the state with the current state.
# For IKS, if all cores are powered down and the current state is a frequency
# state, then update all the cores in the current cluster to the current state
# and set the cores of the other cluster to power down.
if self.get_state_name(state) == "freqstate":
for i, j in enumerate(self.currentstates_of_clusters[self.current_cluster]):
if j != offline_value:
self.currentstates_of_clusters[self.current_cluster][i] = state
if cpu_id == self.cpuids_of_clusters[self.current_cluster][i]:
self.currentstates_of_clusters[self.current_cluster][i] = state
if self.device.scheduler == 'iks':
check = False  # set to True if any core in the cluster is not powered down
for i in range(len(self.currentstates_of_clusters[self.current_cluster])):
if self.currentstates_of_clusters[self.current_cluster][i] != POWERDOWN:
check = True
break
if not check:
for i in range(len(self.currentstates_of_clusters[self.current_cluster])):
self.currentstates_of_clusters[self.current_cluster][i] = self.current_frequency_of_clusters[self.current_cluster]
for cluster, state_list in enumerate(self.currentstates_of_clusters):
if cluster != self.current_cluster:
for j in range(len(state_list)):
self.currentstates_of_clusters[cluster][j] = POWERDOWN
def unique_freq(self):
""" Determine the unique Frequency and state"""
unique_freq = []
for i in self.timestamp:
if self.state_time_map[i] not in unique_freq and self.state_time_map[i] != self.UNKNOWNSTATE:
unique_freq.append(self.state_time_map[i])
for i in self.minimum_frequency_cluster:
if i not in unique_freq:
unique_freq.append(i)
return unique_freq
def parse(self):
"""
Parse the trace.txt ::
store timestamp, state, cpu_id
---------------------------------------------------------------------------------
|timestamp| |state| |cpu_id|
<idle>-0 [001] 294.554380: cpu_idle: state=4294967295 cpu_id=1
<idle>-0 [001] 294.554454: power_start: type=1 state=0 cpu_id=1
<idle>-0 [001] 294.554458: cpu_idle: state=0 cpu_id=1
<idle>-0 [001] 294.554464: power_end: cpu_id=1
<idle>-0 [001] 294.554471: cpu_idle: state=4294967295 cpu_id=1
<idle>-0 [001] 294.554590: power_start: type=1 state=0 cpu_id=1
<idle>-0 [001] 294.554593: cpu_idle: state=0 cpu_id=1
<idle>-0 [001] 294.554636: power_end: cpu_id=1
<idle>-0 [001] 294.554639: cpu_idle: state=4294967295 cpu_id=1
<idle>-0 [001] 294.554669: power_start: type=1 state=0 cpu_id=1
"""
pattern = re.compile(r'\s+(?P<time>\S+)\S+\s*(?P<desc>(cpu_idle:|cpu_frequency:))\s*state=(?P<state>\d+)\s*cpu_id=(?P<cpu_id>\d+)')
start_trace = False
stop_trace = False
with open(self.infile, 'r') as f:
for line in f:
# Start collecting data at the "TRACE_MARKER_START" label and
# stop at the "TRACE_MARKER_STOP" label.
if line.find("TRACE_MARKER_START") != -1:
start_trace = True
if line.find("TRACE_MARKER_STOP") != -1:
stop_trace = True
if start_trace and not stop_trace:
match = pattern.search(line)
if match:
self.timestamp.append(float(match.group('time')))
self.state_time_map[float(match.group('time'))] = int(match.group('state'))
self.cpuid_time_map[float(match.group('time'))] = int(match.group('cpu_id'))
def get_state_name(self, state):
if state in self.power_state:
return "powerstate"
elif state == self.UNKNOWNSTATE:
return "unknownstate"
else:
return "freqstate"
def populate(self, time1, time2):
diff = time2 - time1
for cluster, states_list in enumerate(self.currentstates_of_clusters):
for k, j in enumerate(states_list):
if self.device.scheduler == 'iks' and cluster == 1:
self.cpu_freq_time_spent["cpu{}".format(self.cpuids_of_clusters[cluster][k] + len(self.currentstates_of_clusters[0]))][j] += diff
else:
self.cpu_freq_time_spent["cpu{}".format(self.cpuids_of_clusters[cluster][k])][j] += diff
def calculate(self):
for i in range(len(self.timestamp) - 1):
self.update_cluster_freq(self.state_time_map[self.timestamp[i]], self.cpuid_time_map[self.timestamp[i]])
self.update_state(self.state_time_map[self.timestamp[i]], self.cpuid_time_map[self.timestamp[i]])
self.populate(self.timestamp[i], self.timestamp[i + 1])
def percentage(self):
"""Normalize the result with total execution time."""
temp = self.cpu_freq_time_spent.copy()
for i in self.cpu_freq_time_spent:
total = 0
for j in self.cpu_freq_time_spent[i]:
total += self.cpu_freq_time_spent[i][j]
for j in self.cpu_freq_time_spent[i]:
if total != 0:
temp[i][j] = self.cpu_freq_time_spent[i][j] * 100 / total
else:
temp[i][j] = 0
return temp
def generate_csv(self, context): # pylint: disable=R0912,R0914
""" generate the '''dvfs.csv''' with the state, frequency and cores """
temp = self.percentage()
total_state = self.unique_freq()
offline_value = -1
ghz_conversion = 1000000
mhz_conversion = 1000
with open(self.outfile, 'a+') as f:
writer = csv.writer(f, delimiter=',')
reader = csv.reader(f)
# Create the header in the format below
# workload name, iteration, state, A7 CPU0,A7 CPU1,A7 CPU2,A7 CPU3,A15 CPU4,A15 CPU5
if sum(1 for row in reader) == 0:
header_row = ['workload', 'iteration', 'state']
count = 0
for cluster, states_list in enumerate(self.currentstates_of_clusters):
for dummy_index in range(len(states_list)):
header_row.append("{} CPU{}".format(self.corename_of_clusters[cluster], count))
count += 1
writer.writerow(header_row)
if offline_value in total_state:
total_state.remove(offline_value) # remove the offline state
for i in sorted(total_state):
temprow = []
temprow.extend([context.result.spec.label, context.result.iteration])
if "state{}".format(i) in self.idlestate_description:
temprow.append(self.idlestate_description["state{}".format(i)])
else:
state_value = float(i)
if state_value / ghz_conversion >= 1:
temprow.append("{} Ghz".format(state_value / ghz_conversion))
else:
temprow.append("{} Mhz".format(state_value / mhz_conversion))
for j in range(self.device.number_of_cores * self.multiply_factor):
temprow.append("{0:.3f}".format(temp["cpu{}".format(j)][i]))
writer.writerow(temprow)
check_off = True  # set to False if any core spent more than 1% of its time OFFLINE
for i in range(self.device.number_of_cores * self.multiply_factor):
temp_val = "{0:.3f}".format(temp["cpu{}".format(i)][offline_value])
if float(temp_val) > 1:
check_off = False
break
if check_off is False:
temprow = []
temprow.extend([context.result.spec.label, context.result.iteration])
temprow.append("OFFLINE")
for i in range(self.device.number_of_cores * self.multiply_factor):
temprow.append("{0:.3f}".format(temp["cpu{}".format(i)][offline_value]))
writer.writerow(temprow)
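
A minimal standalone sketch (not part of the commit) of the regex used in DVFS.parse() above, applied to one of the sample trace lines from its docstring; the spacing in the example line is illustrative:

import re

# Pattern copied verbatim from DVFS.parse() above.
pattern = re.compile(r'\s+(?P<time>\S+)\S+\s*(?P<desc>(cpu_idle:|cpu_frequency:))'
                     r'\s*state=(?P<state>\d+)\s*cpu_id=(?P<cpu_id>\d+)')

line = '    <idle>-0     [001]   294.554380: cpu_idle: state=4294967295 cpu_id=1'
match = pattern.search(line)
if match:
    # 4294967295 is the UNKNOWNSTATE marker handled by get_state_name().
    print('time={} state={} cpu_id={}'.format(match.group('time'),
                                              match.group('state'),
                                              match.group('cpu_id')))
    # -> time=294.554380 state=4294967295 cpu_id=1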


@@ -0,0 +1,235 @@
# Copyright 2014-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#pylint: disable=E1101,W0201
import os
import re
import string
import tarfile
try:
import pymongo
from bson.objectid import ObjectId
from gridfs import GridFS
except ImportError:
pymongo = None
from wlauto import ResultProcessor, Parameter, Artifact
from wlauto.exceptions import ResultProcessorError
from wlauto.utils.misc import as_relative
__bad_chars = '$.'
KEY_TRANS_TABLE = string.maketrans(__bad_chars, '_' * len(__bad_chars))
BUNDLE_NAME = 'files.tar.gz'
class MongodbUploader(ResultProcessor):
name = 'mongodb'
description = """
Uploads run results to a MongoDB instance.
MongoDB is a popular document-based data store (NoSQL database).
"""
parameters = [
Parameter('uri', kind=str, default=None,
description="""Connection URI. If specified, this will be used for connecting
to the backend, and host/port parameters will be ignored."""),
Parameter('host', kind=str, default='localhost', mandatory=True,
description='IP address/name of the machine hosting the MongoDB server.'),
Parameter('port', kind=int, default=27017, mandatory=True,
description='Port on which the MongoDB server is listening.'),
Parameter('db', kind=str, default='wa', mandatory=True,
description='Database on the server used to store WA results.'),
Parameter('extra_params', kind=dict, default={},
description='''Additional connection parameters may be specified using this
(see the pymongo documentation).'''),
Parameter('authentication', kind=dict, default={},
description='''If specified, this will be passed to db.authenticate() upon connection;
please see the pymongo documentation's authentication examples for details.'''),
]
def initialize(self, context):
if pymongo is None:
raise ResultProcessorError('mongodb result processor requires the pymongo package to be installed.')
try:
self.client = pymongo.MongoClient(self.host, self.port, **self.extra_params)
except pymongo.errors.PyMongoError, e:
raise ResultProcessorError('Error connecting to mongod: {}'.format(e))
self.dbc = self.client[self.db]
self.fs = GridFS(self.dbc)
if self.authentication:
if not self.dbc.authenticate(**self.authentication):
raise ResultProcessorError('Authentication to database {} failed.'.format(self.db))
self.run_result_dbid = ObjectId()
run_doc = context.run_info.to_dict()
wa_adapter = run_doc['device']
devprops = dict((k.translate(KEY_TRANS_TABLE), v)
for k, v in run_doc['device_properties'].iteritems())
run_doc['device'] = devprops
run_doc['device']['wa_adapter'] = wa_adapter
del run_doc['device_properties']
run_doc['output_directory'] = os.path.abspath(context.output_directory)
run_doc['artifacts'] = []
run_doc['workloads'] = context.config.to_dict()['workload_specs']
for workload in run_doc['workloads']:
workload['name'] = workload['workload_name']
del workload['workload_name']
workload['results'] = []
self.run_dbid = self.dbc.runs.insert(run_doc)
prefix = context.run_info.project if context.run_info.project else '[NOPROJECT]'
run_part = context.run_info.run_name or context.run_info.uuid.hex
self.gridfs_dir = os.path.join(prefix, run_part)
i = 0
while self.gridfs_directory_exists(self.gridfs_dir):
if self.gridfs_dir.endswith('-{}'.format(i)):
self.gridfs_dir = self.gridfs_dir[:-2]
i += 1
self.gridfs_dir += '-{}'.format(i)
# Keep track of all generated artifacts, so that we know what to
# include in the tarball. The tarball will contain raw artifacts
# (other kinds would have been uploaded directly or do not contain
# new data) and all files in the results dir that have not been marked
# as artifacts.
self.artifacts = []
def export_iteration_result(self, result, context):
r = {}
r['iteration'] = context.current_iteration
r['status'] = result.status
r['events'] = [e.to_dict() for e in result.events]
r['metrics'] = []
for m in result.metrics:
md = m.to_dict()
md['is_summary'] = m.name in context.workload.summary_metrics
r['metrics'].append(md)
iteration_artefacts = [self.upload_artifact(context, a) for a in context.iteration_artifacts]
r['artifacts'] = [e for e in iteration_artefacts if e is not None]
self.dbc.runs.update({'_id': self.run_dbid, 'workloads.id': context.spec.id},
{'$push': {'workloads.$.results': r}})
def export_run_result(self, result, context):
run_artifacts = [self.upload_artifact(context, a) for a in context.run_artifacts]
self.logger.debug('Generating results bundle...')
bundle = self.generate_bundle(context)
if bundle:
run_artifacts.append(self.upload_artifact(context, bundle))
else:
self.logger.debug('No untracked files found.')
run_stats = {
'status': result.status,
'events': [e.to_dict() for e in result.events],
'end_time': context.run_info.end_time,
'duration': context.run_info.duration.total_seconds(),
'artifacts': [e for e in run_artifacts if e is not None],
}
self.dbc.runs.update({'_id': self.run_dbid}, {'$set': run_stats})
def finalize(self, context):
self.client.close()
def validate(self):
if self.uri:
has_warned = False
if self.host != self.parameters['host'].default:
self.logger.warning('both uri and host specified; host will be ignored')
has_warned = True
if self.port != self.parameters['port'].default:
self.logger.warning('both uri and port specified; port will be ignored')
has_warned = True
if has_warned:
self.logger.warning('To suppress this warning, please remove either uri or '
'host/port from your config.')
def upload_artifact(self, context, artifact):
artifact_path = os.path.join(context.output_directory, artifact.path)
self.artifacts.append((artifact_path, artifact))
if not os.path.exists(artifact_path):
self.logger.debug('Artifact {} has not been generated'.format(artifact_path))
return
elif artifact.kind in ['raw', 'export']:
self.logger.debug('Ignoring {} artifact {}'.format(artifact.kind, artifact_path))
return
else:
self.logger.debug('Uploading artifact {}'.format(artifact_path))
entry = artifact.to_dict()
path = entry['path']
del entry['path']
del entry['name']
del entry['level']
del entry['mandatory']
if context.workload is None:
entry['filename'] = os.path.join(self.gridfs_dir, as_relative(path))
else:
entry['filename'] = os.path.join(self.gridfs_dir,
'{}-{}-{}'.format(context.spec.id,
context.spec.label,
context.current_iteration),
as_relative(path))
with open(artifact_path, 'rb') as fh:
fsid = self.fs.put(fh, **entry)
entry['gridfs_id'] = fsid
return entry
def gridfs_directory_exists(self, path):
regex = re.compile('^{}'.format(path))
return self.fs.exists({'filename': regex})
def generate_bundle(self, context): # pylint: disable=R0914
"""
The bundle will contain files generated during the run that have not
already been processed. This includes all files for which there isn't an
explicit artifact, as well as "raw" artifacts that aren't uploaded individually.
Basically, this ensures that everything that is not explicitly marked as an
"export" (which means it's guaranteed not to contain information not accessible
from other artifacts/scores) is available in the DB. The bundle is compressed,
so it shouldn't take up too much space; however, it also means that it's not
easy to query for or retrieve an individual file (a trade-off between space and convenience).
"""
to_upload = []
artpaths = []
outdir = context.output_directory
for artpath, artifact in self.artifacts:
artpaths.append(os.path.relpath(artpath, outdir))
if artifact.kind == 'raw':
to_upload.append((artpath, os.path.relpath(artpath, outdir)))
for root, _, files in os.walk(outdir):
for f in files:
path = os.path.relpath(os.path.join(root, f), outdir)
if path not in artpaths:
to_upload.append((os.path.join(outdir, path), path))
if not to_upload:
# Nothing unexpected/unprocessed has been generated during the run.
return None
else:
archive_path = os.path.join(outdir, BUNDLE_NAME)
with tarfile.open(archive_path, 'w:gz') as tf:
for fpath, arcpath in to_upload:
tf.add(fpath, arcpath)
return Artifact('mongo_bundle', BUNDLE_NAME, 'data',
description='bundle to be uploaded to mongodb.')
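
A minimal sketch (not part of the commit) of the key translation set up above: MongoDB field names may not contain '$' or '.', so device property keys are passed through KEY_TRANS_TABLE before upload. The property name below is made up for illustration.

import string

bad_chars = '$.'
KEY_TRANS_TABLE = string.maketrans(bad_chars, '_' * len(bad_chars))  # Python 2 API

# Hypothetical device property key containing characters MongoDB rejects:
print('board.revision$id'.translate(KEY_TRANS_TABLE))
# -> board_revision_id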


@@ -0,0 +1,183 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=attribute-defined-outside-init
import os
import sqlite3
import json
import uuid
from datetime import datetime, timedelta
from contextlib import contextmanager
from wlauto import ResultProcessor, settings, Parameter
from wlauto.exceptions import ResultProcessorError
from wlauto.utils.types import boolean
# IMPORTANT: when updating this schema, make sure to bump the version!
SCHEMA_VERSION = '0.0.2'
SCHEMA = [
'''CREATE TABLE runs (
uuid text,
start_time datetime,
end_time datetime,
duration integer
)''',
'''CREATE TABLE workload_specs (
id text,
run_oid text,
number_of_iterations integer,
label text,
workload_name text,
boot_parameters text,
runtime_parameters text,
workload_parameters text
)''',
'''CREATE TABLE metrics (
spec_oid int,
iteration integer,
metric text,
value text,
units text,
lower_is_better integer
)''',
'''CREATE VIEW results AS
SELECT uuid as run_uuid, spec_id, label as workload, iteration, metric, value, units, lower_is_better
FROM metrics AS m INNER JOIN (
SELECT ws.OID as spec_oid, ws.id as spec_id, uuid, label
FROM workload_specs AS ws INNER JOIN runs AS r ON ws.run_oid = r.OID
) AS wsr ON wsr.spec_oid = m.spec_oid
''',
'''CREATE TABLE __meta (
schema_version text
)''',
'''INSERT INTO __meta VALUES ("{}")'''.format(SCHEMA_VERSION),
]
sqlite3.register_adapter(datetime, lambda x: x.isoformat())
sqlite3.register_adapter(timedelta, lambda x: x.total_seconds())
sqlite3.register_adapter(uuid.UUID, str)
class SqliteResultProcessor(ResultProcessor):
name = 'sqlite'
description = """
Stores results in an sqlite database. This may be used to accumulate results
of multiple runs in a single file. The following settings may be specified in
config.py:
"""
parameters = [
Parameter('database', default=None,
description=""" Full path to the sqlite database to be used. If this is not specified then
a new database file will be created in the output directory. This setting can be
used to accumulate results from multiple runs in a single database. If the
specified file does not exist, it will be created; however, the directory of the
file must exist.
.. note:: The value must resolve to an absolute path,
relative paths are not allowed; however the
value may contain environment variables and/or
the home reference ~.
"""),
Parameter('overwrite', kind=boolean, default=False,
description="""If ``True``, this will overwrite the database file
if it already exists. If ``False`` (the default) data
will be added to the existing file (provided schema
versions match -- otherwise an error will be raised).
"""),
]
def initialize(self, context):
self._last_spec = None
self._run_oid = None
self._spec_oid = None
if not os.path.exists(self.database):
self._initdb()
elif self.overwrite: # pylint: disable=no-member
os.remove(self.database)
self._initdb()
else:
self._validate_schema_version()
self._update_run(context.run_info.uuid)
def process_iteration_result(self, result, context):
if self._last_spec != context.spec:
self._update_spec(context.spec)
metrics = [(self._spec_oid, context.current_iteration, m.name, str(m.value), m.units, int(m.lower_is_better))
for m in result.metrics]
with self._open_connection() as conn:
conn.executemany('INSERT INTO metrics VALUES (?,?,?,?,?,?)', metrics)
def process_run_result(self, result, context):
info = context.run_info
with self._open_connection() as conn:
conn.execute('''UPDATE runs SET start_time=?, end_time=?, duration=?
WHERE OID=?''', (info.start_time, info.end_time, info.duration, self._run_oid))
def validate(self):
if not self.database: # pylint: disable=access-member-before-definition
self.database = os.path.join(settings.output_directory, 'results.sqlite')
self.database = os.path.expandvars(os.path.expanduser(self.database))
def _initdb(self):
with self._open_connection() as conn:
for command in SCHEMA:
conn.execute(command)
def _validate_schema_version(self):
with self._open_connection() as conn:
try:
c = conn.execute('SELECT schema_version FROM __meta')
found_version = c.fetchone()[0]
except sqlite3.OperationalError:
message = '{} does not appear to be a valid WA results database.'.format(self.database)
raise ResultProcessorError(message)
if found_version != SCHEMA_VERSION:
message = 'Schema version in {} ({}) does not match current version ({}).'
raise ResultProcessorError(message.format(self.database, found_version, SCHEMA_VERSION))
def _update_run(self, run_uuid):
with self._open_connection() as conn:
conn.execute('INSERT INTO runs (uuid) VALUES (?)', (run_uuid,))
conn.commit()
c = conn.execute('SELECT OID FROM runs WHERE uuid=?', (run_uuid,))
self._run_oid = c.fetchone()[0]
def _update_spec(self, spec):
self._last_spec = spec
spec_tuple = (spec.id, self._run_oid, spec.number_of_iterations, spec.label, spec.workload_name,
json.dumps(spec.boot_parameters), json.dumps(spec.runtime_parameters),
json.dumps(spec.workload_parameters))
with self._open_connection() as conn:
conn.execute('INSERT INTO workload_specs VALUES (?,?,?,?,?,?,?,?)', spec_tuple)
conn.commit()
c = conn.execute('SELECT OID FROM workload_specs WHERE run_oid=? AND id=?', (self._run_oid, spec.id))
self._spec_oid = c.fetchone()[0]
@contextmanager
def _open_connection(self):
conn = sqlite3.connect(self.database)
try:
yield conn
finally:
conn.commit()
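
A minimal sketch (not part of the commit) of reading data back out of the database this processor writes. The ``results`` view comes from the SCHEMA above; the database path is an assumption (by default the file is results.sqlite in the run output directory, see validate()).

import sqlite3

conn = sqlite3.connect('wa_output/results.sqlite')  # assumed path
query = 'SELECT workload, iteration, metric, value, units FROM results'
for workload, iteration, metric, value, units in conn.execute(query):
    print('{} (iteration {}): {} = {} {}'.format(workload, iteration,
                                                 metric, value, units or ''))
conn.close()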


@@ -0,0 +1,124 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=R0201
"""
This module contains a few "standard" result processors that write results to
text files in various formats.
"""
import os
import csv
import json
from wlauto import ResultProcessor, settings
class StandardProcessor(ResultProcessor):
name = 'standard'
description = """
Creates a ``result.txt`` file for every iteration that contains metrics
for that iteration.
The metrics are written in ::
metric = value [units]
format.
"""
def process_iteration_result(self, result, context):
outfile = os.path.join(context.output_directory, 'result.txt')
with open(outfile, 'w') as wfh:
for metric in result.metrics:
line = '{} = {}'.format(metric.name, metric.value)
if metric.units:
line = ' '.join([line, metric.units])
line += '\n'
wfh.write(line)
context.add_artifact('iteration_result', 'result.txt', 'export')
class CsvReportProcessor(ResultProcessor):
"""
Creates a ``results.csv`` in the output directory containing results for
all iterations in CSV format, each line containing a single metric.
"""
name = 'csv'
def process_run_result(self, result, context):
outfile = os.path.join(settings.output_directory, 'results.csv')
with open(outfile, 'wb') as wfh:
writer = csv.writer(wfh)
writer.writerow(['id', 'workload', 'iteration', 'metric', 'value', 'units'])
for result in result.iteration_results:
for metric in result.metrics:
row = [result.id, result.spec.label, result.iteration,
metric.name, str(metric.value), metric.units or '']
writer.writerow(row)
context.add_artifact('run_result_csv', 'results.csv', 'export')
class JsonReportProcessor(ResultProcessor):
"""
Creates a ``results.json`` in the output directory containing results for
all iterations in JSON format.
"""
name = 'json'
def process_run_result(self, result, context):
outfile = os.path.join(settings.output_directory, 'results.json')
with open(outfile, 'wb') as wfh:
output = []
for result in result.iteration_results:
output.append({
'id': result.id,
'workload': result.workload.name,
'iteration': result.iteration,
'metrics': [dict([(k, v) for k, v in m.__dict__.iteritems()
if not k.startswith('_')])
for m in result.metrics],
})
json.dump(output, wfh, indent=4)
context.add_artifact('run_result_json', 'results.json', 'export')
class SummaryCsvProcessor(ResultProcessor):
"""
Similar to the csv result processor, but contains only the workloads' summary metrics.
"""
name = 'summary_csv'
def process_run_result(self, result, context):
outfile = os.path.join(settings.output_directory, 'summary.csv')
with open(outfile, 'wb') as wfh:
writer = csv.writer(wfh)
writer.writerow(['id', 'workload', 'iteration', 'metric', 'value', 'units'])
for result in result.iteration_results:
for metric in result.metrics:
if metric.name in result.workload.summary_metrics:
row = [result.id, result.workload.name, result.iteration,
metric.name, str(metric.value), metric.units or '']
writer.writerow(row)
context.add_artifact('run_result_summary', 'summary.csv', 'export')
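
A minimal sketch (not part of the commit) of consuming the results.json written by the ``json`` processor above. The path is an assumption, and the metric field names ('name', 'value') are inferred from how metrics are used elsewhere in this commit.

import json

with open('wa_output/results.json') as fh:  # assumed path
    results = json.load(fh)

for entry in results:
    for metric in entry['metrics']:
        # Each metric dict holds the Metric object's public attributes.
        print('{} (iteration {}): {} = {}'.format(entry['workload'],
                                                  entry['iteration'],
                                                  metric['name'],
                                                  metric['value']))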


@@ -0,0 +1,51 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=R0201
import os
import time
from collections import Counter
from wlauto import ResultProcessor
from wlauto.utils.misc import write_table
class StatusTxtReporter(ResultProcessor):
name = 'status'
description = """
Outputs a txt file containing general status information about which runs
failed and which were successful.
"""
def process_run_result(self, result, context):
counter = Counter()
for ir in result.iteration_results:
counter[ir.status] += 1
outfile = os.path.join(context.run_output_directory, 'status.txt')
self.logger.info('Status available in {}'.format(outfile))
with open(outfile, 'w') as wfh:
wfh.write('Run name: {}\n'.format(context.run_info.run_name))
wfh.write('Run status: {}\n'.format(context.run_result.status))
wfh.write('Date: {}\n'.format(time.strftime("%c")))
wfh.write('{}/{} iterations completed without error\n'.format(counter['OK'], len(result.iteration_results)))
wfh.write('\n')
status_lines = [map(str, [ir.id, ir.spec.label, ir.iteration, ir.status,
ir.events and ir.events[0].message.split('\n')[0] or ''])
for ir in result.iteration_results]
write_table(status_lines, wfh, align='<<>><')
context.add_artifact('run_status_summary', 'status.txt', 'export')
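
A minimal sketch (not part of the commit) of the tallying and event-summary idioms used above, with made-up statuses rather than WA iteration results:

from collections import Counter

statuses = ['OK', 'OK', 'FAILED', 'OK']
counter = Counter(statuses)
print('{}/{} iterations completed without error'.format(counter['OK'],
                                                         len(statuses)))
# -> 3/4 iterations completed without error

# `events and events[0].message.split('\n')[0] or ''` yields the first line of
# the first event's message, or an empty string when there are no events.
events = []
print(repr(events and events[0].message.split('\n')[0] or ''))  # -> ''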


@@ -0,0 +1,150 @@
# Copyright 2014-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#pylint: disable=E1101,W0201
import os
import csv
import math
import re
from wlauto import ResultProcessor, Parameter, File
from wlauto.utils.misc import get_meansd
class SyegResultProcessor(ResultProcessor):
name = 'syeg_csv'
description = """
Generates a CSV results file in the format expected by the SYEG toolchain.
Multiple iterations are parsed into separate columns; additional columns are
added for the mean and standard deviation; the number of threads is appended
to metric names (where applicable); and some metadata is added based on
external mapping files.
"""
parameters = [
Parameter('outfile', kind=str, default='syeg_out.csv',
description='The name of the output CSV file.'),
]
def initialize(self, context):
self.levelmap = self._read_map(context, 'final_sub.csv',
'Could not find metrics level mapping.')
self.typemap = self._read_map(context, 'types.csv',
'Could not find benchmark suite types mapping.')
def process_run_result(self, result, context):
syeg_results = {}
max_iterations = max(ir.iteration for ir in result.iteration_results)
for ir in result.iteration_results:
for metric in ir.metrics:
key = ir.spec.label + metric.name
if key not in syeg_results:
syeg_result = SyegResult(max_iterations)
syeg_result.suite = ir.spec.label
syeg_result.version = getattr(ir.workload, 'apk_version', None)
syeg_result.test = metric.name
if hasattr(ir.workload, 'number_of_threads'):
syeg_result.test += ' NT {} (Iterations/sec)'.format(ir.workload.number_of_threads)
syeg_result.final_sub = self.levelmap.get(metric.name)
syeg_result.lower_is_better = metric.lower_is_better
syeg_result.device = context.device.name
syeg_result.type = self._get_type(ir.workload.name, metric.name)
syeg_results[key] = syeg_result
syeg_results[key].runs[ir.iteration - 1] = metric.value
columns = ['device', 'suite', 'test', 'version', 'final_sub', 'best', 'average', 'deviation']
columns += ['run{}'.format(i + 1) for i in xrange(max_iterations)]
columns += ['type', 'suite_version']
outfile = os.path.join(context.output_directory, self.outfile)
with open(outfile, 'wb') as wfh:
writer = csv.writer(wfh)
writer.writerow(columns)
for syeg_result in syeg_results.values():
writer.writerow([getattr(syeg_result, c) for c in columns])
context.add_artifact('syeg_csv', outfile, 'export')
def _get_type(self, workload, metric):
metric = metric.lower()
type_ = self.typemap.get(workload)
if type_ == 'mixed':
if 'native' in metric:
type_ = 'native'
if ('java' in metric) or ('dalvik' in metric):
type_ = 'dalvik'
return type_
def _read_map(self, context, filename, errormsg):
mapfile = context.resolver.get(File(self, filename))
if mapfile:
with open(mapfile) as fh:
reader = csv.reader(fh)
return dict([c.strip() for c in r] for r in reader)
else:
self.logger.warning(errormsg)
return {}
class SyegResult(object):
@property
def average(self):
if not self._mean:
self._mean, self._sd = get_meansd(self.run_values)
return self._mean
@property
def deviation(self):
if not self._sd:
self._mean, self._sd = get_meansd(self.run_values)
return self._sd
@property
def run_values(self):
return [r for r in self.runs if not math.isnan(r)]
@property
def best(self):
if self.lower_is_better:
return min(self.run_values)
else:
return max(self.run_values)
@property
def suite_version(self):
return ' '.join(map(str, [self.suite, self.version]))
def __init__(self, max_iter):
self.runs = [float('nan') for _ in xrange(max_iter)]
self.device = None
self.suite = None
self.test = None
self.version = None
self.final_sub = None
self.lower_is_better = None
self.type = None
self._mean = None
self._sd = None
def __getattr__(self, name):
match = re.search(r'run(\d+)', name)
if not match:
raise AttributeError(name)
return self.runs[int(match.group(1)) - 1]
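
A minimal usage sketch (not part of the commit) of SyegResult above, with made-up values. The dynamic runN attributes resolve through __getattr__, and average/deviation rely on get_meansd from wlauto.utils.misc.

# Illustrative values only.
r = SyegResult(3)        # room for three iterations; unset runs stay NaN
r.lower_is_better = False
r.runs[0] = 10.0
r.runs[1] = 12.0         # the third run never executed, so it remains NaN

print(r.run1)            # -> 10.0 (resolved via __getattr__)
print(r.best)            # -> 12.0 (max, since lower_is_better is False)
print(r.average)         # -> 11.0 (NaN runs are filtered out of run_values)

The mapping files read by _read_map (final_sub.csv and types.csv) are expected to be two-column CSVs, since each row is turned into a single key/value pair of the returned dict.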