mirror of https://github.com/ARM-software/workload-automation.git synced 2025-09-01 19:02:31 +01:00

Initial commit of open source Workload Automation.

Sergei Trofimov
2015-03-10 13:09:31 +00:00
commit a747ec7e4c
412 changed files with 41401 additions and 0 deletions


@@ -0,0 +1,16 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


@@ -0,0 +1,375 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import csv
import re
from wlauto import ResultProcessor, settings, instrumentation
from wlauto.exceptions import ConfigError, ResultProcessorError
class DVFS(ResultProcessor):
name = 'dvfs'
description = """
Reports DVFS state residency data from ftrace power events.
This generates a ``dvfs.csv`` in the top-level results directory that,
for each workload iteration, reports the percentage of time each CPU core
spent in each of the DVFS frequency states (P-states), as well as the
percentage of time spent idle, during the execution of the workload.
.. note:: The ``trace-cmd`` instrument *MUST* be enabled in the instrumentation,
and at least ``'power*'`` events must be enabled.
"""
def __init__(self, **kwargs):
super(DVFS, self).__init__(**kwargs)
self.device = None
self.infile = None
self.outfile = None
self.current_cluster = None
self.currentstates_of_clusters = []
self.current_frequency_of_clusters = []
self.timestamp = []
self.state_time_map = {} # hold state at timestamp
self.cpuid_time_map = {} # hold cpuid at timestamp
self.cpu_freq_time_spent = {}
self.cpuids_of_clusters = []
self.power_state = [0, 1, 2, 3]
self.UNKNOWNSTATE = 4294967295
self.multiply_factor = None
self.corename_of_clusters = []
self.numberofcores_in_cluster = []
self.minimum_frequency_cluster = []
self.idlestate_description = {}
def validate(self):
if not instrumentation.instrument_is_installed('trace-cmd'):
raise ConfigError('"dvfs" works only if "trace_cmd" in enabled in instrumentation')
def initialize(self, context): # pylint: disable=R0912
self.device = context.device
if not self.device.core_names:
message = 'Device does not specify its core types (core_names/core_clusters not set in device_config).'
raise ResultProcessorError(message)
number_of_clusters = max(self.device.core_clusters) + 1
# On IKS devices, the actual number of cores is double
# what device.number_of_cores reports.
if self.device.scheduler == 'iks':
self.multiply_factor = 2
elif self.device.scheduler == 'unknown':
# Device doesn't specify its scheduler type. It could be IKS, in
# which case reported values would be wrong, so error out.
message = ('The device does not specify its scheduler type. If you are '
'using a generic device interface, please make sure to set the '
'"scheduler" parameter in the device config.')
raise ResultProcessorError(message)
else:
self.multiply_factor = 1
# Separate out the cores in each cluster.
# This is a list of lists of cores, one list per cluster.
listof_cores_clusters = []
for cluster in range(number_of_clusters):
listof_cores_clusters.append([core for core in self.device.core_clusters if core == cluster])
# Extract the minimum frequency of each cluster and
# the idle power states with their descriptive names.
#
total_cores = 0
current_cores = 0
for cluster, cores_list in enumerate(listof_cores_clusters):
self.corename_of_clusters.append(self.device.core_names[total_cores])
if self.device.scheduler != 'iks':
self.idlestate_description.update(self.device.get_cpuidle_states(total_cores))
else:
self.idlestate_description.update(self.device.get_cpuidle_states())
total_cores += len(cores_list)
self.numberofcores_in_cluster.append(len(cores_list))
for i in range(current_cores, total_cores):
if i in self.device.active_cpus:
self.minimum_frequency_cluster.append(int(self.device.get_cpu_min_frequency("cpu{}".format(i))))
break
current_cores = total_cores
length_frequency_cluster = len(self.minimum_frequency_cluster)
if length_frequency_cluster != number_of_clusters:
diff = number_of_clusters - length_frequency_cluster
offline_value = -1
for i in range(diff):
if self.device.scheduler != 'iks':
self.minimum_frequency_cluster.append(offline_value)
else:
self.minimum_frequency_cluster.append(self.device.iks_switch_frequency)
def process_iteration_result(self, result, context):
"""
Parse trace.txt for each iteration, calculate DVFS state/frequency residency,
dump the result to csv, and flush the data for the next iteration.
"""
self.infile = os.path.join(context.output_directory, 'trace.txt')
if os.path.isfile(self.infile):
self.logger.debug('Running result_processor "dvfs"')
self.outfile = os.path.join(settings.output_directory, 'dvfs.csv')
self.flush_parse_initialize()
self.calculate()
self.percentage()
self.generate_csv(context)
self.logger.debug('Completed result_processor "dvfs"')
else:
self.logger.debug('trace.txt not found.')
def flush_parse_initialize(self):
"""
Store the state and cpu_id for each timestamp from trace.txt, and flush all
values before the next iteration.
"""
self.current_cluster = 0
self.current_frequency_of_clusters = []
self.timestamp = []
self.currentstates_of_clusters = []
self.state_time_map = {}
self.cpuid_time_map = {}
self.cpu_freq_time_spent = {}
self.cpuids_of_clusters = []
self.parse() # Parse trace.txt generated from trace-cmd instrumentation
# Initialize the state of each core in each cluster, and the frequency of
# each cluster to its minimum frequency.
# cpu_ids are assigned per cluster: for IKS devices, cpu_ids repeat across
# clusters; for other devices they increment across clusters.
count = 0
for cluster, cores_number in enumerate(self.numberofcores_in_cluster):
self.currentstates_of_clusters.append([-1 for dummy in range(cores_number)])
self.current_frequency_of_clusters.append(self.minimum_frequency_cluster[cluster])
if self.device.scheduler == 'iks':
self.cpuids_of_clusters.append([j for j in range(cores_number)])
else:
self.cpuids_of_clusters.append(range(count, count + cores_number))
count += cores_number
# Initialize the time spent in each state/frequency for each core.
for i in range(self.device.number_of_cores * self.multiply_factor):
self.cpu_freq_time_spent["cpu{}".format(i)] = {}
for j in self.unique_freq():
self.cpu_freq_time_spent["cpu{}".format(i)][j] = 0
# A -1 state is added to account for time spent offline
offline_value = -1
self.cpu_freq_time_spent["cpu{}".format(i)][offline_value] = 0
if 0 not in self.unique_freq():
self.cpu_freq_time_spent["cpu{}".format(i)][0] = 0
def update_cluster_freq(self, state, cpu_id):
""" Update the cluster frequency and current cluster"""
# For IKS devices a cluster change is only possible when the frequency
# changes; for other devices it is determined by cpu_id.
if self.device.scheduler != 'iks':
self.current_cluster = self.get_cluster(cpu_id, state)
if self.get_state_name(state) == "freqstate":
self.current_cluster = self.get_cluster(cpu_id, state)
self.current_frequency_of_clusters[self.current_cluster] = state
def get_cluster(self, cpu_id, state):
# For IKS, if the current state is greater than or equal to the switch
# frequency then we are on the second cluster, otherwise the first.
# For other devices, look up which cluster the current cpu_id belongs to.
if self.device.scheduler == 'iks':
return 1 if state >= self.device.iks_switch_frequency else 0
else:
for cluster, cpuids_list in enumerate(self.cpuids_of_clusters):
if cpu_id in cpuids_list:
return cluster
def get_cluster_freq(self):
return self.current_frequency_of_clusters[self.current_cluster]
def update_state(self, state, cpu_id): # pylint: disable=R0912
"""
Update the state of each core in every cluster.
This is done for each timestamp.
"""
POWERDOWN = 2
offline_value = -1
# If the state is an unknown state, set the state of the current cpu_id
# to the current cluster's frequency.
# If it is a power state, set the state to that power state.
if self.get_state_name(state) in ["unknownstate", "powerstate"]:
for i in range(len(self.cpuids_of_clusters[self.current_cluster])):
if cpu_id == self.cpuids_of_clusters[self.current_cluster][i]:
if self.get_state_name(state) == "unknownstate":
self.currentstates_of_clusters[self.current_cluster][i] = self.current_frequency_of_clusters[self.current_cluster]
elif self.get_state_name(state) == "powerstate":
self.currentstates_of_clusters[self.current_cluster][i] = state
# If the state is a frequency state, update the state with the current state.
# For IKS, if all cores are powered down and the current state is a frequency
# state, then update all the cores in the current cluster to the current state
# and set the cores of the other cluster to power down.
if self.get_state_name(state) == "freqstate":
for i, j in enumerate(self.currentstates_of_clusters[self.current_cluster]):
if j != offline_value:
self.currentstates_of_clusters[self.current_cluster][i] = state
if cpu_id == self.cpuids_of_clusters[self.current_cluster][i]:
self.currentstates_of_clusters[self.current_cluster][i] = state
if self.device.scheduler == 'iks':
check = False  # set to True if any core in the cluster is not powered down
for i in range(len(self.currentstates_of_clusters[self.current_cluster])):
if self.currentstates_of_clusters[self.current_cluster][i] != POWERDOWN:
check = True
break
if not check:
for i in range(len(self.currentstates_of_clusters[self.current_cluster])):
self.currentstates_of_clusters[self.current_cluster][i] = self.current_frequency_of_clusters[self.current_cluster]
for cluster, state_list in enumerate(self.currentstates_of_clusters):
if cluster != self.current_cluster:
for j in range(len(state_list)):
self.currentstates_of_clusters[cluster][j] = POWERDOWN
def unique_freq(self):
""" Determine the unique Frequency and state"""
unique_freq = []
for i in self.timestamp:
if self.state_time_map[i] not in unique_freq and self.state_time_map[i] != self.UNKNOWNSTATE:
unique_freq.append(self.state_time_map[i])
for i in self.minimum_frequency_cluster:
if i not in unique_freq:
unique_freq.append(i)
return unique_freq
def parse(self):
"""
Parse the trace.txt ::
store timestamp, state, cpu_id
---------------------------------------------------------------------------------
|timestamp| |state| |cpu_id|
<idle>-0 [001] 294.554380: cpu_idle: state=4294967295 cpu_id=1
<idle>-0 [001] 294.554454: power_start: type=1 state=0 cpu_id=1
<idle>-0 [001] 294.554458: cpu_idle: state=0 cpu_id=1
<idle>-0 [001] 294.554464: power_end: cpu_id=1
<idle>-0 [001] 294.554471: cpu_idle: state=4294967295 cpu_id=1
<idle>-0 [001] 294.554590: power_start: type=1 state=0 cpu_id=1
<idle>-0 [001] 294.554593: cpu_idle: state=0 cpu_id=1
<idle>-0 [001] 294.554636: power_end: cpu_id=1
<idle>-0 [001] 294.554639: cpu_idle: state=4294967295 cpu_id=1
<idle>-0 [001] 294.554669: power_start: type=1 state=0 cpu_id=1
"""
pattern = re.compile(r'\s+(?P<time>\S+)\S+\s*(?P<desc>(cpu_idle:|cpu_frequency:))\s*state=(?P<state>\d+)\s*cpu_id=(?P<cpu_id>\d+)')
start_trace = False
stop_trace = False
with open(self.infile, 'r') as f:
for line in f:
# Start collecting data at the "TRACE_MARKER_START" label and
# stop at the "TRACE_MARKER_STOP" label.
if line.find("TRACE_MARKER_START") != -1:
start_trace = True
if line.find("TRACE_MARKER_STOP") != -1:
stop_trace = True
if start_trace and not stop_trace:
match = pattern.search(line)
if match:
self.timestamp.append(float(match.group('time')))
self.state_time_map[float(match.group('time'))] = int(match.group('state'))
self.cpuid_time_map[float(match.group('time'))] = int(match.group('cpu_id'))
def get_state_name(self, state):
if state in self.power_state:
return "powerstate"
elif state == self.UNKNOWNSTATE:
return "unknownstate"
else:
return "freqstate"
def populate(self, time1, time2):
diff = time2 - time1
for cluster, states_list in enumerate(self.currentstates_of_clusters):
for k, j in enumerate(states_list):
if self.device.scheduler == 'iks' and cluster == 1:
self.cpu_freq_time_spent["cpu{}".format(self.cpuids_of_clusters[cluster][k] + len(self.currentstates_of_clusters[0]))][j] += diff
else:
self.cpu_freq_time_spent["cpu{}".format(self.cpuids_of_clusters[cluster][k])][j] += diff
def calculate(self):
for i in range(len(self.timestamp) - 1):
self.update_cluster_freq(self.state_time_map[self.timestamp[i]], self.cpuid_time_map[self.timestamp[i]])
self.update_state(self.state_time_map[self.timestamp[i]], self.cpuid_time_map[self.timestamp[i]])
self.populate(self.timestamp[i], self.timestamp[i + 1])
def percentage(self):
"""Normalize the result with total execution time."""
temp = self.cpu_freq_time_spent.copy()
for i in self.cpu_freq_time_spent:
total = 0
for j in self.cpu_freq_time_spent[i]:
total += self.cpu_freq_time_spent[i][j]
for j in self.cpu_freq_time_spent[i]:
if total != 0:
temp[i][j] = self.cpu_freq_time_spent[i][j] * 100 / total
else:
temp[i][j] = 0
return temp
def generate_csv(self, context): # pylint: disable=R0912,R0914
""" generate the '''dvfs.csv''' with the state, frequency and cores """
temp = self.percentage()
total_state = self.unique_freq()
offline_value = -1
ghz_conversion = 1000000
mhz_conversion = 1000
with open(self.outfile, 'a+') as f:
writer = csv.writer(f, delimiter=',')
reader = csv.reader(f)
# Create the header in the format below
# workload name, iteration, state, A7 CPU0,A7 CPU1,A7 CPU2,A7 CPU3,A15 CPU4,A15 CPU5
if sum(1 for row in reader) == 0:
header_row = ['workload', 'iteration', 'state']
count = 0
for cluster, states_list in enumerate(self.currentstates_of_clusters):
for dummy_index in range(len(states_list)):
header_row.append("{} CPU{}".format(self.corename_of_clusters[cluster], count))
count += 1
writer.writerow(header_row)
if offline_value in total_state:
total_state.remove(offline_value) # remove the offline state
for i in sorted(total_state):
temprow = []
temprow.extend([context.result.spec.label, context.result.iteration])
if "state{}".format(i) in self.idlestate_description:
temprow.append(self.idlestate_description["state{}".format(i)])
else:
state_value = float(i)
if state_value / ghz_conversion >= 1:
temprow.append("{} Ghz".format(state_value / ghz_conversion))
else:
temprow.append("{} Mhz".format(state_value / mhz_conversion))
for j in range(self.device.number_of_cores * self.multiply_factor):
temprow.append("{0:.3f}".format(temp["cpu{}".format(j)][i]))
writer.writerow(temprow)
check_off = True  # set to False if any core spent more than 1% of its time OFFLINE
for i in range(self.device.number_of_cores * self.multiply_factor):
temp_val = "{0:.3f}".format(temp["cpu{}".format(i)][offline_value])
if float(temp_val) > 1:
check_off = False
break
if check_off is False:
temprow = []
temprow.extend([context.result.spec.label, context.result.iteration])
temprow.append("OFFLINE")
for i in range(self.device.number_of_cores * self.multiply_factor):
temprow.append("{0:.3f}".format(temp["cpu{}".format(i)][offline_value]))
writer.writerow(temprow)
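
A minimal standalone sketch (not part of the commit) of the regex used in DVFS.parse() above, applied to one of the sample trace lines from its docstring; the spacing in the example line is illustrative:

import re

# Pattern copied verbatim from DVFS.parse() above.
pattern = re.compile(r'\s+(?P<time>\S+)\S+\s*(?P<desc>(cpu_idle:|cpu_frequency:))'
                     r'\s*state=(?P<state>\d+)\s*cpu_id=(?P<cpu_id>\d+)')

line = '    <idle>-0     [001]   294.554380: cpu_idle: state=4294967295 cpu_id=1'
match = pattern.search(line)
if match:
    # 4294967295 is the UNKNOWNSTATE marker handled by get_state_name().
    print('time={} state={} cpu_id={}'.format(match.group('time'),
                                              match.group('state'),
                                              match.group('cpu_id')))
    # -> time=294.554380 state=4294967295 cpu_id=1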


@@ -0,0 +1,235 @@
# Copyright 2014-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#pylint: disable=E1101,W0201
import os
import re
import string
import tarfile
try:
import pymongo
from bson.objectid import ObjectId
from gridfs import GridFS
except ImportError:
pymongo = None
from wlauto import ResultProcessor, Parameter, Artifact
from wlauto.exceptions import ResultProcessorError
from wlauto.utils.misc import as_relative
__bad_chars = '$.'
KEY_TRANS_TABLE = string.maketrans(__bad_chars, '_' * len(__bad_chars))
BUNDLE_NAME = 'files.tar.gz'
class MongodbUploader(ResultProcessor):
name = 'mongodb'
description = """
Uploads run results to a MongoDB instance.
MongoDB is a popular document-based data store (NoSQL database).
"""
parameters = [
Parameter('uri', kind=str, default=None,
description="""Connection URI. If specified, this will be used for connecting
to the backend, and host/port parameters will be ignored."""),
Parameter('host', kind=str, default='localhost', mandatory=True,
description='IP address/name of the machine hosting the MongoDB server.'),
Parameter('port', kind=int, default=27017, mandatory=True,
description='Port on which the MongoDB server is listening.'),
Parameter('db', kind=str, default='wa', mandatory=True,
description='Database on the server used to store WA results.'),
Parameter('extra_params', kind=dict, default={},
description='''Additional connection parameters may be specified using this
(see the pymongo documentation).'''),
Parameter('authentication', kind=dict, default={},
description='''If specified, this will be passed to db.authenticate() upon connection;
please see the pymongo documentation's authentication examples for details.'''),
]
def initialize(self, context):
if pymongo is None:
raise ResultProcessorError('mongodb result processor requires the pymongo package to be installed.')
try:
self.client = pymongo.MongoClient(self.host, self.port, **self.extra_params)
except pymongo.errors.PyMongoError, e:
raise ResultProcessorError('Error connecting to mongod: {}'.format(e))
self.dbc = self.client[self.db]
self.fs = GridFS(self.dbc)
if self.authentication:
if not self.dbc.authenticate(**self.authentication):
raise ResultProcessorError('Authentication to database {} failed.'.format(self.db))
self.run_result_dbid = ObjectId()
run_doc = context.run_info.to_dict()
wa_adapter = run_doc['device']
devprops = dict((k.translate(KEY_TRANS_TABLE), v)
for k, v in run_doc['device_properties'].iteritems())
run_doc['device'] = devprops
run_doc['device']['wa_adapter'] = wa_adapter
del run_doc['device_properties']
run_doc['output_directory'] = os.path.abspath(context.output_directory)
run_doc['artifacts'] = []
run_doc['workloads'] = context.config.to_dict()['workload_specs']
for workload in run_doc['workloads']:
workload['name'] = workload['workload_name']
del workload['workload_name']
workload['results'] = []
self.run_dbid = self.dbc.runs.insert(run_doc)
prefix = context.run_info.project if context.run_info.project else '[NOPROJECT]'
run_part = context.run_info.run_name or context.run_info.uuid.hex
self.gridfs_dir = os.path.join(prefix, run_part)
i = 0
while self.gridfs_directory_exists(self.gridfs_dir):
if self.gridfs_dir.endswith('-{}'.format(i)):
self.gridfs_dir = self.gridfs_dir[:-2]
i += 1
self.gridfs_dir += '-{}'.format(i)
# Keep track of all generated artifacts, so that we know what to
# include in the tarball. The tarball will contain raw artifacts
# (other kinds would have been uploaded directly or do not contain
# new data) and all files in the results dir that have not been marked
# as artifacts.
self.artifacts = []
def export_iteration_result(self, result, context):
r = {}
r['iteration'] = context.current_iteration
r['status'] = result.status
r['events'] = [e.to_dict() for e in result.events]
r['metrics'] = []
for m in result.metrics:
md = m.to_dict()
md['is_summary'] = m.name in context.workload.summary_metrics
r['metrics'].append(md)
iteration_artefacts = [self.upload_artifact(context, a) for a in context.iteration_artifacts]
r['artifacts'] = [e for e in iteration_artefacts if e is not None]
self.dbc.runs.update({'_id': self.run_dbid, 'workloads.id': context.spec.id},
{'$push': {'workloads.$.results': r}})
def export_run_result(self, result, context):
run_artifacts = [self.upload_artifact(context, a) for a in context.run_artifacts]
self.logger.debug('Generating results bundle...')
bundle = self.generate_bundle(context)
if bundle:
run_artifacts.append(self.upload_artifact(context, bundle))
else:
self.logger.debug('No untracked files found.')
run_stats = {
'status': result.status,
'events': [e.to_dict() for e in result.events],
'end_time': context.run_info.end_time,
'duration': context.run_info.duration.total_seconds(),
'artifacts': [e for e in run_artifacts if e is not None],
}
self.dbc.runs.update({'_id': self.run_dbid}, {'$set': run_stats})
def finalize(self, context):
self.client.close()
def validate(self):
if self.uri:
has_warned = False
if self.host != self.parameters['host'].default:
self.logger.warning('both uri and host specified; host will be ignored')
has_warned = True
if self.port != self.parameters['port'].default:
self.logger.warning('both uri and port specified; port will be ignored')
has_warned = True
if has_warned:
self.logger.warning('To suppress this warning, please remove either uri or '
'host/port from your config.')
def upload_artifact(self, context, artifact):
artifact_path = os.path.join(context.output_directory, artifact.path)
self.artifacts.append((artifact_path, artifact))
if not os.path.exists(artifact_path):
self.logger.debug('Artifact {} has not been generated'.format(artifact_path))
return
elif artifact.kind in ['raw', 'export']:
self.logger.debug('Ignoring {} artifact {}'.format(artifact.kind, artifact_path))
return
else:
self.logger.debug('Uploading artifact {}'.format(artifact_path))
entry = artifact.to_dict()
path = entry['path']
del entry['path']
del entry['name']
del entry['level']
del entry['mandatory']
if context.workload is None:
entry['filename'] = os.path.join(self.gridfs_dir, as_relative(path))
else:
entry['filename'] = os.path.join(self.gridfs_dir,
'{}-{}-{}'.format(context.spec.id,
context.spec.label,
context.current_iteration),
as_relative(path))
with open(artifact_path, 'rb') as fh:
fsid = self.fs.put(fh, **entry)
entry['gridfs_id'] = fsid
return entry
def gridfs_directory_exists(self, path):
regex = re.compile('^{}'.format(path))
return self.fs.exists({'filename': regex})
def generate_bundle(self, context): # pylint: disable=R0914
"""
The bundle will contain files generated during the run that have not
already been processed. This includes all files for which there isn't an
explicit artifact, as well as "raw" artifacts that aren't uploaded individually.
Basically, this ensures that everything that is not explicitly marked as an
"export" (which means it's guaranteed not to contain information not accessible
from other artifacts/scores) is available in the DB. The bundle is compressed,
so it shouldn't take up too much space; however, it also means that it's not
easy to query for or retrieve an individual file (a trade-off between space and convenience).
"""
to_upload = []
artpaths = []
outdir = context.output_directory
for artpath, artifact in self.artifacts:
artpaths.append(os.path.relpath(artpath, outdir))
if artifact.kind == 'raw':
to_upload.append((artpath, os.path.relpath(artpath, outdir)))
for root, _, files in os.walk(outdir):
for f in files:
path = os.path.relpath(os.path.join(root, f), outdir)
if path not in artpaths:
to_upload.append((os.path.join(outdir, path), path))
if not to_upload:
# Nothing unexpected/unprocessed has been generated during the run.
return None
else:
archive_path = os.path.join(outdir, BUNDLE_NAME)
with tarfile.open(archive_path, 'w:gz') as tf:
for fpath, arcpath in to_upload:
tf.add(fpath, arcpath)
return Artifact('mongo_bundle', BUNDLE_NAME, 'data',
description='bundle to be uploaded to mongodb.')
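
A minimal sketch (not part of the commit) of the key translation set up above: MongoDB field names may not contain '$' or '.', so device property keys are passed through KEY_TRANS_TABLE before upload. The property name below is made up for illustration.

import string

bad_chars = '$.'
KEY_TRANS_TABLE = string.maketrans(bad_chars, '_' * len(bad_chars))  # Python 2 API

# Hypothetical device property key containing characters MongoDB rejects:
print('board.revision$id'.translate(KEY_TRANS_TABLE))
# -> board_revision_id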


@@ -0,0 +1,183 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=attribute-defined-outside-init
import os
import sqlite3
import json
import uuid
from datetime import datetime, timedelta
from contextlib import contextmanager
from wlauto import ResultProcessor, settings, Parameter
from wlauto.exceptions import ResultProcessorError
from wlauto.utils.types import boolean
# IMPORTANT: when updating this schema, make sure to bump the version!
SCHEMA_VERSION = '0.0.2'
SCHEMA = [
'''CREATE TABLE runs (
uuid text,
start_time datetime,
end_time datetime,
duration integer
)''',
'''CREATE TABLE workload_specs (
id text,
run_oid text,
number_of_iterations integer,
label text,
workload_name text,
boot_parameters text,
runtime_parameters text,
workload_parameters text
)''',
'''CREATE TABLE metrics (
spec_oid int,
iteration integer,
metric text,
value text,
units text,
lower_is_better integer
)''',
'''CREATE VIEW results AS
SELECT uuid as run_uuid, spec_id, label as workload, iteration, metric, value, units, lower_is_better
FROM metrics AS m INNER JOIN (
SELECT ws.OID as spec_oid, ws.id as spec_id, uuid, label
FROM workload_specs AS ws INNER JOIN runs AS r ON ws.run_oid = r.OID
) AS wsr ON wsr.spec_oid = m.spec_oid
''',
'''CREATE TABLE __meta (
schema_version text
)''',
'''INSERT INTO __meta VALUES ("{}")'''.format(SCHEMA_VERSION),
]
sqlite3.register_adapter(datetime, lambda x: x.isoformat())
sqlite3.register_adapter(timedelta, lambda x: x.total_seconds())
sqlite3.register_adapter(uuid.UUID, str)
class SqliteResultProcessor(ResultProcessor):
name = 'sqlite'
description = """
Stores results in an sqlite database. This may be used to accumulate results
of multiple runs in a single file. The following settings may be specified in
config.py:
"""
parameters = [
Parameter('database', default=None,
description=""" Full path to the sqlite database to be used. If this is not specified then
a new database file will be created in the output directory. This setting can be
used to accumulate results from multiple runs in a single database. If the
specified file does not exist, it will be created; however, the directory of the
file must exist.
.. note:: The value must resolve to an absolute path,
relative paths are not allowed; however the
value may contain environment variables and/or
the home reference ~.
"""),
Parameter('overwrite', kind=boolean, default=False,
description="""If ``True``, this will overwrite the database file
if it already exists. If ``False`` (the default) data
will be added to the existing file (provided schema
versions match -- otherwise an error will be raised).
"""),
]
def initialize(self, context):
self._last_spec = None
self._run_oid = None
self._spec_oid = None
if not os.path.exists(self.database):
self._initdb()
elif self.overwrite: # pylint: disable=no-member
os.remove(self.database)
self._initdb()
else:
self._validate_schema_version()
self._update_run(context.run_info.uuid)
def process_iteration_result(self, result, context):
if self._last_spec != context.spec:
self._update_spec(context.spec)
metrics = [(self._spec_oid, context.current_iteration, m.name, str(m.value), m.units, int(m.lower_is_better))
for m in result.metrics]
with self._open_connection() as conn:
conn.executemany('INSERT INTO metrics VALUES (?,?,?,?,?,?)', metrics)
def process_run_result(self, result, context):
info = context.run_info
with self._open_connection() as conn:
conn.execute('''UPDATE runs SET start_time=?, end_time=?, duration=?
WHERE OID=?''', (info.start_time, info.end_time, info.duration, self._run_oid))
def validate(self):
if not self.database: # pylint: disable=access-member-before-definition
self.database = os.path.join(settings.output_directory, 'results.sqlite')
self.database = os.path.expandvars(os.path.expanduser(self.database))
def _initdb(self):
with self._open_connection() as conn:
for command in SCHEMA:
conn.execute(command)
def _validate_schema_version(self):
with self._open_connection() as conn:
try:
c = conn.execute('SELECT schema_version FROM __meta')
found_version = c.fetchone()[0]
except sqlite3.OperationalError:
message = '{} does not appear to be a valid WA results database.'.format(self.database)
raise ResultProcessorError(message)
if found_version != SCHEMA_VERSION:
message = 'Schema version in {} ({}) does not match current version ({}).'
raise ResultProcessorError(message.format(self.database, found_version, SCHEMA_VERSION))
def _update_run(self, run_uuid):
with self._open_connection() as conn:
conn.execute('INSERT INTO runs (uuid) VALUES (?)', (run_uuid,))
conn.commit()
c = conn.execute('SELECT OID FROM runs WHERE uuid=?', (run_uuid,))
self._run_oid = c.fetchone()[0]
def _update_spec(self, spec):
self._last_spec = spec
spec_tuple = (spec.id, self._run_oid, spec.number_of_iterations, spec.label, spec.workload_name,
json.dumps(spec.boot_parameters), json.dumps(spec.runtime_parameters),
json.dumps(spec.workload_parameters))
with self._open_connection() as conn:
conn.execute('INSERT INTO workload_specs VALUES (?,?,?,?,?,?,?,?)', spec_tuple)
conn.commit()
c = conn.execute('SELECT OID FROM workload_specs WHERE run_oid=? AND id=?', (self._run_oid, spec.id))
self._spec_oid = c.fetchone()[0]
@contextmanager
def _open_connection(self):
conn = sqlite3.connect(self.database)
try:
yield conn
finally:
conn.commit()
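
A minimal sketch (not part of the commit) of reading data back out of the database this processor writes. The ``results`` view comes from the SCHEMA above; the database path is an assumption (by default the file is results.sqlite in the run output directory, see validate()).

import sqlite3

conn = sqlite3.connect('wa_output/results.sqlite')  # assumed path
query = 'SELECT workload, iteration, metric, value, units FROM results'
for workload, iteration, metric, value, units in conn.execute(query):
    print('{} (iteration {}): {} = {} {}'.format(workload, iteration,
                                                 metric, value, units or ''))
conn.close()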


@@ -0,0 +1,124 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=R0201
"""
This module contains a few "standard" result processors that write results to
text files in various formats.
"""
import os
import csv
import json
from wlauto import ResultProcessor, settings
class StandardProcessor(ResultProcessor):
name = 'standard'
description = """
Creates a ``result.txt`` file for every iteration that contains metrics
for that iteration.
The metrics are written in ::
metric = value [units]
format.
"""
def process_iteration_result(self, result, context):
outfile = os.path.join(context.output_directory, 'result.txt')
with open(outfile, 'w') as wfh:
for metric in result.metrics:
line = '{} = {}'.format(metric.name, metric.value)
if metric.units:
line = ' '.join([line, metric.units])
line += '\n'
wfh.write(line)
context.add_artifact('iteration_result', 'result.txt', 'export')
class CsvReportProcessor(ResultProcessor):
"""
Creates a ``results.csv`` in the output directory containing results for
all iterations in CSV format, each line containing a single metric.
"""
name = 'csv'
def process_run_result(self, result, context):
outfile = os.path.join(settings.output_directory, 'results.csv')
with open(outfile, 'wb') as wfh:
writer = csv.writer(wfh)
writer.writerow(['id', 'workload', 'iteration', 'metric', 'value', 'units'])
for result in result.iteration_results:
for metric in result.metrics:
row = [result.id, result.spec.label, result.iteration,
metric.name, str(metric.value), metric.units or '']
writer.writerow(row)
context.add_artifact('run_result_csv', 'results.csv', 'export')
class JsonReportProcessor(ResultProcessor):
"""
Creates a ``results.json`` in the output directory containing results for
all iterations in JSON format.
"""
name = 'json'
def process_run_result(self, result, context):
outfile = os.path.join(settings.output_directory, 'results.json')
with open(outfile, 'wb') as wfh:
output = []
for result in result.iteration_results:
output.append({
'id': result.id,
'workload': result.workload.name,
'iteration': result.iteration,
'metrics': [dict([(k, v) for k, v in m.__dict__.iteritems()
if not k.startswith('_')])
for m in result.metrics],
})
json.dump(output, wfh, indent=4)
context.add_artifact('run_result_json', 'results.json', 'export')
class SummaryCsvProcessor(ResultProcessor):
"""
Similar to the csv result processor, but contains only the workloads' summary metrics.
"""
name = 'summary_csv'
def process_run_result(self, result, context):
outfile = os.path.join(settings.output_directory, 'summary.csv')
with open(outfile, 'wb') as wfh:
writer = csv.writer(wfh)
writer.writerow(['id', 'workload', 'iteration', 'metric', 'value', 'units'])
for result in result.iteration_results:
for metric in result.metrics:
if metric.name in result.workload.summary_metrics:
row = [result.id, result.workload.name, result.iteration,
metric.name, str(metric.value), metric.units or '']
writer.writerow(row)
context.add_artifact('run_result_summary', 'summary.csv', 'export')
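
A minimal sketch (not part of the commit) of consuming the results.json written by the ``json`` processor above. The path is an assumption, and the metric field names ('name', 'value') are inferred from how metrics are used elsewhere in this commit.

import json

with open('wa_output/results.json') as fh:  # assumed path
    results = json.load(fh)

for entry in results:
    for metric in entry['metrics']:
        # Each metric dict holds the Metric object's public attributes.
        print('{} (iteration {}): {} = {}'.format(entry['workload'],
                                                  entry['iteration'],
                                                  metric['name'],
                                                  metric['value']))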


@@ -0,0 +1,51 @@
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=R0201
import os
import time
from collections import Counter
from wlauto import ResultProcessor
from wlauto.utils.misc import write_table
class StatusTxtReporter(ResultProcessor):
name = 'status'
description = """
Outputs a txt file containing general status information about which runs
failed and which were successful.
"""
def process_run_result(self, result, context):
counter = Counter()
for ir in result.iteration_results:
counter[ir.status] += 1
outfile = os.path.join(context.run_output_directory, 'status.txt')
self.logger.info('Status available in {}'.format(outfile))
with open(outfile, 'w') as wfh:
wfh.write('Run name: {}\n'.format(context.run_info.run_name))
wfh.write('Run status: {}\n'.format(context.run_result.status))
wfh.write('Date: {}\n'.format(time.strftime("%c")))
wfh.write('{}/{} iterations completed without error\n'.format(counter['OK'], len(result.iteration_results)))
wfh.write('\n')
status_lines = [map(str, [ir.id, ir.spec.label, ir.iteration, ir.status,
ir.events and ir.events[0].message.split('\n')[0] or ''])
for ir in result.iteration_results]
write_table(status_lines, wfh, align='<<>><')
context.add_artifact('run_status_summary', 'status.txt', 'export')
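
A minimal sketch (not part of the commit) of the tallying and event-summary idioms used above, with made-up statuses rather than WA iteration results:

from collections import Counter

statuses = ['OK', 'OK', 'FAILED', 'OK']
counter = Counter(statuses)
print('{}/{} iterations completed without error'.format(counter['OK'],
                                                         len(statuses)))
# -> 3/4 iterations completed without error

# `events and events[0].message.split('\n')[0] or ''` yields the first line of
# the first event's message, or an empty string when there are no events.
events = []
print(repr(events and events[0].message.split('\n')[0] or ''))  # -> ''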


@@ -0,0 +1,150 @@
# Copyright 2014-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#pylint: disable=E1101,W0201
import os
import csv
import math
import re
from wlauto import ResultProcessor, Parameter, File
from wlauto.utils.misc import get_meansd
class SyegResultProcessor(ResultProcessor):
name = 'syeg_csv'
description = """
Generates a CSV results file in the format expected by the SYEG toolchain.
Multiple iterations are parsed into separate columns; additional columns are
added for the mean and standard deviation; the number of threads is appended
to metric names (where applicable); and some metadata is added based on
external mapping files.
"""
parameters = [
Parameter('outfile', kind=str, default='syeg_out.csv',
description='The name of the output CSV file.'),
]
def initialize(self, context):
self.levelmap = self._read_map(context, 'final_sub.csv',
'Could not find metrics level mapping.')
self.typemap = self._read_map(context, 'types.csv',
'Could not find benchmark suite types mapping.')
def process_run_result(self, result, context):
syeg_results = {}
max_iterations = max(ir.iteration for ir in result.iteration_results)
for ir in result.iteration_results:
for metric in ir.metrics:
key = ir.spec.label + metric.name
if key not in syeg_results:
syeg_result = SyegResult(max_iterations)
syeg_result.suite = ir.spec.label
syeg_result.version = getattr(ir.workload, 'apk_version', None)
syeg_result.test = metric.name
if hasattr(ir.workload, 'number_of_threads'):
syeg_result.test += ' NT {} (Iterations/sec)'.format(ir.workload.number_of_threads)
syeg_result.final_sub = self.levelmap.get(metric.name)
syeg_result.lower_is_better = metric.lower_is_better
syeg_result.device = context.device.name
syeg_result.type = self._get_type(ir.workload.name, metric.name)
syeg_results[key] = syeg_result
syeg_results[key].runs[ir.iteration - 1] = metric.value
columns = ['device', 'suite', 'test', 'version', 'final_sub', 'best', 'average', 'deviation']
columns += ['run{}'.format(i + 1) for i in xrange(max_iterations)]
columns += ['type', 'suite_version']
outfile = os.path.join(context.output_directory, self.outfile)
with open(outfile, 'wb') as wfh:
writer = csv.writer(wfh)
writer.writerow(columns)
for syeg_result in syeg_results.values():
writer.writerow([getattr(syeg_result, c) for c in columns])
context.add_artifact('syeg_csv', outfile, 'export')
def _get_type(self, workload, metric):
metric = metric.lower()
type_ = self.typemap.get(workload)
if type_ == 'mixed':
if 'native' in metric:
type_ = 'native'
if ('java' in metric) or ('dalvik' in metric):
type_ = 'dalvik'
return type_
def _read_map(self, context, filename, errormsg):
mapfile = context.resolver.get(File(self, filename))
if mapfile:
with open(mapfile) as fh:
reader = csv.reader(fh)
return dict([c.strip() for c in r] for r in reader)
else:
self.logger.warning(errormsg)
return {}
class SyegResult(object):
@property
def average(self):
if not self._mean:
self._mean, self._sd = get_meansd(self.run_values)
return self._mean
@property
def deviation(self):
if not self._sd:
self._mean, self._sd = get_meansd(self.run_values)
return self._sd
@property
def run_values(self):
return [r for r in self.runs if not math.isnan(r)]
@property
def best(self):
if self.lower_is_better:
return min(self.run_values)
else:
return max(self.run_values)
@property
def suite_version(self):
return ' '.join(map(str, [self.suite, self.version]))
def __init__(self, max_iter):
self.runs = [float('nan') for _ in xrange(max_iter)]
self.device = None
self.suite = None
self.test = None
self.version = None
self.final_sub = None
self.lower_is_better = None
self.type = None
self._mean = None
self._sd = None
def __getattr__(self, name):
match = re.search(r'run(\d+)', name)
if not match:
raise AttributeError(name)
return self.runs[int(match.group(1)) - 1]
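
A minimal usage sketch (not part of the commit) of SyegResult above, with made-up values. The dynamic runN attributes resolve through __getattr__, and average/deviation rely on get_meansd from wlauto.utils.misc.

# Illustrative values only.
r = SyegResult(3)        # room for three iterations; unset runs stay NaN
r.lower_is_better = False
r.runs[0] = 10.0
r.runs[1] = 12.0         # the third run never executed, so it remains NaN

print(r.run1)            # -> 10.0 (resolved via __getattr__)
print(r.best)            # -> 12.0 (max, since lower_is_better is False)
print(r.average)         # -> 11.0 (NaN runs are filtered out of run_values)

The mapping files read by _read_map (final_sub.csv and types.csv) are expected to be two-column CSVs, since each row is turned into a single key/value pair of the returned dict.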