1
0
mirror of https://github.com/ARM-software/workload-automation.git synced 2024-10-06 10:51:13 +01:00
workload-automation/wa/framework/run.py

356 lines
10 KiB
Python
Raw Normal View History

2017-02-21 13:37:11 +00:00
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import uuid
import logging
from copy import copy
from datetime import datetime, timedelta
from collections import OrderedDict
from wa.framework import signal, pluginloader, log
from wa.framework.plugin import Plugin
from wa.framework.output import Status
from wa.framework.resource import ResourceResolver
from wa.framework.exception import JobError
from wa.utils import counter
from wa.utils.serializer import json
from wa.utils.misc import ensure_directory_exists as _d
from wa.utils.types import caseless_string
2017-02-21 13:37:11 +00:00
class JobActor(object):
def get_config(self):
return {}
def initialize(self, context):
pass
def run(self):
pass
def finalize(self):
pass
def restart(self):
pass
def complete(self):
pass
class RunnerJob(object):
@property
def status(self):
return self.output.status
@status.setter
def status(self, value):
self.output.status = value
@property
def should_retry(self):
return self.attempt <= self.max_retries
def __init__(self, id, actor, output, max_retries):
self.id = id
self.actor = actor
self.output = output
self.max_retries = max_retries
self.status = Status.NEW
self.attempt = 0
def initialize(self, context):
self.actor.initialize(context)
self.status = Status.PENDING
def run(self):
self.status = Status.RUNNING
self.attempt += 1
self.output.config = self.actor.get_config()
self.output.initialize()
self.actor.run()
self.status = Status.COMPLETE
def finalize(self):
self.actor.finalize()
def restart(self):
self.actor.restart()
def complete(self):
self.actor.complete()
__run_methods = set()
def runmethod(method):
"""
A method decorator that ensures that a method is invoked only once per run.
"""
def _method_wrapper(*args, **kwargs):
if method in __run_methods:
return
__run_methods.add(method)
ret = method(*args, **kwargs)
if ret is not None:
message = 'runmethod()\'s must return None; method "{}" returned "{}"'
raise RuntimeError(message.format(method, ret))
return _method_wrapper
def reset_runmethods():
global __run_methods
__run_methods = set()
class Runner(object):
@property
def info(self):
return self.output.info
@property
def status(self):
return self.output.status
@status.setter
def status(self, value):
self.output.status = value
@property
def jobs_pending(self):
return len(self.job_queue) > 0
@property
def current_job(self):
if self.job_queue:
return self.job_queue[0]
@property
def previous_job(self):
if self.completed_jobs:
return self.completed_jobs[-1]
@property
def next_job(self):
if len(self.job_queue) > 1:
return self.job_queue[1]
def __init__(self, output):
self.logger = logging.getLogger('runner')
self.output = output
self.context = RunContext(self)
self.status = Status.NEW
self.job_queue = []
self.completed_jobs = []
self._known_ids = set([])
def add_job(self, job_id, actor, max_retries=2):
job_id = caseless_string(job_id)
if job_id in self._known_ids:
raise JobError('Job with id "{}" already exists'.format(job_id))
output = self.output.create_job_output(job_id)
self.job_queue.append(RunnerJob(job_id, actor, output, max_retries))
self._known_ids.add(job_id)
def initialize(self):
self.logger.info('Initializing run')
self.start_time = datetime.now()
if not self.info.start_time:
self.info.start_time = self.start_time
self.info.duration = timedelta()
self.context.initialize()
for job in self.job_queue:
job.initialize(self.context)
self.persist_state()
self.logger.info('Run initialized')
def run(self):
self.status = Status.RUNNING
reset_runmethods()
signal.send(signal.RUN_STARTED, self, self.context)
self.initialize()
signal.send(signal.RUN_INITIALIZED, self, self.context)
self.run_jobs()
signal.send(signal.RUN_COMPLETED, self, self.context)
self.finalize()
signal.send(signal.RUN_FINALIZED, self, self.context)
def run_jobs(self):
try:
self.logger.info('Running jobs')
while self.jobs_pending:
self.begin_job()
log.indent()
try:
self.current_job.run()
except KeyboardInterrupt:
self.current_job.status = Status.ABORTED
signal.send(signal.JOB_ABORTED, self, self.current_job)
raise
except Exception as e:
self.current_job.status = Status.FAILED
log.log_error(e, self.logger)
signal.send(signal.JOB_FAILED, self, self.current_job)
else:
self.current_job.status = Status.COMPLETE
finally:
log.dedent()
self.complete_job()
except KeyboardInterrupt:
self.status = Status.ABORTED
while self.job_queue:
job = self.job_queue.pop(0)
job.status = RunnerJob.ABORTED
self.completed_jobs.append(job)
signal.send(signal.RUN_ABORTED, self, self)
raise
except Exception as e:
self.status = Status.FAILED
log.log_error(e, self.logger)
signal.send(signal.RUN_FAILED, self, self)
else:
self.status = Status.COMPLETE
def finalize(self):
self.logger.info('Finalizing run')
for job in self.job_queue:
job.finalize()
self.end_time = datetime.now()
self.info.end_time = self.end_time
self.info.duration += self.end_time - self.start_time
self.persist_state()
signal.send(signal.RUN_FINALIZED, self, self)
self.logger.info('Run completed')
def begin_job(self):
self.logger.info('Starting job {}'.format(self.current_job.id))
signal.send(signal.JOB_STARTED, self, self.current_job)
self.persist_state()
def complete_job(self):
if self.current_job.status == Status.FAILED:
self.output.move_failed(self.current_job.output)
if self.current_job.should_retry:
self.logger.info('Restarting job {}'.format(self.current_job.id))
self.persist_state()
self.current_job.restart()
signal.send(signal.JOB_RESTARTED, self, self.current_job)
return
self.logger.info('Completing job {}'.format(self.current_job.id))
self.current_job.complete()
self.persist_state()
signal.send(signal.JOB_COMPLETED, self, self.current_job)
job = self.job_queue.pop(0)
self.completed_jobs.append(job)
def persist_state(self):
self.output.persist()
class RunContext(object):
"""
Provides a context for instrumentation. Keeps track of things like
current workload and iteration.
"""
@property
def run_output(self):
return self.runner.output
@property
def current_job(self):
return self.runner.current_job
@property
def run_output_directory(self):
return self.run_output.output_directory
@property
def output_directory(self):
if self.runner.current_job:
return self.runner.current_job.output.output_directory
else:
return self.run_output.output_directory
@property
def info_directory(self):
return self.run_output.info_directory
@property
def config_directory(self):
return self.run_output.config_directory
@property
def failed_directory(self):
return self.run_output.failed_directory
@property
def log_file(self):
return os.path.join(self.output_directory, 'run.log')
def __init__(self, runner):
self.runner = runner
self.job = None
self.iteration = None
self.job_output = None
self.resolver = ResourceResolver()
def initialize(self):
self.resolver.load()
def get_path(self, subpath):
if self.current_job is None:
return self.run_output.get_path(subpath)
else:
return self.current_job.output.get_path(subpath)
def add_metric(self, *args, **kwargs):
if self.current_job is None:
self.run_output.add_metric(*args, **kwargs)
else:
self.current_job.output.add_metric(*args, **kwargs)
def add_artifact(self, name, path, kind, *args, **kwargs):
if self.current_job is None:
self.add_run_artifact(name, path, kind, *args, **kwargs)
else:
self.add_job_artifact(name, path, kind, *args, **kwargs)
def add_run_artifact(self, *args, **kwargs):
self.run_output.add_artifiact(*args, **kwargs)
def add_job_artifact(self, *args, **kwargs):
self.current_job.output.add_artifact(*args, **kwargs)
def get_artifact(self, name):
if self.iteration_artifacts:
for art in self.iteration_artifacts:
if art.name == name:
return art
for art in self.run_artifacts:
if art.name == name:
return art
return None