mirror of
https://github.com/ARM-software/workload-automation.git
synced 2025-02-22 04:49:00 +00:00
Changing the way target descriptions work from a static mapping to something that is dynamically generated and is extensible via plugins. Also moving core target implementation stuff under "framework".
356 lines
10 KiB
Python
356 lines
10 KiB
Python
# Copyright 2013-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
|
import logging
import os
import uuid
from collections import OrderedDict
from copy import copy
from datetime import datetime, timedelta
from functools import wraps

from wa.framework import signal, pluginloader, log
from wa.framework.plugin import Plugin
from wa.framework.output import Status
from wa.framework.resource import ResourceResolver
from wa.framework.exception import JobError
from wa.utils import counter
from wa.utils.serializer import json
from wa.utils.misc import ensure_directory_exists as _d
from wa.utils.types import caseless_string
|
class JobActor(object):
    """
    Base class for the entity that performs the actual work of a job.

    Every method is a no-op hook; concrete actors override the ones they
    need. The runner drives the actor through initialize -> run ->
    complete/restart -> finalize.
    """

    def get_config(self):
        """Return the configuration associated with this actor."""
        return {}

    def initialize(self, context):
        """Hook invoked once before the run, with the run context."""
        return None

    def run(self):
        """Hook that performs the actual work of the job."""
        return None

    def finalize(self):
        """Hook invoked once after the run has completed."""
        return None

    def restart(self):
        """Hook invoked when the job is to be retried after a failure."""
        return None

    def complete(self):
        """Hook invoked when the job has finished (successfully or not)."""
        return None
|
class RunnerJob(object):
    """
    A single queued unit of work: pairs a JobActor with its output and
    tracks status and retry attempts. Status is stored on the output
    object so that it gets persisted with the rest of the run state.
    """

    def __init__(self, id, actor, output, max_retries):
        self.id = id
        self.actor = actor
        self.output = output
        self.max_retries = max_retries
        self.status = Status.NEW
        self.attempt = 0

    @property
    def status(self):
        return self.output.status

    @status.setter
    def status(self, value):
        self.output.status = value

    @property
    def should_retry(self):
        # Retry while the number of attempts made has not exceeded the limit.
        return self.attempt <= self.max_retries

    def initialize(self, context):
        """Initialize the actor and mark the job ready to run."""
        self.actor.initialize(context)
        self.status = Status.PENDING

    def run(self):
        """Execute one attempt of the job via the actor."""
        self.status = Status.RUNNING
        self.attempt += 1
        self.output.config = self.actor.get_config()
        self.output.initialize()
        self.actor.run()
        self.status = Status.COMPLETE

    def finalize(self):
        """Delegate end-of-run cleanup to the actor."""
        self.actor.finalize()

    def restart(self):
        """Delegate retry preparation to the actor."""
        self.actor.restart()

    def complete(self):
        """Delegate job completion to the actor."""
        self.actor.complete()
|
# Registry of decorated methods that have already been invoked this run;
# cleared by reset_runmethods() at the start of each run.
__run_methods = set()


def runmethod(method):
    """
    A method decorator that ensures that a method is invoked only once per run.

    Subsequent invocations within the same run are silently skipped; the
    registry is cleared by :func:`reset_runmethods`. Decorated methods must
    return ``None`` -- a non-``None`` return raises ``RuntimeError``, since
    a skipped invocation could not reproduce the value for its caller.
    """
    @wraps(method)  # preserve the wrapped method's name/docstring for logging and debugging
    def _method_wrapper(*args, **kwargs):
        if method in __run_methods:
            return
        # Mark before calling so a raising method is still counted as invoked.
        __run_methods.add(method)
        ret = method(*args, **kwargs)
        if ret is not None:
            message = 'runmethod()\'s must return None; method "{}" returned "{}"'
            raise RuntimeError(message.format(method, ret))
    return _method_wrapper


def reset_runmethods():
    """Clear the once-per-run registry so runmethods may fire again."""
    global __run_methods
    __run_methods = set()
|
class Runner(object):
    """
    Executes a queue of RunnerJobs against a run output, emitting lifecycle
    signals (RUN_STARTED, JOB_STARTED, etc.) and persisting run state at
    each transition so the run can be inspected or resumed.
    """

    @property
    def info(self):
        return self.output.info

    @property
    def status(self):
        # Run status lives on the output object so it is persisted with it.
        return self.output.status

    @status.setter
    def status(self, value):
        self.output.status = value

    @property
    def jobs_pending(self):
        return len(self.job_queue) > 0

    @property
    def current_job(self):
        # The job at the head of the queue, or None when the queue is empty.
        if self.job_queue:
            return self.job_queue[0]

    @property
    def previous_job(self):
        # Most recently completed job, or None if nothing has completed yet.
        if self.completed_jobs:
            return self.completed_jobs[-1]

    @property
    def next_job(self):
        # The job after the current one, or None if this is the last job.
        if len(self.job_queue) > 1:
            return self.job_queue[1]

    def __init__(self, output):
        self.logger = logging.getLogger('runner')
        self.output = output
        self.context = RunContext(self)
        self.status = Status.NEW
        self.job_queue = []
        self.completed_jobs = []
        self._known_ids = set([])

    def add_job(self, job_id, actor, max_retries=2):
        """
        Queue a job for execution.

        ``job_id`` must be unique (case-insensitively) within this run;
        a duplicate raises :class:`JobError`.
        """
        job_id = caseless_string(job_id)
        if job_id in self._known_ids:
            raise JobError('Job with id "{}" already exists'.format(job_id))
        output = self.output.create_job_output(job_id)
        self.job_queue.append(RunnerJob(job_id, actor, output, max_retries))
        self._known_ids.add(job_id)

    def initialize(self):
        """Initialize the context and all queued jobs; record start time."""
        self.logger.info('Initializing run')
        self.start_time = datetime.now()
        # Only set start_time/duration on first initialization, so a resumed
        # run keeps its original start time and accumulates duration.
        if not self.info.start_time:
            self.info.start_time = self.start_time
            self.info.duration = timedelta()

        self.context.initialize()
        for job in self.job_queue:
            job.initialize(self.context)
        self.persist_state()
        self.logger.info('Run initialized')

    def run(self):
        """Top-level entry point: initialize, run all jobs, finalize."""
        self.status = Status.RUNNING
        reset_runmethods()
        signal.send(signal.RUN_STARTED, self, self.context)
        self.initialize()
        signal.send(signal.RUN_INITIALIZED, self, self.context)
        self.run_jobs()
        signal.send(signal.RUN_COMPLETED, self, self.context)
        self.finalize()
        # NOTE(review): finalize() also sends RUN_FINALIZED, so the signal
        # fires twice on this path -- confirm whether that is intended.
        signal.send(signal.RUN_FINALIZED, self, self.context)

    def run_jobs(self):
        """
        Drain the job queue, translating per-job exceptions into job
        failures and a KeyboardInterrupt into an aborted run.
        """
        try:
            self.logger.info('Running jobs')
            while self.jobs_pending:
                self.begin_job()
                log.indent()
                try:
                    self.current_job.run()
                except KeyboardInterrupt:
                    self.current_job.status = Status.ABORTED
                    signal.send(signal.JOB_ABORTED, self, self.current_job)
                    raise
                except Exception as e:
                    self.current_job.status = Status.FAILED
                    log.log_error(e, self.logger)
                    signal.send(signal.JOB_FAILED, self, self.current_job)
                else:
                    self.current_job.status = Status.COMPLETE
                finally:
                    log.dedent()
                    self.complete_job()
        except KeyboardInterrupt:
            self.status = Status.ABORTED
            while self.job_queue:
                job = self.job_queue.pop(0)
                # Fixed: was `RunnerJob.ABORTED` -- RunnerJob defines no such
                # attribute, so this raised AttributeError mid-abort. The
                # status enum lives on Status, as everywhere else in this file.
                job.status = Status.ABORTED
                self.completed_jobs.append(job)
            signal.send(signal.RUN_ABORTED, self, self)
            raise
        except Exception as e:
            self.status = Status.FAILED
            log.log_error(e, self.logger)
            signal.send(signal.RUN_FAILED, self, self)
        else:
            self.status = Status.COMPLETE

    def finalize(self):
        """Finalize remaining jobs, record timing, and persist final state."""
        self.logger.info('Finalizing run')
        # NOTE(review): only jobs still in the queue are finalized here;
        # completed jobs were moved to completed_jobs -- confirm intended.
        for job in self.job_queue:
            job.finalize()
        self.end_time = datetime.now()
        self.info.end_time = self.end_time
        self.info.duration += self.end_time - self.start_time
        self.persist_state()
        signal.send(signal.RUN_FINALIZED, self, self)
        self.logger.info('Run completed')

    def begin_job(self):
        """Announce and persist the start of the current job."""
        self.logger.info('Starting job {}'.format(self.current_job.id))
        signal.send(signal.JOB_STARTED, self, self.current_job)
        self.persist_state()

    def complete_job(self):
        """
        Handle end-of-attempt for the current job: move failed output aside,
        retry if attempts remain, otherwise complete and dequeue the job.
        """
        if self.current_job.status == Status.FAILED:
            self.output.move_failed(self.current_job.output)
            if self.current_job.should_retry:
                self.logger.info('Restarting job {}'.format(self.current_job.id))
                self.persist_state()
                self.current_job.restart()
                signal.send(signal.JOB_RESTARTED, self, self.current_job)
                # Job stays at the head of the queue for the next attempt.
                return

        self.logger.info('Completing job {}'.format(self.current_job.id))
        self.current_job.complete()
        self.persist_state()
        signal.send(signal.JOB_COMPLETED, self, self.current_job)
        job = self.job_queue.pop(0)
        self.completed_jobs.append(job)

    def persist_state(self):
        """Write the current run state to the output store."""
        self.output.persist()
|
class RunContext(object):
    """
    Provides a context for instrumentation. Keeps track of things like
    current workload and iteration.

    Path and artifact/metric operations dispatch to the current job's
    output while a job is running, and to the run-level output otherwise.
    """

    @property
    def run_output(self):
        return self.runner.output

    @property
    def current_job(self):
        return self.runner.current_job

    @property
    def run_output_directory(self):
        return self.run_output.output_directory

    @property
    def output_directory(self):
        # Job-level directory while a job is running; run-level otherwise.
        if self.runner.current_job:
            return self.runner.current_job.output.output_directory
        else:
            return self.run_output.output_directory

    @property
    def info_directory(self):
        return self.run_output.info_directory

    @property
    def config_directory(self):
        return self.run_output.config_directory

    @property
    def failed_directory(self):
        return self.run_output.failed_directory

    @property
    def log_file(self):
        # Requires the module-level `import os` (missing from the original
        # import block; added there).
        return os.path.join(self.output_directory, 'run.log')

    def __init__(self, runner):
        self.runner = runner
        self.job = None
        self.iteration = None
        self.job_output = None
        self.resolver = ResourceResolver()

    def initialize(self):
        self.resolver.load()

    def get_path(self, subpath):
        """Resolve ``subpath`` against the job output if one is active,
        otherwise against the run output."""
        if self.current_job is None:
            return self.run_output.get_path(subpath)
        else:
            return self.current_job.output.get_path(subpath)

    def add_metric(self, *args, **kwargs):
        """Record a metric against the current job, or the run if no job
        is active."""
        if self.current_job is None:
            self.run_output.add_metric(*args, **kwargs)
        else:
            self.current_job.output.add_metric(*args, **kwargs)

    def add_artifact(self, name, path, kind, *args, **kwargs):
        """Record an artifact against the current job, or the run if no
        job is active."""
        if self.current_job is None:
            self.add_run_artifact(name, path, kind, *args, **kwargs)
        else:
            self.add_job_artifact(name, path, kind, *args, **kwargs)

    def add_run_artifact(self, *args, **kwargs):
        # Fixed: was `add_artifiact` (typo). The job-level path below calls
        # `add_artifact`, so the run-level output must expose the same API.
        self.run_output.add_artifact(*args, **kwargs)

    def add_job_artifact(self, *args, **kwargs):
        self.current_job.output.add_artifact(*args, **kwargs)

    def get_artifact(self, name):
        """Return the named artifact, searching the current iteration's
        artifacts first, then the run's; ``None`` if not found."""
        # NOTE(review): iteration_artifacts / run_artifacts are never
        # assigned in this class -- presumably set externally; confirm.
        if self.iteration_artifacts:
            for art in self.iteration_artifacts:
                if art.name == name:
                    return art
        for art in self.run_artifacts:
            if art.name == name:
                return art
        return None