1
0
mirror of https://github.com/ARM-software/workload-automation.git synced 2025-09-03 03:42:35 +01:00

Initial implementation of state tracking and output handling.

This commit is contained in:
Sergei Trofimov
2017-03-16 17:54:48 +00:00
parent 6e13c96d9c
commit 24402660c4
10 changed files with 443 additions and 435 deletions

View File

@@ -49,17 +49,18 @@ from itertools import izip_longest
import wa.framework.signal as signal
from wa.framework import instrumentation, pluginloader
from wa.framework.configuration.core import settings
from wa.framework.configuration.execution import JobStatus
from wa.framework.configuration.core import settings, RunStatus, JobStatus
from wa.framework.exception import (WAError, ConfigError, TimeoutError,
InstrumentError, TargetError,
TargetNotRespondingError)
from wa.framework.output import init_job_output
from wa.framework.plugin import Artifact
from wa.framework.resource import ResourceResolver
from wa.framework.run import RunState
from wa.framework.target.info import TargetInfo
from wa.framework.target.manager import TargetManager
from wa.utils import log
from wa.utils.misc import (ensure_directory_exists as _d,
from wa.utils.misc import (ensure_directory_exists as _d, merge_config_values,
get_traceback, format_duration)
from wa.utils.serializer import json
@@ -105,16 +106,26 @@ class ExecutionContext(object):
return self.current_job.spec.id != self.next_job.spec.id
@property
def output_directory(self):
def job_output(self):
if self.current_job:
return os.path.join(self.output.basepath, self.current_job.output_name)
return self.current_job.output
@property
def output(self):
if self.current_job:
return self.job_output
return self.run_output
@property
def output_directory(self):
return self.output.basepath
def __init__(self, cm, tm, output):
self.logger = logging.getLogger('context')
self.cm = cm
self.tm = tm
self.output = output
self.run_output = output
self.run_state = output.state
self.logger.debug('Loading resource discoverers')
self.resolver = ResourceResolver(cm)
self.resolver.load()
@@ -127,31 +138,56 @@ class ExecutionContext(object):
self.output.write_info()
self.job_queue = copy(self.cm.jobs)
self.completed_jobs = []
self.run_state.status = RunStatus.STARTED
self.output.write_state()
def end_run(self):
self.output.info.end_time = datetime.now()
self.output.info.duration = self.output.info.end_time -\
self.output.info.start_time
self.output.write_info()
self.output.write_state()
self.output.write_result()
def start_job(self):
if not self.job_queue:
raise RuntimeError('No jobs to run')
self.current_job = self.job_queue.pop(0)
os.makedirs(self.output_directory)
self.current_job.output = init_job_output(self.run_output, self.current_job)
self.update_job_state(self.current_job)
return self.current_job
def end_job(self):
if not self.current_job:
raise RuntimeError('No jobs in progress')
self.completed_jobs.append(self.current_job)
self.update_job_state(self.current_job)
self.output.write_result()
self.current_job = None
def move_failed(self, job):
attempt = job.retries + 1
failed_name = '{}-attempt{:02}'.format(job.output_name, attempt)
self.output.move_failed(job.output_name, failed_name)
self.run_output.move_failed(job.output)
def update_job_state(self, job):
self.run_state.update_job(job)
self.run_output.write_state()
def write_state(self):
self.run_output.write_state()
def add_metric(self, name, value, units=None, lower_is_better=False,
classifiers=None):
if self.current_job:
classifiers = merge_config_values(self.current_job.classifiers,
classifiers)
self.output.add_metric(name, value, units, lower_is_better, classifiers)
def add_artifact(self, name, path, kind, description=None, classifiers=None):
self.output.add_artifact(name, path, kind, description, classifiers)
def add_run_artifact(self, name, path, kind, description=None,
classifiers=None):
self.run_output.add_artifact(name, path, kind, description, classifiers)
class OldExecutionContext(object):
"""
@@ -335,6 +371,7 @@ class Executor(object):
self.logger.info('Generating jobs')
config_manager.generate_jobs(context)
output.write_job_specs(config_manager.job_specs)
output.write_state()
self.logger.info('Installing instrumentation')
for instrument in config_manager.get_instruments(target_manager.target):
@@ -345,8 +382,6 @@ class Executor(object):
runner = Runner(context)
runner.run()
def execute_postamble(self):
"""
This happens after the run has completed. The overall results of the run are
@@ -374,22 +409,6 @@ class Executor(object):
self.logger.warn('There were warnings during execution.')
self.logger.warn('Please see {}'.format(self.config.log_file))
def _get_runner(self, result_manager):
if not self.config.execution_order or self.config.execution_order == 'by_iteration':
if self.config.reboot_policy == 'each_spec':
self.logger.info('each_spec reboot policy with the default by_iteration execution order is '
'equivalent to each_iteration policy.')
runnercls = ByIterationRunner
elif self.config.execution_order in ['classic', 'by_spec']:
runnercls = BySpecRunner
elif self.config.execution_order == 'by_section':
runnercls = BySectionRunner
elif self.config.execution_order == 'random':
runnercls = RandomRunner
else:
raise ConfigError('Unexpected execution order: {}'.format(self.config.execution_order))
return runnercls(self.device_manager, self.context, result_manager)
def _error_signalled_callback(self):
self.error_logged = True
signal.disconnect(self._error_signalled_callback, signal.ERROR_LOGGED)
@@ -436,6 +455,7 @@ class Runner(object):
for job in self.context.job_queue:
job.initialize(self.context)
log.dedent()
self.context.write_state()
def finalize_run(self):
self.logger.info('Finalizing run')
@@ -448,6 +468,7 @@ class Runner(object):
try:
log.indent()
self.do_run_job(job, context)
job.status = JobStatus.OK
except KeyboardInterrupt:
job.status = JobStatus.ABORTED
raise
@@ -503,12 +524,13 @@ class Runner(object):
rc = self.context.cm.run_config
if job.status in rc.retry_on_status:
if job.retries < rc.max_retries:
msg = 'Job {} iteration {} complted with status {}. retrying...'
msg = 'Job {} iteration {} completed with status {}. retrying...'
self.logger.error(msg.format(job.id, job.status, job.iteration))
self.context.move_failed(job)
job.retries += 1
job.status = JobStatus.PENDING
self.context.job_queue.insert(0, job)
self.context.write_state()
else:
msg = 'Job {} iteration {} completed with status {}. '\
'Max retries exceeded.'