1
0
mirror of https://github.com/ARM-software/workload-automation.git synced 2024-10-06 02:41:11 +01:00

fw/execution: Handle unresponsive targets

If a target error occurs, check whether the target is unresponsive. If
it is, attempt to hard reset it if possible, or gracefully terminate
execution if not.
This commit is contained in:
Sergei Trofimov 2018-02-28 10:24:56 +00:00 committed by Marc Bonnici
parent fdb872d9cd
commit 6fe31d6cad
4 changed files with 40 additions and 15 deletions

View File

@ -22,7 +22,8 @@ from datetime import datetime
import wa.framework.signal as signal
from wa.framework import instrument
from wa.framework.configuration.core import Status
from wa.framework.exception import HostError, WorkloadError
from wa.framework.exception import TargetError, HostError, WorkloadError,\
TargetNotRespondingError, TimeoutError
from wa.framework.job import Job
from wa.framework.output import init_job_output
from wa.framework.output_processor import ProcessorManager
@ -375,17 +376,18 @@ class Runner(object):
self.send(signal.RUN_INITIALIZED)
while self.context.job_queue:
try:
with signal.wrap('JOB_EXECUTION', self, self.context):
self.run_next_job(self.context)
except KeyboardInterrupt:
self.context.skip_remaining_jobs()
with signal.wrap('JOB_EXECUTION', self, self.context):
self.run_next_job(self.context)
except KeyboardInterrupt as e:
log.log_error(e, self.logger)
self.logger.info('Skipping remaining jobs.')
self.context.skip_remaining_jobs()
except Exception as e:
self.context.add_event(e.message)
if (not getattr(e, 'logged', None) and
not isinstance(e, KeyboardInterrupt)):
log.log_error(e, self.logger)
e.logged = True
message = e.message if e.message else str(e)
log.log_error(e, self.logger)
self.logger.error('Skipping remaining jobs due to "{}".'.format(e))
self.context.skip_remaining_jobs()
raise e
finally:
self.finalize_run()
@ -429,6 +431,10 @@ class Runner(object):
if not getattr(e, 'logged', None):
log.log_error(e, self.logger)
e.logged = True
if isinstance(e, ExecutionError):
raise e
elif isinstance(e, TargetError):
context.tm.verify_target_responsive()
finally:
self.logger.info('Completing job {}'.format(job.id))
self.send(signal.JOB_COMPLETED)
@ -467,6 +473,8 @@ class Runner(object):
if not getattr(e, 'logged', None):
log.log_error(e, self.logger)
e.logged = True
if isinstance(e, TargetError) or isinstance(e, TimeoutError):
context.tm.verify_target_responsive()
raise e
finally:
try:
@ -474,8 +482,10 @@ class Runner(object):
job.process_output(context)
self.pm.process_job_output(context)
self.pm.export_job_output(context)
except Exception:
except Exception as e:
job.set_status(Status.PARTIAL)
if isinstance(e, TargetError) or isinstance(e, TimeoutError):
context.tm.verify_target_responsive()
raise
except KeyboardInterrupt:

View File

@ -105,7 +105,7 @@ from collections import OrderedDict
from wa.framework import signal
from wa.framework.plugin import Plugin
from wa.framework.exception import (WAError, TargetNotRespondingError, TimeoutError,
WorkloadError)
WorkloadError, TargetError)
from wa.utils.log import log_error
from wa.utils.misc import isiterable
from wa.utils.types import identifier, enum, level
@ -263,6 +263,9 @@ class ManagedCallback(object):
def __call__(self, context):
if self.instrument.is_enabled:
try:
if not context.tm.is_responsive:
logger.debug("Target unreponsive; skipping callback {}".format(self.callback))
return
self.callback(context)
except (KeyboardInterrupt, TargetNotRespondingError, TimeoutError): # pylint: disable=W0703
raise
@ -274,6 +277,8 @@ class ManagedCallback(object):
context.add_event(e.message)
if isinstance(e, WorkloadError):
context.set_status('FAILED')
elif isinstance(e, TargetError) or isinstance(e, TimeoutError):
context.tm.verify_target_responsive()
else:
if context.current_job:
context.set_status('PARTIAL')

View File

@ -115,6 +115,9 @@ class Job(object):
self.run_time = datetime.utcnow() - start_time
def process_output(self, context):
if not context.tm.is_responsive:
self.logger.info('Target unresponsive; not processing job output.')
return
self.logger.info('Processing output for job {} [{}]'.format(self.id, self.iteration))
if self.status != Status.FAILED:
with signal.wrap('WORKLOAD_RESULT_EXTRACTION', self, context):
@ -124,11 +127,17 @@ class Job(object):
self.workload.update_output(context)
def teardown(self, context):
if not context.tm.is_responsive:
self.logger.info('Target unresponsive; not tearing down.')
return
self.logger.info('Tearing down job {} [{}]'.format(self.id, self.iteration))
with signal.wrap('WORKLOAD_TEARDOWN', self, context):
self.workload.teardown(context)
def finalize(self, context):
if not context.tm.is_responsive:
self.logger.info('Target unresponsive; not finalizing.')
return
self.logger.info('Finalizing job {} [{}]'.format(self.id, self.iteration))
with signal.wrap('WORKLOAD_FINALIZED', self, context):
self.workload.finalize(context)

View File

@ -1,7 +1,7 @@
import logging
from wa.framework import signal
from wa.framework.exception import ExecutionError, TargetError
from wa.framework.exception import ExecutionError, TargetError, TargetNotRespondingError
from wa.framework.plugin import Parameter
from wa.framework.target.descriptor import (get_target_description,
instantiate_target,
@ -90,8 +90,9 @@ class TargetManager(object):
self.logger.info('Target unresponsive; performing hard reset')
self.target.reboot(hard=True)
self.is_responsive = True
raise ExecutionError('Target became unresponsive but was recovered.')
else:
raise ExecutionError('Target unresponsive and hard reset not supported; bailing.')
raise TargetNotRespondingError('Target unresponsive and hard reset not supported; bailing.')
def _init_target(self):
tdesc = get_target_description(self.target_name)