mirror of
https://github.com/ARM-software/workload-automation.git
synced 2025-02-20 20:09:11 +00:00
fw/execution: Handle unresponsive targets
If a target error occurs, check whether the target has become unresponsive. If it has, attempt a hard reset when one is possible; otherwise, terminate execution gracefully.
This commit is contained in:
parent
fdb872d9cd
commit
6fe31d6cad
@ -22,7 +22,8 @@ from datetime import datetime
|
||||
import wa.framework.signal as signal
|
||||
from wa.framework import instrument
|
||||
from wa.framework.configuration.core import Status
|
||||
from wa.framework.exception import HostError, WorkloadError
|
||||
from wa.framework.exception import TargetError, HostError, WorkloadError,\
|
||||
TargetNotRespondingError, TimeoutError
|
||||
from wa.framework.job import Job
|
||||
from wa.framework.output import init_job_output
|
||||
from wa.framework.output_processor import ProcessorManager
|
||||
@ -375,17 +376,18 @@ class Runner(object):
|
||||
self.send(signal.RUN_INITIALIZED)
|
||||
|
||||
while self.context.job_queue:
|
||||
try:
|
||||
with signal.wrap('JOB_EXECUTION', self, self.context):
|
||||
self.run_next_job(self.context)
|
||||
except KeyboardInterrupt:
|
||||
self.context.skip_remaining_jobs()
|
||||
with signal.wrap('JOB_EXECUTION', self, self.context):
|
||||
self.run_next_job(self.context)
|
||||
|
||||
except KeyboardInterrupt as e:
|
||||
log.log_error(e, self.logger)
|
||||
self.logger.info('Skipping remaining jobs.')
|
||||
self.context.skip_remaining_jobs()
|
||||
except Exception as e:
|
||||
self.context.add_event(e.message)
|
||||
if (not getattr(e, 'logged', None) and
|
||||
not isinstance(e, KeyboardInterrupt)):
|
||||
log.log_error(e, self.logger)
|
||||
e.logged = True
|
||||
message = e.message if e.message else str(e)
|
||||
log.log_error(e, self.logger)
|
||||
self.logger.error('Skipping remaining jobs due to "{}".'.format(e))
|
||||
self.context.skip_remaining_jobs()
|
||||
raise e
|
||||
finally:
|
||||
self.finalize_run()
|
||||
@ -429,6 +431,10 @@ class Runner(object):
|
||||
if not getattr(e, 'logged', None):
|
||||
log.log_error(e, self.logger)
|
||||
e.logged = True
|
||||
if isinstance(e, ExecutionError):
|
||||
raise e
|
||||
elif isinstance(e, TargetError):
|
||||
context.tm.verify_target_responsive()
|
||||
finally:
|
||||
self.logger.info('Completing job {}'.format(job.id))
|
||||
self.send(signal.JOB_COMPLETED)
|
||||
@ -467,6 +473,8 @@ class Runner(object):
|
||||
if not getattr(e, 'logged', None):
|
||||
log.log_error(e, self.logger)
|
||||
e.logged = True
|
||||
if isinstance(e, TargetError) or isinstance(e, TimeoutError):
|
||||
context.tm.verify_target_responsive()
|
||||
raise e
|
||||
finally:
|
||||
try:
|
||||
@ -474,8 +482,10 @@ class Runner(object):
|
||||
job.process_output(context)
|
||||
self.pm.process_job_output(context)
|
||||
self.pm.export_job_output(context)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
job.set_status(Status.PARTIAL)
|
||||
if isinstance(e, TargetError) or isinstance(e, TimeoutError):
|
||||
context.tm.verify_target_responsive()
|
||||
raise
|
||||
|
||||
except KeyboardInterrupt:
|
||||
|
@ -105,7 +105,7 @@ from collections import OrderedDict
|
||||
from wa.framework import signal
|
||||
from wa.framework.plugin import Plugin
|
||||
from wa.framework.exception import (WAError, TargetNotRespondingError, TimeoutError,
|
||||
WorkloadError)
|
||||
WorkloadError, TargetError)
|
||||
from wa.utils.log import log_error
|
||||
from wa.utils.misc import isiterable
|
||||
from wa.utils.types import identifier, enum, level
|
||||
@ -263,6 +263,9 @@ class ManagedCallback(object):
|
||||
def __call__(self, context):
|
||||
if self.instrument.is_enabled:
|
||||
try:
|
||||
if not context.tm.is_responsive:
|
||||
logger.debug("Target unreponsive; skipping callback {}".format(self.callback))
|
||||
return
|
||||
self.callback(context)
|
||||
except (KeyboardInterrupt, TargetNotRespondingError, TimeoutError): # pylint: disable=W0703
|
||||
raise
|
||||
@ -274,6 +277,8 @@ class ManagedCallback(object):
|
||||
context.add_event(e.message)
|
||||
if isinstance(e, WorkloadError):
|
||||
context.set_status('FAILED')
|
||||
elif isinstance(e, TargetError) or isinstance(e, TimeoutError):
|
||||
context.tm.verify_target_responsive()
|
||||
else:
|
||||
if context.current_job:
|
||||
context.set_status('PARTIAL')
|
||||
|
@ -115,6 +115,9 @@ class Job(object):
|
||||
self.run_time = datetime.utcnow() - start_time
|
||||
|
||||
def process_output(self, context):
|
||||
if not context.tm.is_responsive:
|
||||
self.logger.info('Target unresponsive; not processing job output.')
|
||||
return
|
||||
self.logger.info('Processing output for job {} [{}]'.format(self.id, self.iteration))
|
||||
if self.status != Status.FAILED:
|
||||
with signal.wrap('WORKLOAD_RESULT_EXTRACTION', self, context):
|
||||
@ -124,11 +127,17 @@ class Job(object):
|
||||
self.workload.update_output(context)
|
||||
|
||||
def teardown(self, context):
|
||||
if not context.tm.is_responsive:
|
||||
self.logger.info('Target unresponsive; not tearing down.')
|
||||
return
|
||||
self.logger.info('Tearing down job {} [{}]'.format(self.id, self.iteration))
|
||||
with signal.wrap('WORKLOAD_TEARDOWN', self, context):
|
||||
self.workload.teardown(context)
|
||||
|
||||
def finalize(self, context):
|
||||
if not context.tm.is_responsive:
|
||||
self.logger.info('Target unresponsive; not finalizing.')
|
||||
return
|
||||
self.logger.info('Finalizing job {} [{}]'.format(self.id, self.iteration))
|
||||
with signal.wrap('WORKLOAD_FINALIZED', self, context):
|
||||
self.workload.finalize(context)
|
||||
|
@ -1,7 +1,7 @@
|
||||
import logging
|
||||
|
||||
from wa.framework import signal
|
||||
from wa.framework.exception import ExecutionError, TargetError
|
||||
from wa.framework.exception import ExecutionError, TargetError, TargetNotRespondingError
|
||||
from wa.framework.plugin import Parameter
|
||||
from wa.framework.target.descriptor import (get_target_description,
|
||||
instantiate_target,
|
||||
@ -90,8 +90,9 @@ class TargetManager(object):
|
||||
self.logger.info('Target unresponsive; performing hard reset')
|
||||
self.target.reboot(hard=True)
|
||||
self.is_responsive = True
|
||||
raise ExecutionError('Target became unresponsive but was recovered.')
|
||||
else:
|
||||
raise ExecutionError('Target unresponsive and hard reset not supported; bailing.')
|
||||
raise TargetNotRespondingError('Target unresponsive and hard reset not supported; bailing.')
|
||||
|
||||
def _init_target(self):
|
||||
tdesc = get_target_description(self.target_name)
|
||||
|
Loading…
x
Reference in New Issue
Block a user