mirror of
https://github.com/ARM-software/workload-automation.git
synced 2025-01-18 20:11:20 +00:00
core,execution: Add run skipping on job failure
Add a global configuration parameter ``bail_on_job_failure`` that causes all remaining jobs in a run to be skipped if a job fails both its initial execution and all of its retries. This parameter is disabled by default.
This commit is contained in:
parent
66e220d444
commit
971289698b
@ -725,6 +725,17 @@ class RunConfiguration(Configuration):
|
|||||||
failed, but continue attempting to run others.
|
failed, but continue attempting to run others.
|
||||||
'''
|
'''
|
||||||
),
|
),
|
||||||
|
ConfigurationPoint(
|
||||||
|
'bail_on_job_failure',
|
||||||
|
kind=bool,
|
||||||
|
default=False,
|
||||||
|
description='''
|
||||||
|
When a job fails during its run phase, WA will attempt to retry the
|
||||||
|
job, then continue with remaining jobs after. Setting this to
|
||||||
|
``True`` means WA will skip remaining jobs and end the run if a job
|
||||||
|
has retried the maximum number of times, and still fails.
|
||||||
|
'''
|
||||||
|
),
|
||||||
ConfigurationPoint(
|
ConfigurationPoint(
|
||||||
'allow_phone_home',
|
'allow_phone_home',
|
||||||
kind=bool, default=True,
|
kind=bool, default=True,
|
||||||
|
@ -25,7 +25,7 @@ from datetime import datetime
|
|||||||
import wa.framework.signal as signal
|
import wa.framework.signal as signal
|
||||||
from wa.framework import instrument as instrumentation
|
from wa.framework import instrument as instrumentation
|
||||||
from wa.framework.configuration.core import Status
|
from wa.framework.configuration.core import Status
|
||||||
from wa.framework.exception import TargetError, HostError, WorkloadError
|
from wa.framework.exception import TargetError, HostError, WorkloadError, ExecutionError
|
||||||
from wa.framework.exception import TargetNotRespondingError, TimeoutError # pylint: disable=redefined-builtin
|
from wa.framework.exception import TargetNotRespondingError, TimeoutError # pylint: disable=redefined-builtin
|
||||||
from wa.framework.job import Job
|
from wa.framework.job import Job
|
||||||
from wa.framework.output import init_job_output
|
from wa.framework.output import init_job_output
|
||||||
@ -657,6 +657,9 @@ class Runner(object):
|
|||||||
self.logger.error(msg.format(job.id, job.iteration, job.status))
|
self.logger.error(msg.format(job.id, job.iteration, job.status))
|
||||||
self.context.failed_jobs += 1
|
self.context.failed_jobs += 1
|
||||||
self.send(signal.JOB_FAILED)
|
self.send(signal.JOB_FAILED)
|
||||||
|
if rc.bail_on_job_failure:
|
||||||
|
raise ExecutionError('Job {} failed, bailing.'.format(job.id))
|
||||||
|
|
||||||
else: # status not in retry_on_status
|
else: # status not in retry_on_status
|
||||||
self.logger.info('Job completed with status {}'.format(job.status))
|
self.logger.info('Job completed with status {}'.format(job.status))
|
||||||
if job.status != 'ABORTED':
|
if job.status != 'ABORTED':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user