1
0
mirror of https://github.com/ARM-software/workload-automation.git synced 2025-06-18 14:26:09 +01:00

Added retries

Failed jobs will now be automatically retried. This is controlled by two
new settings:

retry_on_status - a list of statuses which will be considered failures and
                  result in a retry
max_retries - number of retries before giving up
This commit is contained in:
Sergei Trofimov
2015-06-18 16:36:56 +01:00
parent 51c5ef1520
commit ccea63555c
5 changed files with 70 additions and 3 deletions

@ -401,8 +401,9 @@ class RunnerJob(object):
"""
def __init__(self, spec):
def __init__(self, spec, retry=0):
self.spec = spec
self.retry = retry
self.iteration = None
self.result = IterationResult(self.spec)
@ -423,6 +424,10 @@ class Runner(object):
"""Internal runner error."""
pass
@property
def config(self):
return self.context.config
@property
def current_job(self):
if self.job_queue:
@ -623,8 +628,16 @@ class Runner(object):
def _finalize_job(self):
self.context.run_result.iteration_results.append(self.current_job.result)
self.job_queue[0].iteration = self.context.current_iteration
self.completed_jobs.append(self.job_queue.pop(0))
job = self.job_queue.pop(0)
job.iteration = self.context.current_iteration
if job.result.status in self.config.retry_on_status:
if job.retry >= self.config.max_retries:
self.logger.error('Exceeded maxium number of retries. Abandoning job.')
else:
self.logger.info('Job status was {}. Retrying...'.format(job.result.status))
retry_job = RunnerJob(job.spec, job.retry + 1)
self.job_queue.insert(0, retry_job)
self.completed_jobs.append(job)
self.context.end_job()
def _finalize_run(self):