mirror of
https://github.com/ARM-software/workload-automation.git
synced 2025-09-03 20:02:39 +01:00
Added retries
Failed jobs will now be automatically retried. This is controlled by two new settings: retry_on_status - a list of statuses which will be considered failures and result in a retry; max_retries - the number of retries before giving up.
This commit is contained in:
@@ -308,6 +308,12 @@ def _combine_ids(*args):
|
||||
return '_'.join(args)
|
||||
|
||||
|
||||
class status_list(list):
|
||||
|
||||
def append(self, item):
|
||||
list.append(self, str(item).upper())
|
||||
|
||||
|
||||
class RunConfiguration(object):
|
||||
"""
|
||||
Loads and maintains the unified configuration for this run. This includes configuration
|
||||
@@ -470,6 +476,8 @@ class RunConfiguration(object):
|
||||
RunConfigurationItem('reboot_policy', 'scalar', 'replace'),
|
||||
RunConfigurationItem('device', 'scalar', 'replace'),
|
||||
RunConfigurationItem('flashing_config', 'dict', 'replace'),
|
||||
RunConfigurationItem('retry_on_status', 'list', 'replace'),
|
||||
RunConfigurationItem('max_retries', 'scalar', 'replace'),
|
||||
]
|
||||
|
||||
# Configuration specified for each workload spec. "workload_parameters"
|
||||
@@ -523,6 +531,8 @@ class RunConfiguration(object):
|
||||
self.workload_specs = []
|
||||
self.flashing_config = {}
|
||||
self.other_config = {} # keeps track of used config for extensions other than of the four main kinds.
|
||||
self.retry_on_status = status_list(['FAILED', 'PARTIAL'])
|
||||
self.max_retries = 3
|
||||
self._used_config_items = []
|
||||
self._global_instrumentation = []
|
||||
self._reboot_policy = None
|
||||
|
@@ -401,8 +401,9 @@ class RunnerJob(object):
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, spec):
|
||||
def __init__(self, spec, retry=0):
|
||||
self.spec = spec
|
||||
self.retry = retry
|
||||
self.iteration = None
|
||||
self.result = IterationResult(self.spec)
|
||||
|
||||
@@ -423,6 +424,10 @@ class Runner(object):
|
||||
"""Internal runner error."""
|
||||
pass
|
||||
|
||||
@property
|
||||
def config(self):
|
||||
return self.context.config
|
||||
|
||||
@property
|
||||
def current_job(self):
|
||||
if self.job_queue:
|
||||
@@ -623,8 +628,16 @@ class Runner(object):
|
||||
|
||||
def _finalize_job(self):
|
||||
self.context.run_result.iteration_results.append(self.current_job.result)
|
||||
self.job_queue[0].iteration = self.context.current_iteration
|
||||
self.completed_jobs.append(self.job_queue.pop(0))
|
||||
job = self.job_queue.pop(0)
|
||||
job.iteration = self.context.current_iteration
|
||||
if job.result.status in self.config.retry_on_status:
|
||||
if job.retry >= self.config.max_retries:
|
||||
self.logger.error('Exceeded maxium number of retries. Abandoning job.')
|
||||
else:
|
||||
self.logger.info('Job status was {}. Retrying...'.format(job.result.status))
|
||||
retry_job = RunnerJob(job.spec, job.retry + 1)
|
||||
self.job_queue.insert(0, retry_job)
|
||||
self.completed_jobs.append(job)
|
||||
self.context.end_job()
|
||||
|
||||
def _finalize_run(self):
|
||||
|
Reference in New Issue
Block a user