1
0
mirror of https://github.com/ARM-software/workload-automation.git synced 2025-09-03 11:52:36 +01:00

Handle retry failed

This commit is contained in:
Sergei Trofimov
2017-03-09 17:39:44 +00:00
parent 547ae1c10e
commit ccdc3492e7
8 changed files with 159 additions and 88 deletions

View File

@@ -147,6 +147,11 @@ class ExecutionContext(object):
self.completed_jobs.append(self.current_job)
self.current_job = None
def move_failed(self, job):
attempt = job.retries + 1
failed_name = '{}-attempt{:02}'.format(job.output_name, attempt)
self.output.move_failed(job.output_name, failed_name)
class OldExecutionContext(object):
"""
@@ -439,8 +444,28 @@ class Runner(object):
def run_next_job(self, context):
job = context.start_job()
self.logger.info('Running job {}'.format(job.id))
try:
log.indent()
self.do_run_job(job, context)
except KeyboardInterrupt:
job.status = JobStatus.ABORTED
raise
except Exception as e:
job.status = JobStatus.FAILED
if not getattr(e, 'logged', None):
log.log_error(e, self.logger)
e.logged = True
finally:
self.logger.info('Completing job {}'.format(job.id))
self.send(signal.JOB_COMPLETED)
context.end_job()
log.dedent()
self.check_job(job)
def do_run_job(self, job, context):
job.status = JobStatus.RUNNING
log.indent()
self.send(signal.JOB_STARTED)
with signal.wrap('JOB_TARGET_CONFIG', self):
@@ -455,7 +480,7 @@ class Runner(object):
try:
with signal.wrap('JOB_OUTPUT_PROCESSED', self):
job.run(context)
job.process_output(context)
except Exception:
job.status = JobStatus.PARTIAL
raise
@@ -474,10 +499,22 @@ class Runner(object):
with signal.wrap('JOB_TEARDOWN', self):
job.teardown(context)
log.dedent()
self.logger.info('Completing job {}'.format(job.id))
self.send(signal.JOB_COMPLETED)
context.end_job()
def check_job(self, job):
rc = self.context.cm.run_config
if job.status in rc.retry_on_status:
if job.retries < rc.max_retries:
msg = 'Job {} iteration {} complted with status {}. retrying...'
self.logger.error(msg.format(job.id, job.status, job.iteration))
self.context.move_failed(job)
job.retries += 1
job.status = JobStatus.PENDING
self.context.job_queue.insert(0, job)
else:
msg = 'Job {} iteration {} completed with status {}. '\
'Max retries exceeded.'
self.logger.error(msg.format(job.id, job.status, job.iteration))
else: # status not in retry_on_status
self.logger.info('Job completed with status {}'.format(job.status))
def send(self, s):
signal.send(s, self, self.context)