1
0
mirror of https://github.com/ARM-software/workload-automation.git synced 2024-10-06 10:51:13 +01:00
workload-automation/wa/utils/diff.py
Sergei Trofimov 64f9cf79e4 utils/diff: fix diff_interrupt_files on Python 2
During Python 3 migration, the izip call inside diff_interrupt_files has
been replaced with a zip call (zip returning an iterator in Python 3).
Import zip from builtins to ensure that it also produces an iterator in
Python 2.

Also fix the associated unit test by importing the correct function.
2018-06-14 15:04:56 +01:00

88 lines
3.9 KiB
Python

import os
import re
from builtins import zip
from future.moves.itertools import zip_longest
from wa.utils.misc import as_relative, diff_tokens, write_table
from wa.utils.misc import ensure_file_directory_exists as _f
from wa.utils.misc import ensure_directory_exists as _d
def diff_interrupt_files(before, after, result): # pylint: disable=R0914
output_lines = []
with open(before) as bfh:
with open(after) as ofh:
for bline, aline in zip(bfh, ofh):
bchunks = bline.strip().split()
while True:
achunks = aline.strip().split()
if achunks[0] == bchunks[0]:
diffchunks = ['']
diffchunks.append(achunks[0])
diffchunks.extend([diff_tokens(b, a) for b, a
in zip(bchunks[1:], achunks[1:])])
output_lines.append(diffchunks)
break
else: # new category appeared in the after file
diffchunks = ['>'] + achunks
output_lines.append(diffchunks)
try:
aline = next(ofh)
except StopIteration:
break
# Offset heading columns by one to allow for row labels on subsequent
# lines.
output_lines[0].insert(0, '')
# Any "columns" that do not have headings in the first row are not actually
# columns -- they are a single column where space-spearated words got
# split. Merge them back together to prevent them from being
# column-aligned by write_table.
table_rows = [output_lines[0]]
num_cols = len(output_lines[0])
for row in output_lines[1:]:
table_row = row[:num_cols]
table_row.append(' '.join(row[num_cols:]))
table_rows.append(table_row)
with open(result, 'w') as wfh:
write_table(table_rows, wfh)
def diff_sysfs_dirs(before, after, result): # pylint: disable=R0914
before_files = []
for root, dirs, files in os.walk(before):
before_files.extend([os.path.join(root, f) for f in files])
before_files = list(filter(os.path.isfile, before_files))
files = [os.path.relpath(f, before) for f in before_files]
after_files = [os.path.join(after, f) for f in files]
diff_files = [os.path.join(result, f) for f in files]
for bfile, afile, dfile in zip(before_files, after_files, diff_files):
if not os.path.isfile(afile):
logger.debug('sysfs_diff: {} does not exist or is not a file'.format(afile))
continue
with open(bfile) as bfh, open(afile) as afh: # pylint: disable=C0321
with open(_f(dfile), 'w') as dfh:
for i, (bline, aline) in enumerate(zip_longest(bfh, afh), 1):
if aline is None:
logger.debug('Lines missing from {}'.format(afile))
break
bchunks = re.split(r'(\W+)', bline)
achunks = re.split(r'(\W+)', aline)
if len(bchunks) != len(achunks):
logger.debug('Token length mismatch in {} on line {}'.format(bfile, i))
dfh.write('xxx ' + bline)
continue
if ((len([c for c in bchunks if c.strip()]) == len([c for c in achunks if c.strip()]) == 2) and
(bchunks[0] == achunks[0])):
# if there are only two columns and the first column is the
# same, assume it's a "header" column and do not diff it.
dchunks = [bchunks[0]] + [diff_tokens(b, a) for b, a in zip(bchunks[1:], achunks[1:])]
else:
dchunks = [diff_tokens(b, a) for b, a in zip(bchunks, achunks)]
dfh.write(''.join(dchunks))