From 7e79eeb9cb81b93dc7a098d294c2ecbee7f69bde Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 25 Jan 2019 13:25:05 +0000 Subject: [PATCH] target: Robustify read_tree_values() target.read_tree_values() has several weaknesses. It doesn't support files with ':' in their name, and it fails when reading binary files. In essence, these limitations are caused by its fragile implementation based on grep in shutils. In order to robustify read_tree_values(), use tar and base64 to send the content of a tree to the host, which can then process it from there. In the process, read_tree_values() gains two new arguments: - decode_unicode: must be set to work with text/utf-8 content; - strip_null_chars: must be set to remove '\00' chars from text files. Both are set to true by default to keep backward compatibility with the existing code. Suggested-by: Douglas Raillard Signed-off-by: Quentin Perret --- devlib/bin/scripts/shutils.in | 32 ++++++++++-------- devlib/target.py | 63 +++++++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 24 deletions(-) diff --git a/devlib/bin/scripts/shutils.in b/devlib/bin/scripts/shutils.in index 35213ef..37991a7 100755 --- a/devlib/bin/scripts/shutils.in +++ b/devlib/bin/scripts/shutils.in @@ -255,26 +255,32 @@ sched_get_kernel_attributes() { # Misc ################################################################################ -read_tree_values() { +read_tree_tgz_b64() { BASEPATH=$1 MAXDEPTH=$2 + TMPBASE=$3 if [ ! -e $BASEPATH ]; then echo "ERROR: $BASEPATH does not exist" exit 1 fi - PATHS=$($BUSYBOX find $BASEPATH -follow -maxdepth $MAXDEPTH) - i=0 - for path in $PATHS; do - i=$(expr $i + 1) - if [ $i -gt 1 ]; then - break; - fi + cd $TMPBASE + TMP_FOLDER=$($BUSYBOX realpath $($BUSYBOX mktemp -d XXXXXX)) + + # 'tar' doesn't work as expected on debugfs, so copy the tree first to + # workaround the issue + cd $BASEPATH + for CUR_FILE in $($BUSYBOX find . 
-follow -type f -maxdepth $MAXDEPTH); do + $BUSYBOX cp --parents $CUR_FILE $TMP_FOLDER/ 2> /dev/null done - if [ $i -gt 1 ]; then - $BUSYBOX grep -s '' $PATHS - fi + + cd $TMP_FOLDER + $BUSYBOX tar cz * | $BUSYBOX base64 + + # Clean-up the tmp folder since we won't need it any more + cd $TMPBASE + rm -rf $TMP_FOLDER } get_linux_system_id() { @@ -347,8 +353,8 @@ ftrace_get_function_stats) hotplug_online_all) hotplug_online_all ;; -read_tree_values) - read_tree_values $* +read_tree_tgz_b64) + read_tree_tgz_b64 $* ;; get_linux_system_id) get_linux_system_id $* diff --git a/devlib/target.py b/devlib/target.py index 475c166..e210693 100644 --- a/devlib/target.py +++ b/devlib/target.py @@ -13,6 +13,9 @@ # limitations under the License. # +import io +import base64 +import gzip import os import re import time @@ -684,23 +687,61 @@ class Target(object): timeout = duration + 10 self.execute('sleep {}'.format(duration), timeout=timeout) - def read_tree_values_flat(self, path, depth=1, check_exit_code=True): - command = 'read_tree_values {} {}'.format(quote(path), depth) + def read_tree_values_flat(self, path, depth=1, check_exit_code=True, + decode_unicode=True, strip_null_chars=True): + command = 'read_tree_tgz_b64 {} {} {}'.format(quote(path), depth, + quote(self.working_directory)) output = self._execute_util(command, as_root=self.is_rooted, check_exit_code=check_exit_code) - accumulator = defaultdict(list) - for entry in output.strip().split('\n'): - if ':' not in entry: - continue - path, value = entry.strip().split(':', 1) - accumulator[path].append(value) + result = {} + + # Unpack the archive in memory + tar_gz = base64.b64decode(output) + tar_gz_bytes = io.BytesIO(tar_gz) + tar_buf = gzip.GzipFile(fileobj=tar_gz_bytes).read() + tar_bytes = io.BytesIO(tar_buf) + with tarfile.open(fileobj=tar_bytes) as tar: + for member in tar.getmembers(): + try: + content_f = tar.extractfile(member) + # ignore exotic members like sockets + except Exception: + continue + # if it is a 
file and not a folder + if content_f: + content = content_f.read() + if decode_unicode: + try: + content = content.decode('utf-8').strip() + if strip_null_chars: + content = content.replace('\x00', '').strip() + except UnicodeDecodeError: + content = '' + + name = self.path.join(path, member.name) + result[name] = content - result = {k: '\n'.join(v).strip() for k, v in accumulator.items()} return result - def read_tree_values(self, path, depth=1, dictcls=dict, check_exit_code=True): - value_map = self.read_tree_values_flat(path, depth, check_exit_code) + def read_tree_values(self, path, depth=1, dictcls=dict, + check_exit_code=True, decode_unicode=True, + strip_null_chars=True): + """ + Reads the content of all files under a given tree + + :path: path to the tree + :depth: maximum tree depth to read + :dictcls: type of the dict used to store the results + :check_exit_code: raise an exception if the shutil command fails + :decode_unicode: decode the content of files as utf-8 + :strip_null_chars: remove '\x00' chars from the content of utf-8 + decoded files + + :returns: a tree-like dict with the content of files as leafs + """ + value_map = self.read_tree_values_flat(path, depth, check_exit_code, + decode_unicode, strip_null_chars) return _build_path_tree(value_map, path, self.path.sep, dictcls) # internal methods