From 6f629601be418c8bd9fc7672a2a130526d92a8e0 Mon Sep 17 00:00:00 2001 From: Sergei Trofimov <sergei.trofimov@arm.com> Date: Thu, 20 Jul 2017 09:32:48 +0100 Subject: [PATCH 1/3] utils/trace_cmd: lazy evaluation of event fields Parsing if a trace events body text into fields is potentially expensive, so only do it if the fields are being accessed. --- wlauto/utils/trace_cmd.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/wlauto/utils/trace_cmd.py b/wlauto/utils/trace_cmd.py index 7a7b36d1..d5b7556c 100644 --- a/wlauto/utils/trace_cmd.py +++ b/wlauto/utils/trace_cmd.py @@ -41,7 +41,22 @@ class TraceCmdEvent(object): """ - __slots__ = ['thread', 'reporting_cpu_id', 'timestamp', 'name', 'text', 'fields'] + __slots__ = ['thread', 'reporting_cpu_id', 'timestamp', 'name', 'text', '_fields', '_parser'] + + @property + def fields(self): + if self._fields is not None: + return self._fields + self._fields = {} + + if self._parser: + try: + self._parser(self, self.text) + except Exception: # pylint: disable=broad-except + # unknown format assume user does not care or know how to + # parse self.text + pass + return self._fields def __init__(self, thread, cpu_id, ts, name, body, parser=None): """ @@ -70,15 +85,8 @@ class TraceCmdEvent(object): self.timestamp = numeric(ts) self.name = name self.text = body - self.fields = {} - - if parser: - try: - parser(self, self.text) - except Exception: # pylint: disable=broad-except - # unknown format assume user does not care or know how to - # parse self.text - pass + self._fields = None + self._parser = parser def __getattr__(self, name): try: @@ -143,7 +151,7 @@ def default_body_parser(event, text): v = int(v) except ValueError: pass - event.fields[k] = v + event._fields[k] = v def regex_body_parser(regex, flags=0): @@ -166,9 +174,9 @@ def regex_body_parser(regex, flags=0): if match: for k, v in match.groupdict().iteritems(): try: - event.fields[k] = int(v) + event._fields[k] = int(v) except ValueError: - event.fields[k] = v + event._fields[k] = v return regex_parser_func From 93dba8b35dd98b53cf46e5fc072084f4761d0a24 Mon Sep 17 00:00:00 2001 From: Sergei Trofimov <sergei.trofimov@arm.com> Date: Thu, 20 Jul 2017 10:55:38 +0100 Subject: [PATCH 2/3] utils/trace_cmd: optimize event parsing Use string searching and splitting instead of regex matching to parse trace events. This significantly speeds up trace parsing. --- wlauto/utils/trace_cmd.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/wlauto/utils/trace_cmd.py b/wlauto/utils/trace_cmd.py index d5b7556c..c2b655ce 100644 --- a/wlauto/utils/trace_cmd.py +++ b/wlauto/utils/trace_cmd.py @@ -232,9 +232,6 @@ EVENT_PARSER_MAP = { 'sched_wakeup_new': sched_wakeup_parser, } -TRACE_EVENT_REGEX = re.compile(r'^\s+(?P<thread>\S+.*?\S+)\s+\[(?P<cpu_id>\d+)\]\s+(?P<ts>[\d.]+):\s+' - r'(?P<name>[^:]+):\s+(?P<body>.*?)\s*$') - HEADER_REGEX = re.compile(r'^\s*(?:version|cpus)\s*=\s*([\d.]+)\s*$') DROPPED_EVENTS_REGEX = re.compile(r'CPU:(?P<cpu_id>\d+) \[\d*\s*EVENTS DROPPED\]') @@ -292,12 +289,12 @@ class TraceCmdTrace(object): if matched: continue - match = TRACE_EVENT_REGEX.search(line) - if not match: - logger.warning('Invalid trace event: "{}"'.format(line)) + # <thread/cpu/timestamp>: <event name>: <event body> + parts = line.split(': ', 2) + if len(parts) != 3: continue - event_name = match.group('name') + event_name = parts[1].strip() if filters: found = False @@ -308,10 +305,22 @@ class TraceCmdTrace(object): if not found: continue + thread_string, rest = parts[0].split(' [') + cpu_id, ts_string = rest.split('] ') + body = parts[2].strip() + body_parser = EVENT_PARSER_MAP.get(event_name, default_body_parser) if isinstance(body_parser, basestring) or isinstance(body_parser, re._pattern_type): # pylint: disable=protected-access body_parser = regex_body_parser(body_parser) - yield TraceCmdEvent(parser=body_parser, **match.groupdict()) + + yield TraceCmdEvent( + thread=thread_string.strip(), + cpu_id=cpu_id, + ts=ts_string.strip(), + name=event_name, + body=body, + parser=body_parser, + ) else: if self.filter_markers and inside_marked_region: logger.warning('Did not encounter a stop marker in trace') From 33874a8f71364cc99d6d67636cbd6ddff8e202e2 Mon Sep 17 00:00:00 2001 From: Sergei Trofimov <sergei.trofimov@arm.com> Date: Thu, 20 Jul 2017 11:01:14 +0100 Subject: [PATCH 3/3] utils/trace_cmd: reduce regex use Do not attempt to regex match each line for dropped events/preamble. Use substring search to detect them first, and only use regex on relevant lines. --- wlauto/utils/trace_cmd.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/wlauto/utils/trace_cmd.py b/wlauto/utils/trace_cmd.py index c2b655ce..77556b35 100644 --- a/wlauto/utils/trace_cmd.py +++ b/wlauto/utils/trace_cmd.py @@ -274,20 +274,23 @@ class TraceCmdTrace(object): elif TRACE_MARKER_STOP in line: break - match = DROPPED_EVENTS_REGEX.search(line) - if match: - yield DroppedEventsEvent(match.group('cpu_id')) - continue - - matched = False - for rx in [HEADER_REGEX, EMPTY_CPU_REGEX]: - match = rx.search(line) + if 'EVENTS DROPPED' in line: + match = DROPPED_EVENTS_REGEX.search(line) if match: - logger.debug(line.strip()) - matched = True - break - if matched: - continue + yield DroppedEventsEvent(match.group('cpu_id')) + continue + + if line.startswith('version') or line.startswith('cpus') or\ + line.startswith('CPU:'): + matched = False + for rx in [HEADER_REGEX, EMPTY_CPU_REGEX]: + match = rx.search(line) + if match: + logger.debug(line.strip()) + matched = True + break + if matched: + continue # <thread/cpu/timestamp>: <event name>: <event body> parts = line.split(': ', 2)