From 6f629601be418c8bd9fc7672a2a130526d92a8e0 Mon Sep 17 00:00:00 2001
From: Sergei Trofimov <sergei.trofimov@arm.com>
Date: Thu, 20 Jul 2017 09:32:48 +0100
Subject: [PATCH 1/3] utils/trace_cmd: lazy evaluation of event fields

Parsing if a trace events body text into fields is potentially
expensive, so only do it if the fields are being accessed.
---
 wlauto/utils/trace_cmd.py | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/wlauto/utils/trace_cmd.py b/wlauto/utils/trace_cmd.py
index 7a7b36d1..d5b7556c 100644
--- a/wlauto/utils/trace_cmd.py
+++ b/wlauto/utils/trace_cmd.py
@@ -41,7 +41,22 @@ class TraceCmdEvent(object):
 
     """
 
-    __slots__ = ['thread', 'reporting_cpu_id', 'timestamp', 'name', 'text', 'fields']
+    __slots__ = ['thread', 'reporting_cpu_id', 'timestamp', 'name', 'text', '_fields', '_parser']
+
+    @property
+    def fields(self):
+        if self._fields is not None:
+            return self._fields
+        self._fields = {}
+
+        if self._parser:
+            try:
+                self._parser(self, self.text)
+            except Exception:  # pylint: disable=broad-except
+                # unknown format assume user does not care or know how to
+                # parse self.text
+                pass
+        return self._fields
 
     def __init__(self, thread, cpu_id, ts, name, body, parser=None):
         """
@@ -70,15 +85,8 @@ class TraceCmdEvent(object):
         self.timestamp = numeric(ts)
         self.name = name
         self.text = body
-        self.fields = {}
-
-        if parser:
-            try:
-                parser(self, self.text)
-            except Exception:  # pylint: disable=broad-except
-                # unknown format assume user does not care or know how to
-                # parse self.text
-                pass
+        self._fields = None
+        self._parser = parser
 
     def __getattr__(self, name):
         try:
@@ -143,7 +151,7 @@ def default_body_parser(event, text):
                 v = int(v)
             except ValueError:
                 pass
-            event.fields[k] = v
+            event._fields[k] = v
 
 
 def regex_body_parser(regex, flags=0):
@@ -166,9 +174,9 @@ def regex_body_parser(regex, flags=0):
         if match:
             for k, v in match.groupdict().iteritems():
                 try:
-                    event.fields[k] = int(v)
+                    event._fields[k] = int(v)
                 except ValueError:
-                    event.fields[k] = v
+                    event._fields[k] = v
 
     return regex_parser_func
 

From 93dba8b35dd98b53cf46e5fc072084f4761d0a24 Mon Sep 17 00:00:00 2001
From: Sergei Trofimov <sergei.trofimov@arm.com>
Date: Thu, 20 Jul 2017 10:55:38 +0100
Subject: [PATCH 2/3] utils/trace_cmd: optimize event parsing

Use string searching and splitting instead of regex matching to parse
trace events. This significantly speeds up trace parsing.
---
 wlauto/utils/trace_cmd.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/wlauto/utils/trace_cmd.py b/wlauto/utils/trace_cmd.py
index d5b7556c..c2b655ce 100644
--- a/wlauto/utils/trace_cmd.py
+++ b/wlauto/utils/trace_cmd.py
@@ -232,9 +232,6 @@ EVENT_PARSER_MAP = {
     'sched_wakeup_new': sched_wakeup_parser,
 }
 
-TRACE_EVENT_REGEX = re.compile(r'^\s+(?P<thread>\S+.*?\S+)\s+\[(?P<cpu_id>\d+)\]\s+(?P<ts>[\d.]+):\s+'
-                               r'(?P<name>[^:]+):\s+(?P<body>.*?)\s*$')
-
 HEADER_REGEX = re.compile(r'^\s*(?:version|cpus)\s*=\s*([\d.]+)\s*$')
 
 DROPPED_EVENTS_REGEX = re.compile(r'CPU:(?P<cpu_id>\d+) \[\d*\s*EVENTS DROPPED\]')
@@ -292,12 +289,12 @@ class TraceCmdTrace(object):
                 if matched:
                     continue
 
-                match = TRACE_EVENT_REGEX.search(line)
-                if not match:
-                    logger.warning('Invalid trace event: "{}"'.format(line))
+                # <thread/cpu/timestamp>: <event name>: <event body>
+                parts = line.split(': ', 2)
+                if len(parts) != 3:
                     continue
 
-                event_name = match.group('name')
+                event_name = parts[1].strip()
 
                 if filters:
                     found = False
@@ -308,10 +305,22 @@ class TraceCmdTrace(object):
                     if not found:
                         continue
 
+                thread_string, rest = parts[0].split(' [')
+                cpu_id, ts_string = rest.split('] ')
+                body = parts[2].strip()
+
                 body_parser = EVENT_PARSER_MAP.get(event_name, default_body_parser)
                 if isinstance(body_parser, basestring) or isinstance(body_parser, re._pattern_type):  # pylint: disable=protected-access
                     body_parser = regex_body_parser(body_parser)
-                yield TraceCmdEvent(parser=body_parser, **match.groupdict())
+
+                yield TraceCmdEvent(
+                    thread=thread_string.strip(),
+                    cpu_id=cpu_id,
+                    ts=ts_string.strip(),
+                    name=event_name,
+                    body=body,
+                    parser=body_parser,
+                )
             else:
                 if self.filter_markers and inside_marked_region:
                     logger.warning('Did not encounter a stop marker in trace')

From 33874a8f71364cc99d6d67636cbd6ddff8e202e2 Mon Sep 17 00:00:00 2001
From: Sergei Trofimov <sergei.trofimov@arm.com>
Date: Thu, 20 Jul 2017 11:01:14 +0100
Subject: [PATCH 3/3] utils/trace_cmd: reduce regex use

Do not attempt to regex match each line for dropped events/preamble. Use
substring search to detect them first, and only use regex on relevant
lines.
---
 wlauto/utils/trace_cmd.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/wlauto/utils/trace_cmd.py b/wlauto/utils/trace_cmd.py
index c2b655ce..77556b35 100644
--- a/wlauto/utils/trace_cmd.py
+++ b/wlauto/utils/trace_cmd.py
@@ -274,20 +274,23 @@ class TraceCmdTrace(object):
                     elif TRACE_MARKER_STOP in line:
                         break
 
-                match = DROPPED_EVENTS_REGEX.search(line)
-                if match:
-                    yield DroppedEventsEvent(match.group('cpu_id'))
-                    continue
-
-                matched = False
-                for rx in [HEADER_REGEX, EMPTY_CPU_REGEX]:
-                    match = rx.search(line)
+                if 'EVENTS DROPPED' in line:
+                    match = DROPPED_EVENTS_REGEX.search(line)
                     if match:
-                        logger.debug(line.strip())
-                        matched = True
-                        break
-                if matched:
-                    continue
+                        yield DroppedEventsEvent(match.group('cpu_id'))
+                        continue
+
+                if line.startswith('version') or line.startswith('cpus') or\
+                        line.startswith('CPU:'):
+                    matched = False
+                    for rx in [HEADER_REGEX, EMPTY_CPU_REGEX]:
+                        match = rx.search(line)
+                        if match:
+                            logger.debug(line.strip())
+                            matched = True
+                            break
+                    if matched:
+                        continue
 
                 # <thread/cpu/timestamp>: <event name>: <event body>
                 parts = line.split(': ', 2)