From a2b0705ff0a9f3de642d837f5e9db8b096e96e6e Mon Sep 17 00:00:00 2001
From: Sergei Trofimov <sergei.trofimov@arm.com>
Date: Fri, 11 May 2018 16:21:45 +0100
Subject: [PATCH] processors: add cpustates

Add cpustates output processor. This is more-or-less a port of the
cpustates processor from WA, however there are some differences:

- Idle states are now tracked individually per-CPU. This will fix
  processing traces from targets that have different number of idle
  states on different clusters.
- Simplify the parameter list for report_power_stats:
    - Replace paths to individual report files with a path to a single
      directory. A subdirectory will be created under it which will
      contain all the reports.
    - Replace the individual bits of information about CPUs (core
      names, idle states, etc) with a list of CpuInfo objects.
- Clean up and simplify the code a bit:
    - Make all reports mandatory -- the marginal cost of generating
      an additional report is minimal compared to tracking power states
      in the first place.
    - Standardize the interface for Reporters and Reports.
- Rename some of the reports to something a bit more meaningful.
- The stand-alone command line interface is not ported for now, as it
  is now possible to run this offline on existing results using
  "wa process".
---
 wa/output_processors/cpustates.py | 151 +++++++
 wa/utils/cpustates.py             | 713 ++++++++++++++++++++++++++++++
 2 files changed, 864 insertions(+)
 create mode 100755 wa/output_processors/cpustates.py
 create mode 100755 wa/utils/cpustates.py

diff --git a/wa/output_processors/cpustates.py b/wa/output_processors/cpustates.py
new file mode 100755
index 00000000..938c2993
--- /dev/null
+++ b/wa/output_processors/cpustates.py
@@ -0,0 +1,151 @@
+#    Copyright 2015-2018 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import csv
+from collections import OrderedDict
+
+from wa import OutputProcessor, Parameter
+from wa.utils.types import list_of_strings
+from wa.utils.cpustates import report_power_stats
+
+
+def _get_cpustates_description():
+    """
+    Derive the processor description from report_power_stats()'s docstring,
+    dropping its ``:param`` entries, which do not apply to an OutputProcessor.
+    """
+    kept = []
+    doc_lines = iter(report_power_stats.__doc__.split('\n'))
+    try:
+        current = next(doc_lines)
+        while True:
+            if current.strip().startswith(':param'):
+                while current.strip():
+                    current = next(doc_lines)
+            kept.append(current)
+            current = next(doc_lines)
+    except StopIteration:
+        pass
+    return '\n'.join(kept)
+
+
+class CpuStatesProcessor(OutputProcessor):
+
+    name = 'cpustates'
+
+    description = _get_cpustates_description()
+
+    parameters = [
+        Parameter('use_ratios', kind=bool, default=False,
+                  description="""
+                  By default proportional values will be reported as
+                  percentages, if this flag is enabled, they will be reported
+                  as ratios instead.
+                  """),
+        Parameter('no_idle', kind=bool, default=False,
+                  description="""
+                  Indicate that there will be no idle transitions in the trace.
+                  By default, a core will be reported as being in an "unknown"
+                  state until the first idle transtion for that core. Normally,
+                  this is not an issue, as cores are "nudged" as part of the
+                  setup to ensure that there is an idle transtion before the
+                  meassured region. However, if all idle states for the core
+                  have been disabled, or if the kernel does not have cpuidle,
+                  the nudge will not result in an idle transition, which would
+                  cause the cores to be reported to be in "unknown" state for
+                  the entire execution.
+
+                  If this parameter is set to ``True``, the processor will
+                  assume that cores are running prior to the begining of the
+                  issue, and they will leave unknown state on the first
+                  frequency transition.
+                  """),
+        Parameter('split_wfi_states', kind=bool, default=False,
+                  description="""
+                  WFI is a very shallow idle state. The core remains powered on
+                  when in this state, which means the power usage while in this
+                  state will depend on the current voltage, and therefore current
+                  frequency.
+
+                  Setting this to ``True`` will track time spent in WFI at
+                  each frequency separately, allowing to gain the most accurate
+                  picture of energy usage.
+                  """),
+    ]
+
+    def initialize(self):
+        self.iteration_reports = OrderedDict()
+
+    def process_job_output(self, output, target_info, run_output):
+        trace_file = output.get_artifact_path('trace-cmd-txt')
+        if not trace_file:
+            self.logger.warning('Text trace does not appear to have been generated; skipping this iteration.')
+            return
+
+        self.logger.info('Generating power state reports from trace...')
+        reports = report_power_stats(  # pylint: disable=unbalanced-tuple-unpacking
+            trace_file=trace_file,
+            output_basedir=output.basepath,
+            cpus=target_info.cpus,
+            use_ratios=self.use_ratios,
+            no_idle=self.no_idle,
+            split_wfi_states=self.split_wfi_states,
+        )
+
+        for report in reports.itervalues():
+            output.add_artifact(report.name, report.filepath, kind='data')
+
+        iteration_id = (output.id, output.label, output.iteration)
+        self.iteration_reports[iteration_id] = reports
+
+    def process_run_output(self, output, target_info):
+        if not self.iteration_reports:
+            self.logger.warning('No power state reports generated.')
+            return
+
+        parallel_rows = []
+        powerstate_rows = []
+        for iteration_id, reports in self.iteration_reports.iteritems():
+            job_id, workload, iteration = iteration_id
+            parallel_report = reports['parallel-stats']
+            powerstate_report = reports['power-state-stats']
+
+            for record in parallel_report.values:
+                parallel_rows.append([job_id, workload, iteration] + record)
+            for state in sorted(powerstate_report.state_stats):
+                stats = powerstate_report.state_stats[state]
+                powerstate_rows.append([job_id, workload, iteration, state] +
+                                       ['{:.3f}'.format(s if s is not None else 0)
+                                           for s in stats])
+
+        outpath = output.get_path('parallel-stats.csv')
+        with open(outpath, 'w') as wfh:
+            writer = csv.writer(wfh)
+            writer.writerow(['id', 'workload', 'iteration', 'cluster',
+                             'number_of_cores', 'total_time',
+                             '%time', '%running_time'])
+            writer.writerows(parallel_rows)
+        output.add_artifact('run-parallel-stats', outpath, kind='export')
+
+        outpath = output.get_path('power-state-stats.csv')
+        with open(outpath, 'w') as wfh:
+            writer = csv.writer(wfh)
+            headers = ['id', 'workload', 'iteration', 'state']
+            headers += ['{} CPU{}'.format(c, i)
+                        for i, c in enumerate(powerstate_report.core_names)]
+            writer.writerow(headers)
+            writer.writerows(powerstate_rows)
+        output.add_artifact('run-power-state-stats', outpath, kind='export')
diff --git a/wa/utils/cpustates.py b/wa/utils/cpustates.py
new file mode 100755
index 00000000..494b747e
--- /dev/null
+++ b/wa/utils/cpustates.py
@@ -0,0 +1,713 @@
+#    Copyright 2015-2018 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import division
+import os
+import sys
+import csv
+import re
+import logging
+from ctypes import c_int32
+from collections import defaultdict
+import argparse
+
+from wa.utils.trace_cmd import TraceCmdParser, trace_has_marker, TRACE_MARKER_START, TRACE_MARKER_STOP
+
+
+logger = logging.getLogger('cpustates')
+
+INIT_CPU_FREQ_REGEX = re.compile(r'CPU (?P<cpu>\d+) FREQUENCY: (?P<freq>\d+) kHZ')
+DEVLIB_CPU_FREQ_REGEX = re.compile(r'cpu_frequency(?:_devlib):\s+state=(?P<freq>\d+)\s+cpu_id=(?P<cpu>\d+)')
+
+
+class CorePowerTransitionEvent(object):
+
+    kind = 'transition'
+    __slots__ = ['timestamp', 'cpu_id', 'frequency', 'idle_state']
+
+    def __init__(self, timestamp, cpu_id, frequency=None, idle_state=None):
+        if (frequency is None) == (idle_state is None):
+            raise ValueError('Power transition must specify a frequency or an idle_state, but not both.')
+        self.timestamp = timestamp
+        self.cpu_id = cpu_id
+        self.frequency = frequency
+        self.idle_state = idle_state
+
+    def __str__(self):
+        return 'cpu {} @ {} -> freq: {} idle: {}'.format(self.cpu_id, self.timestamp,
+                                                         self.frequency, self.idle_state)
+
+    def __repr__(self):
+        return 'CPTE(c:{} t:{} f:{} i:{})'.format(self.cpu_id, self.timestamp,
+                                                  self.frequency, self.idle_state)
+
+
+class CorePowerDroppedEvents(object):
+
+    kind = 'dropped_events'
+    __slots__ = ['cpu_id']
+
+    def __init__(self, cpu_id):
+        self.cpu_id = cpu_id
+
+    def __str__(self):
+        return 'DROPPED EVENTS on CPU{}'.format(self.cpu_id)
+
+    __repr__ = __str__
+
+
+class TraceMarkerEvent(object):
+
+    kind = 'marker'
+    __slots__ = ['name']
+
+    def __init__(self, name):
+        self.name = name
+
+    def __str__(self):
+        return 'MARKER: {}'.format(self.name)
+
+
+class CpuPowerState(object):
+
+    __slots__ = ['frequency', 'idle_state']
+
+    @property
+    def is_idling(self):
+        return self.idle_state is not None and self.idle_state >= 0
+
+    @property
+    def is_active(self):
+        return self.idle_state == -1
+
+    def __init__(self, frequency=None, idle_state=None):
+        self.frequency = frequency
+        self.idle_state = idle_state
+
+    def __str__(self):
+        return 'CP(f:{} i:{})'.format(self.frequency, self.idle_state)
+
+    __repr__ = __str__
+
+
+class SystemPowerState(object):
+
+    __slots__ = ['timestamp', 'cpus']
+
+    @property
+    def num_cores(self):
+        return len(self.cpus)
+
+    def __init__(self, num_cores, no_idle=False):
+        self.timestamp = None
+        self.cpus = []
+        idle_state = -1 if no_idle else None
+        for _ in xrange(num_cores):
+            self.cpus.append(CpuPowerState(idle_state=idle_state))
+
+    def copy(self):
+        new = SystemPowerState(self.num_cores)
+        new.timestamp = self.timestamp
+        for i, c in enumerate(self.cpus):
+            new.cpus[i].frequency = c.frequency
+            new.cpus[i].idle_state = c.idle_state
+        return new
+
+    def __str__(self):
+        return 'SP(t:{} Cs:{})'.format(self.timestamp, self.cpus)
+
+    __repr__ = __str__
+
+
+class PowerStateProcessor(object):
+    """
+    This takes a stream of power transition events and yields a timeline stream
+    of system power states.
+
+    """
+
+    @property
+    def cpu_states(self):
+        return self.power_state.cpus
+
+    @property
+    def current_time(self):
+        return self.power_state.timestamp
+
+    @current_time.setter
+    def current_time(self, value):
+        self.power_state.timestamp = value
+
+    def __init__(self, cpus, wait_for_marker=True, no_idle=None):
+        # cpus: list of CpuInfo-like objects; no_idle=None means "guess".
+        if no_idle is None:
+            # Without cpuidle states there will be no idle transitions to see.
+            no_idle = not (cpus[0].cpuidle and cpus[0].cpuidle.states)
+        self.power_state = SystemPowerState(len(cpus), no_idle=no_idle)
+        self.requested_states = {}  # cpu_id -> requested state
+        self.wait_for_marker = wait_for_marker
+        self._saw_start_marker = self._saw_stop_marker = False
+        self.exceptions = []  # errors caught (and not re-raised) by process()
+        self.idle_related_cpus = build_idle_state_map(cpus)
+
+    def process(self, event_stream):
+        for event in event_stream:
+            try:
+                next_state = self.update_power_state(event)
+                if self._saw_start_marker or not self.wait_for_marker:
+                    yield next_state
+                if self._saw_stop_marker:
+                    break
+            except Exception as e:  # pylint: disable=broad-except
+                self.exceptions.append(e)
+        else:
+            if self.wait_for_marker:
+                logger.warning("Did not see a STOP marker in the trace")
+
+    def update_power_state(self, event):
+        """
+        Update the tracked power state based on the specified event and
+        return updated power state.
+
+        """
+        if event.kind == 'transition':
+            self._process_transition(event)
+        elif event.kind == 'dropped_events':
+            self._process_dropped_events(event)
+        elif event.kind == 'marker':
+            if event.name == 'START':
+                self._saw_start_marker = True
+            elif event.name == 'STOP':
+                self._saw_stop_marker = True
+        else:
+            raise ValueError('Unexpected event type: {}'.format(event.kind))
+        return self.power_state.copy()
+
+    def _process_transition(self, event):
+        self.current_time = event.timestamp
+        if event.idle_state is None:
+            self.cpu_states[event.cpu_id].frequency = event.frequency
+        else:
+            if event.idle_state == -1:
+                self._process_idle_exit(event)
+            else:
+                self._process_idle_entry(event)
+
+    def _process_dropped_events(self, event):
+        self.cpu_states[event.cpu_id].frequency = None
+        old_idle_state = self.cpu_states[event.cpu_id].idle_state
+        self.cpu_states[event.cpu_id].idle_state = None
+
+        related_ids = self.idle_related_cpus[(event.cpu_id, old_idle_state)]
+        for rid in related_ids:
+            self.cpu_states[rid].idle_state = None
+
+    def _process_idle_entry(self, event):
+        if self.cpu_states[event.cpu_id].is_idling:
+            raise ValueError('Got idle state entry event for an idling core: {}'.format(event))
+        self.requested_states[event.cpu_id] = event.idle_state
+        self._try_transition_to_idle_state(event.cpu_id, event.idle_state)
+
+    def _process_idle_exit(self, event):
+        if self.cpu_states[event.cpu_id].is_active:
+            raise ValueError('Got idle state exit event for an active core: {}'.format(event))
+        self.requested_states.pop(event.cpu_id, None)  # remove outstanding request if there is one
+        old_state = self.cpu_states[event.cpu_id].idle_state
+        self.cpu_states[event.cpu_id].idle_state = -1
+
+        related_ids = self.idle_related_cpus[(event.cpu_id, old_state)]
+        if old_state is not None:
+            new_state = old_state - 1
+            for rid in related_ids:
+                if self.cpu_states[rid].idle_state > new_state:
+                    self._try_transition_to_idle_state(rid, new_state)
+
+    def _try_transition_to_idle_state(self, cpu_id, idle_state):
+        related_ids = self.idle_related_cpus[(cpu_id, idle_state)]
+
+        # Tristate: True - can transition, False - can't transition,
+        #           None - unknown idle state on at least one related cpu
+        transition_check = self._can_enter_state(related_ids, idle_state)
+
+        if transition_check is None:
+            # Unknown state on a related cpu means we're not sure whether we're
+            # entering requested state or a shallower one
+            self.cpu_states[cpu_id].idle_state = None
+            return
+
+        # Keep trying shallower states until all related
+        while not self._can_enter_state(related_ids, idle_state):
+            idle_state -= 1
+            related_ids = self.idle_related_cpus[(cpu_id, idle_state)]
+
+        self.cpu_states[cpu_id].idle_state = idle_state
+        for rid in related_ids:
+            self.cpu_states[rid].idle_state = idle_state
+
+    def _can_enter_state(self, related_ids, state):
+        """
+        This is a tri-state check. Returns ``True`` if related cpu states allow transition
+        into this state, ``False`` if related cpu states don't allow transition into this
+        state, and ``None`` if at least one of the related cpus is in an unknown state
+        (so the decision of whether a transition is possible cannot be made).
+
+        """
+        for rid in related_ids:
+            rid_requested_state = self.requested_states.get(rid, None)
+            rid_current_state = self.cpu_states[rid].idle_state
+            if rid_current_state is None:
+                return None
+            if rid_current_state < state:
+                if rid_requested_state is None or rid_requested_state < state:
+                    return False
+        return True
+
+
+def stream_cpu_power_transitions(events):
+    for event in events:
+        if event.name == 'cpu_idle':
+            state = c_int32(event.state).value
+            yield CorePowerTransitionEvent(event.timestamp, event.cpu_id, idle_state=state)
+        elif event.name == 'cpu_frequency':
+            yield CorePowerTransitionEvent(event.timestamp, event.cpu_id, frequency=event.state)
+        elif event.name == 'DROPPED EVENTS DETECTED':
+            yield CorePowerDroppedEvents(event.cpu_id)
+        elif event.name == 'print':
+            if TRACE_MARKER_START in event.text:
+                yield TraceMarkerEvent('START')
+            elif TRACE_MARKER_STOP in event.text:
+                yield TraceMarkerEvent('STOP')
+            else:
+                if 'cpu_frequency' in event.text:
+                    match = DEVLIB_CPU_FREQ_REGEX.search(event.text)
+                else:
+                    match = INIT_CPU_FREQ_REGEX.search(event.text)
+                if match:
+                    yield CorePowerTransitionEvent(event.timestamp,
+                                                   int(match.group('cpu')),
+                                                   frequency=int(match.group('freq')))
+
+
+def gather_core_states(system_state_stream, freq_dependent_idle_states=None):  # NOQA
+    """
+    Collapse each system state into ``(timestamp, [(idle_state, frequency)])``;
+    frequency is kept only while running or in a frequency-dependent state.
+    """
+    tracked = [] if freq_dependent_idle_states is None else freq_dependent_idle_states
+    for snapshot in system_state_stream:
+        collapsed = []
+        for cpu in snapshot.cpus:
+            idle, freq = cpu.idle_state, cpu.frequency
+            if idle != -1 and idle not in tracked:
+                freq = None
+            elif idle != -1 and freq is None:
+                idle = None  # frequency-dependent state, frequency unknown
+            collapsed.append((idle, freq))
+        yield (snapshot.timestamp, collapsed)
+
+
+def record_state_transitions(reporter, stream):
+    for event in stream:
+        if event.kind == 'transition':
+            reporter.record_transition(event)
+        yield event
+
+
+class PowerStateTransitions(object):
+
+    name = 'transitions-timeline'
+
+    def __init__(self, output_directory):
+        self.filepath = os.path.join(output_directory, 'state-transitions-timeline.csv')
+        self._wfh = open(self.filepath, 'w')
+        self.writer = csv.writer(self._wfh)
+        headers = ['timestamp', 'cpu_id', 'frequency', 'idle_state']
+        self.writer.writerow(headers)
+
+    def update(self, timestamp, core_states):  # NOQA
+        # Just recording transitions, not doing anything
+        # with states.
+        pass
+
+    def record_transition(self, transition):
+        row = [transition.timestamp, transition.cpu_id,
+               transition.frequency, transition.idle_state]
+        self.writer.writerow(row)
+
+    def report(self):
+        return self
+
+    def write(self):
+        self._wfh.close()
+
+
+class PowerStateTimeline(object):
+
+    name = 'state-timeline'
+
+    def __init__(self, output_directory, cpus):
+        self.filepath = os.path.join(output_directory, 'power-state-timeline.csv')
+        self.idle_state_names = {cpu.id: [s.name for s in cpu.cpuidle.states] for cpu in cpus}
+        self._wfh = open(self.filepath, 'w')
+        self.writer = csv.writer(self._wfh)
+        headers = ['ts'] + ['{} CPU{}'.format(cpu.name, cpu.id) for cpu in cpus]
+        self.writer.writerow(headers)
+
+    def update(self, timestamp, core_states):  # NOQA
+        row = [timestamp]
+        for cpu_idx, (idle_state, frequency) in enumerate(core_states):
+            if frequency is None:
+                if idle_state == -1:
+                    row.append('Running (unknown kHz)')
+                elif idle_state is None:
+                    row.append('unknown')
+                else:
+                    row.append(self.idle_state_names[cpu_idx][idle_state])
+            else:  # frequency is not None
+                if idle_state == -1:
+                    row.append(frequency)
+                elif idle_state is None:
+                    row.append('unknown')
+                else:
+                    row.append('{} ({})'.format(self.idle_state_names[cpu_idx][idle_state],
+                                                frequency))
+        self.writer.writerow(row)
+
+    def report(self):
+        return self
+
+    def write(self):
+        self._wfh.close()
+
+
+class ParallelStats(object):
+
+    def __init__(self, output_directory, cpus, use_ratios=False):
+        self.filepath = os.path.join(output_directory, 'parallel-stats.csv')
+        self.clusters = defaultdict(set)
+        self.use_ratios = use_ratios
+
+        clusters = []
+        for cpu in cpus:
+            if cpu.cpufreq.related_cpus not in clusters:
+                clusters.append(cpu.cpufreq.related_cpus)
+
+        for i, clust in enumerate(clusters):
+            self.clusters[i] = set(clust)
+        self.clusters['all'] = set([cpu.id for cpu in cpus])
+
+        self.first_timestamp = None
+        self.last_timestamp = None
+        self.previous_states = None
+        self.parallel_times = defaultdict(lambda: defaultdict(int))
+        self.running_times = defaultdict(int)
+
+    def update(self, timestamp, core_states):
+        if self.last_timestamp is not None:
+            delta = timestamp - self.last_timestamp
+            active_cores = [i for i, c in enumerate(self.previous_states)
+                            if c and c[0] == -1]
+            for cluster, cluster_cores in self.clusters.iteritems():
+                clust_active_cores = len(cluster_cores.intersection(active_cores))
+                self.parallel_times[cluster][clust_active_cores] += delta
+                if clust_active_cores:
+                    self.running_times[cluster] += delta
+        else:  # initial update
+            self.first_timestamp = timestamp
+
+        self.last_timestamp = timestamp
+        self.previous_states = core_states
+
+    def report(self):  # NOQA
+        """
+        Build a ParallelReport with one row per cluster and active-core count.
+        Returns ``None`` if update() has not recorded any timestamp.
+        """
+        if self.last_timestamp is None:
+            return None
+        report = ParallelReport(self.filepath)
+        total_time = self.last_timestamp - self.first_timestamp
+        scale = 1 if self.use_ratios else 100
+        fmt = '{{:.{}f}}'.format(3 if self.use_ratios else 1)
+        for cluster in sorted(self.parallel_times):
+            running_time = self.running_times[cluster]
+            for n in xrange(len(self.clusters[cluster]) + 1):
+                time = self.parallel_times[cluster][n]
+                # total_time is zero when every update carried the same
+                # timestamp; report zero for that case instead of raising
+                # ZeroDivisionError.
+                time_pc = time / total_time * scale if total_time else 0
+                # share of the time during which at least one core ran
+                if n and running_time:
+                    running_time_pc = time / running_time * scale
+                else:
+                    running_time_pc = 0
+                report.add([cluster, n,
+                            fmt.format(time),
+                            fmt.format(time_pc),
+                            fmt.format(running_time_pc),
+                            ])
+        return report
+
+
+class ParallelReport(object):
+
+    name = 'parallel-stats'
+
+    def __init__(self, filepath):
+        self.filepath = filepath
+        self.values = []
+
+    def add(self, value):
+        self.values.append(value)
+
+    def write(self):
+        with open(self.filepath, 'w') as wfh:
+            writer = csv.writer(wfh)
+            writer.writerow(['cluster', 'number_of_cores', 'total_time', '%time', '%running_time'])
+            writer.writerows(self.values)
+
+
+class PowerStateStats(object):
+
+    def __init__(self, output_directory, cpus, use_ratios=False):
+        self.filepath = os.path.join(output_directory, 'power-state-stats.csv')
+        self.core_names = [cpu.name for cpu in cpus]
+        self.idle_state_names = {cpu.id: [s.name for s in cpu.cpuidle.states] for cpu in cpus}
+        self.use_ratios = use_ratios
+        self.first_timestamp = None
+        self.last_timestamp = None
+        self.previous_states = None
+        self.cpu_states = defaultdict(lambda: defaultdict(int))
+
+    def update(self, timestamp, core_states):  # NOQA
+        if self.last_timestamp is not None:
+            delta = timestamp - self.last_timestamp
+            for cpu, (idle, freq) in enumerate(self.previous_states):
+                if idle == -1:
+                    if freq is not None:
+                        state = '{:07}KHz'.format(freq)
+                    else:
+                        state = 'Running (unknown KHz)'
+                elif freq:
+                    state = '{}-{:07}KHz'.format(self.idle_state_names[cpu][idle], freq)
+                elif idle is not None:
+                    state = self.idle_state_names[cpu][idle]
+                else:
+                    state = 'unknown'
+                self.cpu_states[cpu][state] += delta
+        else:  # initial update
+            self.first_timestamp = timestamp
+
+        self.last_timestamp = timestamp
+        self.previous_states = core_states
+
+    def report(self):
+        """Return a PowerStateStatsReport, or ``None`` if nothing was recorded."""
+        if self.last_timestamp is None:
+            return None
+        total_time = self.last_timestamp - self.first_timestamp
+        scale = 1 if self.use_ratios else 100
+        state_stats = defaultdict(lambda: [None] * len(self.core_names))
+        for cpu, states in self.cpu_states.iteritems():
+            for state, time in states.iteritems():
+                # total_time is zero when every update shared one timestamp;
+                # report zero residency instead of dividing by zero.
+                time_pc = time / total_time * scale if total_time else 0
+                state_stats[state][cpu] = time_pc
+
+        precision = 3 if self.use_ratios else 1
+        return PowerStateStatsReport(self.filepath, state_stats, self.core_names, precision)
+
+
+class PowerStateStatsReport(object):
+
+    name = 'power-state-stats'
+
+    def __init__(self, filepath, state_stats, core_names, precision=2):
+        self.filepath = filepath
+        self.state_stats = state_stats
+        self.core_names = core_names
+        self.precision = precision
+
+    def write(self):
+        with open(self.filepath, 'w') as wfh:
+            writer = csv.writer(wfh)
+            headers = ['state'] + ['{} CPU{}'.format(c, i)
+                                   for i, c in enumerate(self.core_names)]
+            writer.writerow(headers)
+            for state in sorted(self.state_stats):
+                stats = self.state_stats[state]
+                fmt = '{{:.{}f}}'.format(self.precision)
+                writer.writerow([state] + [fmt.format(s if s is not None else 0)
+                                           for s in stats])
+
+
+class CpuUtilizationTimeline(object):
+
+    name = 'utilization-timeline'
+
+    def __init__(self, output_directory, cpus):
+        self.filepath = os.path.join(output_directory, 'utilization-timeline.csv')
+        self._wfh = open(self.filepath, 'w')
+        self.writer = csv.writer(self._wfh)
+
+        headers = ['ts'] + ['{} CPU{}'.format(cpu.name, cpu.id) for cpu in cpus]
+        self.writer.writerow(headers)
+        self._max_freq_list = [cpu.cpufreq.available_frequencies[-1] for cpu in cpus]
+
+    def update(self, timestamp, core_states):  # NOQA
+        row = [timestamp]
+        for core, [idle_state, frequency] in enumerate(core_states):
+            if frequency is not None:
+                frequency /= float(self._max_freq_list[core])
+                row.append(frequency)
+            else:
+                row.append(None)
+        self.writer.writerow(row)
+
+    def report(self):
+        return self
+
+    def write(self):
+        self._wfh.close()
+
+
+def build_idle_state_map(cpus):
+    """Map (cpu index, idle state index) -> other CPUs sharing that state."""
+    mapping = defaultdict(list)
+    for idx, cpu in enumerate(cpus):
+        siblings = set(cpu.cpufreq.related_cpus) - set([idx])
+        # States at or past num_states - 1 involve the cpufreq-related CPUs;
+        # shallower states affect this CPU alone.
+        deepest = cpu.cpuidle.num_states - 1
+        for state, _ in enumerate(cpu.cpuidle.states):
+            mapping[(idx, state)] = list(siblings) if state >= deepest else []
+    return mapping
+
+
+def report_power_stats(trace_file, cpus, output_basedir, use_ratios=False, no_idle=None,
+                       split_wfi_states=False):
+    """
+    Process trace-cmd output to generate timelines and statistics of CPU power
+    state (a.k.a P- and C-state) transitions in the trace.
+
+    The results will be written into a subdirectory called "power-states" under
+    the specified ``output_basedir``.
+
+    :param trace_file: trace-cmd's text trace to process.
+    :param cpus: A list of ``CpuInfo`` objects describing a target's CPUs.
+                 These are typically reported as part of ``TargetInfo`` in
+                 WA output.
+    :param output_basedir: Base location for the output. This directory must
+                        exist and must not contain a directory or file
+                        named ``"power-states"``.
+    :param use_ratios: By default, stats will be reported as percentages. Set
+                        this to ``True`` to report stats as decimals in the
+                        ``0 <= value <= 1`` range instead.
+    :param no_idle: ``False`` if cpuidle and at least one idle state per CPU are
+                    enabled, should be ``True`` otherwise. This influences the
+                    assumptions about CPU's initial states. If not explicitly
+                    set, the value for this will be guessed based on whether
+                    cpuidle states are present in the first ``CpuInfo``.
+    :param split_wfi_states: If ``True``, track idle state 0 separately per frequency.
+
+    The output directory will contain the following files:
+
+    power-state-stats.csv
+        Power state residency statistics for each CPU. Shows the percentage of
+        time a CPU has spent in each of its available power states.
+
+    parallel-stats.csv
+        Parallel execution stats for each CPU cluster, and combined stats for
+        the whole system.
+
+    power-state-timeline.csv
+        Timeline of CPU power states. Shows which power state each CPU is in at
+        a point in time.
+
+    state-transitions-timeline.csv
+        Timeline of CPU power state transitions. Each entry shows a CPU's
+        transition from one power state to another.
+
+    utilization-timeline.csv
+        Timeline of CPU utilizations.
+
+    .. note:: Timeline entries aren't at regular intervals, but at times of
+              power transition events.
+
+    Stats are generated by assembling a pipeline consisting of the following
+    stages:
+
+        1. Parse trace into trace events
+        2. Filter trace events into power state transition events
+        3. Record power state transitions
+        4. Convert transitions into power states.
+        5. Collapse the power states into timestamped ``(C state, P state)``
+           tuples for each cpu.
+        6. Update reporters/stats generators with cpu states.
+
+    """
+    output_directory = os.path.join(output_basedir, 'power-states')
+    if not os.path.isdir(output_directory):
+        os.mkdir(output_directory)
+
+    freq_dependent_idle_states = []
+    if split_wfi_states:
+        freq_dependent_idle_states = [0]
+
+    # init trace, processor, and reporters
+    # note: filter_markers is False here, even though we *will* filter by them. The
+    #       reason for this is that we want to observe events before the start
+    #       marker in order to establish the initial power states.
+    parser = TraceCmdParser(filter_markers=False,
+                            events=['cpu_idle', 'cpu_frequency', 'print'])
+    ps_processor = PowerStateProcessor(cpus, wait_for_marker=trace_has_marker(trace_file),
+                                       no_idle=no_idle)
+    transitions_reporter = PowerStateTransitions(output_directory)
+    reporters = [
+        ParallelStats(output_directory, cpus, use_ratios),
+        PowerStateStats(output_directory, cpus, use_ratios),
+        PowerStateTimeline(output_directory, cpus),
+        CpuUtilizationTimeline(output_directory, cpus),
+        transitions_reporter,
+    ]
+
+    # assemble the pipeline
+    event_stream = parser.parse(trace_file)
+    transition_stream = stream_cpu_power_transitions(event_stream)
+    recorded_trans_stream = record_state_transitions(transitions_reporter, transition_stream)
+    power_state_stream = ps_processor.process(recorded_trans_stream)
+    core_state_stream = gather_core_states(power_state_stream, freq_dependent_idle_states)
+
+    # execute the pipeline
+    for timestamp, states in core_state_stream:
+        for reporter in reporters:
+            reporter.update(timestamp, states)
+
+    # report any issues encountered while executing the pipeline
+    if ps_processor.exceptions:
+        logger.warning('There were errors while processing trace:')
+        for e in ps_processor.exceptions:
+            logger.warning(str(e))
+
+    # generate reports
+    reports = {}
+    for reporter in reporters:
+        report = reporter.report()
+        report.write()
+        reports[report.name] = report
+    return reports
+