Adding cpustates result processor (and script)

2025-11-14 22:06:13 +00:00 · 2015-06-03 16:15:44 +01:00
parent 9971041e45
commit 02af02f0cb
4 changed files with 840 additions and 1 deletions
--- a/scripts/cpustates
+++ b/scripts/cpustates
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+#    Copyright 2015 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Thin command-line wrapper: argument parsing and report generation are both
+# performed by main() in wlauto.utils.power.
+from wlauto.utils.power import main
+main()
--- a/wlauto/result_processors/cpustate.py
+++ b/wlauto/result_processors/cpustate.py
@@ -0,0 +1,186 @@
+#    Copyright 2015 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import csv
+from collections import OrderedDict
+
+from wlauto import ResultProcessor, Parameter
+from wlauto.exceptions import ConfigError
+from wlauto.instrumentation import instrument_is_installed
+from wlauto.utils.power import report_power_stats
+
+
+class CpuStatesProcessor(ResultProcessor):
+
+    name = 'cpustates'
+    description = '''
+    Process power ftrace to produce CPU state and parallelism stats.
+
+    Parses trace-cmd output to extract power events and uses those to generate
+    statistics about parallelism and frequency/idle core residency.
+
+    .. note:: trace-cmd instrument must be enabled and configured to collect
+              at least ``power:cpu_idle`` and ``power:cpu_frequency`` events.
+              Reporting should also be enabled (it is by default) as
+              ``cpustate`` parses the text version of the trace.
+
+    This generates two reports for the run:
+
+    *parallel.csv*
+
+    Shows what percentage of time was spent with N cores active (for N
+    from 0 to the total number of cores), for a cluster or for a system as
+    a whole. It contain the following columns:
+
+        :workload: The workload label
+        :iteration: iteration that was run
+        :cluster: The cluster for which statics are reported. The value of
+                  ``"all"`` indicates that this row reports statistics for
+                  the whole system.
+        :number_of_cores: number of cores active. ``0`` indicates the cluster
+                          was idle.
+        :total_time: Total time spent in this state during workload execution
+        :%time: Percentage of total workload execution time spent in this state
+        :%running_time: Percentage of the time the cluster was active (i.e.
+                        ignoring time the cluster was idling) spent in this
+                        state.
+
+    *cpustate.csv*
+
+    Shows percentage of the time a core spent in a particular power state. The first
+    column names the state is followed by a column for each core. Power states include
+    available DVFS frequencies (for heterogeneous systems, this is the union of
+    frequencies supported by different core types) and idle states. Some shallow
+    states (e.g. ARM WFI) will consume different amount of power depending on the
+    current OPP. For such states, there will be an entry for each opp. ``"unknown"``
+    indicates the percentage of time for which a state could not be established from the
+    trace. This is usually due to core state being unknown at the beginning of the trace,
+    but may also be caused by dropped events in the middle of the trace.
+
+    '''
+
+    parameters = [
+        Parameter('first_cluster_state', kind=int, default=2,
+                  description="""
+                  The first idle state which is common to a cluster.
+                  """),
+        Parameter('first_system_state', kind=int, default=3,
+                  description="""
+                  The first idle state which is common to all cores.
+                  """),
+        Parameter('write_iteration_reports', kind=bool, default=False,
+                  description="""
+                  By default, this instrument will generate reports for the entire run
+                  in the overall output directory. Enabling this option will, in addition,
+                  create reports in each iteration's output directory. The formats of these
+                  reports will be similar to the overall report, except they won't mention
+                  the workload name or iteration number (as that is implied by their location).
+                  """),
+        # The name must match the ``self.use_ratios`` attribute that is read in
+        # process_iteration_result().
+        Parameter('use_ratios', kind=bool, default=False,
+                  description="""
+                  By default proportional values will be reported as percentages, if this
+                  flag is enabled, they will be reported as ratios instead.
+                  """),
+
+    ]
+
+    def validate(self):
+        """Raise ``ConfigError`` unless the ``trace-cmd`` instrument is installed."""
+        if not instrument_is_installed('trace-cmd'):
+            message = '''
+            {} requires "trace-cmd" instrument to be installed and the collection of at
+            least "power:cpu_frequency" and "power:cpu_idle" events to be enabled during workload
+            execution.
+            '''
+            raise ConfigError(message.format(self.name).strip())
+
+    def initialize(self, context):
+        """
+        Cache the device description needed to interpret the trace.
+
+        Raises ``ConfigError`` if the device does not report the ``cpuidle``
+        capability or has no ``core_names``.
+
+        """
+        # pylint: disable=attribute-defined-outside-init
+        device = context.device
+        if not device.has('cpuidle'):
+            raise ConfigError('Device does not appear to have cpuidle capability; is the right module installed?')
+        if not device.core_names:
+            message = '{} requires "core_names" and "core_clusters" to be specified for the device.'
+            raise ConfigError(message.format(self.name))
+        self.core_names = device.core_names
+        self.core_clusters = device.core_clusters
+        # Idle state descriptions, ordered by idle state id.
+        idle_states = {s.id: s.desc for s in device.get_cpuidle_states()}
+        self.idle_state_names = [idle_states[i] for i in sorted(idle_states.keys())]
+        self.num_idle_states = len(self.idle_state_names)
+        # (workload label, iteration) -> (parallel report, power state report),
+        # kept in insertion order for the run-level reports.
+        self.iteration_reports = OrderedDict()
+
+    def process_iteration_result(self, result, context):
+        """
+        Generate the reports for one iteration from its ``txttrace`` artifact.
+
+        The reports are stored for process_run_result() and, when
+        ``write_iteration_reports`` is set, also written into the iteration's
+        output directory. Iterations without a text trace, or whose trace
+        produced no reports, are skipped.
+
+        """
+        trace = context.get_artifact('txttrace')
+        if not trace:
+            self.logger.debug('Text trace does not appear to have been generated; skipping this iteration.')
+            return
+        self.logger.debug('Generating power state reports from trace...')
+        parallel_report, powerstate_report = report_power_stats(
+            trace_file=trace.path,
+            idle_state_names=self.idle_state_names,
+            core_names=self.core_names,
+            core_clusters=self.core_clusters,
+            num_idle_states=self.num_idle_states,
+            first_cluster_state=self.first_cluster_state,
+            first_system_state=self.first_system_state,
+            use_ratios=self.use_ratios,
+        )
+        if parallel_report is None:
+            self.logger.warning('No power state reports generated; are power '
+                                'events enabled in the trace?')
+            return
+        self.logger.debug('Reports generated.')
+
+        iteration_id = (context.spec.label, context.current_iteration)
+        self.iteration_reports[iteration_id] = (parallel_report, powerstate_report)
+        if self.write_iteration_reports:
+            self.logger.debug('Writing iteration reports')
+            parallel_report.write(os.path.join(context.output_directory, 'parallel.csv'))
+            # NOTE(review): the per-iteration file is named 'cpustates.csv' while
+            # the run-level file below is 'cpustate.csv' -- confirm which is intended.
+            powerstate_report.write(os.path.join(context.output_directory, 'cpustates.csv'))
+
+    def process_run_result(self, result, context):  # pylint: disable=too-many-locals
+        """
+        Write run-level ``parallel.csv`` and ``cpustate.csv`` files combining
+        the reports of every iteration, each row prefixed with the workload
+        label and the iteration number.
+
+        """
+        if not self.iteration_reports:
+            self.logger.warning('No power state reports generated.')
+            return
+
+        parallel_rows = []
+        powerstate_rows = []
+        for iteration_id, reports in self.iteration_reports.iteritems():
+            workload, iteration = iteration_id
+            parallel_report, powerstate_report = reports
+            for record in parallel_report.values:
+                parallel_rows.append([workload, iteration] + record)
+            for state in sorted(powerstate_report.state_stats):
+                stats = powerstate_report.state_stats[state]
+                # A core that never entered this state has no entry (None);
+                # it is reported as 0.
+                powerstate_rows.append([workload, iteration, state] +
+                                       ['{:.3f}'.format(s if s is not None else 0)
+                                           for s in stats])
+
+        with open(os.path.join(context.output_directory, 'parallel.csv'), 'w') as wfh:
+            writer = csv.writer(wfh)
+            writer.writerow(['workload', 'iteration', 'cluster',
+                             'number_of_cores', 'total_time',
+                             '%time', '%running_time'])
+            writer.writerows(parallel_rows)
+
+        with open(os.path.join(context.output_directory, 'cpustate.csv'), 'w') as wfh:
+            writer = csv.writer(wfh)
+            headers = ['workload', 'iteration', 'state']
+            # Every report was generated with self.core_names, so use it
+            # directly rather than a variable left over from the loop above.
+            headers += ['{} CPU{}'.format(c, i)
+                        for i, c in enumerate(self.core_names)]
+            writer.writerow(headers)
+            writer.writerows(powerstate_rows)
+
--- a/wlauto/utils/power.py
+++ b/wlauto/utils/power.py
@@ -0,0 +1,636 @@
+#    Copyright 2015 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import division
+import os
+import sys
+import csv
+import logging
+from ctypes import c_int32
+from collections import defaultdict
+import argparse
+
+from wlauto.utils.trace_cmd import TraceCmdTrace
+
+
+logger = logging.getLogger('power')
+
+
+class CorePowerTransitionEvent(object):
+    """
+    A frequency change or an idle state change observed on a single core.
+
+    Exactly one of ``frequency`` and ``idle_state`` must be given; the other
+    attribute is stored as ``None``. Supplying both or neither raises
+    ``ValueError``.
+
+    """
+
+    kind = 'transition'
+    __slots__ = ['timestamp', 'cpu_id', 'frequency', 'idle_state']
+
+    def __init__(self, timestamp, cpu_id, frequency=None, idle_state=None):
+        has_frequency = frequency is not None
+        has_idle_state = idle_state is not None
+        if has_frequency == has_idle_state:
+            raise ValueError('Power transition must specify a frequency or an idle_state, but not both.')
+        self.timestamp = timestamp
+        self.cpu_id = cpu_id
+        self.frequency = frequency
+        self.idle_state = idle_state
+
+    def __str__(self):
+        template = 'cpu {} @ {} -> freq: {} idle: {}'
+        return template.format(self.cpu_id, self.timestamp,
+                               self.frequency, self.idle_state)
+
+    def __repr__(self):
+        template = 'CPTE(c:{} t:{} f:{} i:{})'
+        return template.format(self.cpu_id, self.timestamp,
+                               self.frequency, self.idle_state)
+
+
+class CorePowerDroppedEvents(object):
+    """Marker event recording that trace events were dropped on one core."""
+
+    kind = 'dropped_events'
+    __slots__ = ['cpu_id']
+
+    def __init__(self, cpu_id):
+        self.cpu_id = cpu_id
+
+    def __str__(self):
+        template = 'DROPPED EVENTS on CPU{}'
+        return template.format(self.cpu_id)
+
+    # repr() and str() share one implementation.
+    __repr__ = __str__
+
+
+class CpuPowerState(object):
+    """
+    Power state of a single core: its frequency and its idle state.
+
+    An ``idle_state`` of ``-1`` means the core is active, a non-negative value
+    is the idle state the core is in, and ``None`` means the state is not set.
+
+    """
+
+    __slots__ = ['frequency', 'idle_state']
+
+    def __init__(self, frequency=None, idle_state=None):
+        self.frequency = frequency
+        self.idle_state = idle_state
+
+    @property
+    def is_idling(self):
+        current = self.idle_state
+        if current is None:
+            return False
+        return current >= 0
+
+    @property
+    def is_active(self):
+        return self.idle_state == -1
+
+    def __str__(self):
+        template = 'CP(f:{} i:{})'
+        return template.format(self.frequency, self.idle_state)
+
+    # repr() and str() share one implementation.
+    __repr__ = __str__
+
+
+class SystemPowerState(object):
+    """
+    Snapshot of the power state of every core at one timestamp.
+
+    ``cpus`` holds one ``CpuPowerState`` per core, indexed by cpu id. The
+    timestamp starts out as ``None``.
+
+    """
+
+    __slots__ = ['timestamp', 'cpus']
+
+    def __init__(self, num_cores):
+        self.timestamp = None
+        self.cpus = [CpuPowerState() for _ in xrange(num_cores)]
+
+    @property
+    def num_cores(self):
+        return len(self.cpus)
+
+    def copy(self):
+        """Return a new snapshot with its own per-core state objects."""
+        duplicate = SystemPowerState(self.num_cores)
+        duplicate.timestamp = self.timestamp
+        for target, source in zip(duplicate.cpus, self.cpus):
+            target.frequency = source.frequency
+            target.idle_state = source.idle_state
+        return duplicate
+
+    def __str__(self):
+        template = 'SP(t:{} Cs:{})'
+        return template.format(self.timestamp, self.cpus)
+
+    # repr() and str() share one implementation.
+    __repr__ = __str__
+
+
+class PowerStateProcessor(object):
+    """
+    This takes a stream of power transition events and yields a timeline stream
+    of system power states.
+
+    The processor keeps one mutable ``SystemPowerState`` and updates it for
+    every event; callers receive a copy of it after each update.
+
+    """
+
+    @property
+    def cpu_states(self):
+        # Per-core states of the tracked system state, indexed by cpu id.
+        return self.power_state.cpus
+
+    @property
+    def current_time(self):
+        # Timestamp of the tracked system state.
+        return self.power_state.timestamp
+
+    @current_time.setter
+    def current_time(self, value):
+        self.power_state.timestamp = value
+
+    def __init__(self, core_clusters, num_idle_states,
+                 first_cluster_state=sys.maxint, first_system_state=sys.maxint):
+        """
+        :core_clusters: one entry per core; its length determines the number
+                        of cores tracked. Passed on to ``build_idle_domains``.
+        :num_idle_states: passed to ``build_idle_domains`` as ``num_states``.
+        :first_cluster_state: passed on to ``build_idle_domains``.
+        :first_system_state: passed on to ``build_idle_domains``.
+
+        """
+        self.power_state = SystemPowerState(len(core_clusters))
+        # A cpu with no outstanding request reads as -1.
+        self.requested_states = defaultdict(lambda: -1)  # cpu_id -> requested state
+
+        idle_state_domains = build_idle_domains(core_clusters,
+                                                num_states=num_idle_states,
+                                                first_cluster_state=first_cluster_state,
+                                                first_system_state=first_system_state)
+        # This tells us what other cpus we need to update when we see an idle
+        # state transition event
+        self.idle_related_cpus = defaultdict(list)  # (cpu, idle_state) --> related cpus
+        for state_id, idle_state_domain in enumerate(idle_state_domains):
+            for cpu_group in idle_state_domain:
+                for cpu in cpu_group:
+                    # Every other cpu in the same group for this idle state.
+                    related = set(cpu_group) - set([cpu])
+                    self.idle_related_cpus[(cpu, state_id)] = related
+
+    def process(self, event_stream):
+        """
+        Generator: apply each event in ``event_stream`` in turn and yield the
+        system power state that results from it.
+
+        """
+        for event in event_stream:
+            yield self.update_power_state(event)
+
+    def update_power_state(self, event):
+        """
+        Update the tracked power state based on the specified event and
+        return updated power state.
+
+        The returned object is a copy, so later updates do not modify it.
+        Raises ``ValueError`` if ``event.kind`` is neither ``'transition'``
+        nor ``'dropped_events'``.
+
+        """
+        if event.kind == 'transition':
+            self._process_transition(event)
+        elif event.kind == 'dropped_events':
+            self._process_dropped_events(event)
+        else:
+            raise ValueError('Unexpected event type: {}'.format(event.kind))
+        return self.power_state.copy()
+
+    def _process_transition(self, event):
+        """
+        Advance the current time to the event and apply it: a frequency
+        change when the event carries no idle state, an idle exit when the
+        idle state is -1, and an idle entry otherwise.
+
+        """
+        self.current_time = event.timestamp
+        if event.idle_state is None:
+            self.cpu_states[event.cpu_id].frequency = event.frequency
+        else:
+            if event.idle_state == -1:
+                self._process_idle_exit(event)
+            else:
+                self._process_idle_entry(event)
+
+    def _process_dropped_events(self, event):
+        """
+        Reset the frequency and idle state of the affected cpu to ``None``,
+        and the idle state of every cpu related to it at its previous idle
+        state. The current time is left unchanged.
+
+        """
+        self.cpu_states[event.cpu_id].frequency = None
+        old_idle_state = self.cpu_states[event.cpu_id].idle_state
+        self.cpu_states[event.cpu_id].idle_state = None
+
+        # Keys that were never registered yield an empty list (defaultdict).
+        related_ids = self.idle_related_cpus[(event.cpu_id, old_idle_state)]
+        for rid in related_ids:
+            self.cpu_states[rid].idle_state = None
+
+    def _process_idle_entry(self, event):
+        """
+        Handle an idle entry event. Raises ``ValueError`` if the cpu is
+        already recorded as idling.
+
+        """
+        if self.cpu_states[event.cpu_id].is_idling:
+            raise ValueError('Got idle state entry event for an idling core: {}'.format(event))
+        self._try_transition_to_idle_state(event.cpu_id, event.idle_state)
+
+    def _process_idle_exit(self, event):
+        """
+        Handle an idle exit event: mark the cpu active (-1) and re-evaluate
+        the related cpus recorded as deeper than one level below the state
+        just left. Raises ``ValueError`` if the cpu is already recorded as
+        active.
+
+        """
+        if self.cpu_states[event.cpu_id].is_active:
+            raise ValueError('Got idle state exit event for an active core: {}'.format(event))
+        self.requested_states.pop(event.cpu_id, None)  # remove outstanding request if there is one
+        old_state = self.cpu_states[event.cpu_id].idle_state
+        self.cpu_states[event.cpu_id].idle_state = -1
+
+        related_ids = self.idle_related_cpus[(event.cpu_id, old_state)]
+        # The previous state is None when it was unknown; nothing can be
+        # derived for the related cpus in that case.
+        if old_state is not None:
+            new_state = old_state - 1
+            for rid in related_ids:
+                if self.cpu_states[rid].idle_state > new_state:
+                    self._try_transition_to_idle_state(rid, new_state)
+
+    def _try_transition_to_idle_state(self, cpu_id, idle_state):
+        """
+        Move ``cpu_id`` into ``idle_state`` if its related cpus allow it,
+        otherwise into the deepest shallower state that is allowed. When the
+        requested state cannot be entered the request is recorded in
+        ``requested_states``; when a related cpu is in an unknown state the
+        cpu's idle state is set to ``None`` instead.
+
+        """
+        related_ids = self.idle_related_cpus[(cpu_id, idle_state)]
+        # NOTE(review): the self-assignment below has no effect.
+        idle_state = idle_state
+
+        # Tristate: True - can transition, False - can't transition,
+        #           None - unknown idle state on at least one related cpu
+        transition_check = self._can_enter_state(related_ids, idle_state)
+
+        # This branch is taken for both False and None.
+        if not transition_check:
+            # If we can't enter an idle state right now, record that we've
+            # requested it, so that we may enter it later (once all related
+            # cpus also want a state at least as deep).
+            self.requested_states[cpu_id] = idle_state
+
+        if transition_check is None:
+            # Unknown state on a related cpu means we're not sure whether we're
+            # entering requested state or a shallower one
+            self.cpu_states[cpu_id].idle_state = None
+            return
+
+        # Keep trying shallower states until _can_enter_state() reports that
+        # the related cpus for that state allow the transition.
+        while not self._can_enter_state(related_ids, idle_state):
+            idle_state -= 1
+            related_ids = self.idle_related_cpus[(cpu_id, idle_state)]
+
+        self.cpu_states[cpu_id].idle_state = idle_state
+        for rid in related_ids:
+            self.cpu_states[rid].idle_state = idle_state
+            if self.requested_states[rid] == idle_state:
+                del self.requested_states[rid]  # request satisfied, so remove
+
+    def _can_enter_state(self, related_ids, state):
+        """
+        This is a tri-state check. Returns ``True`` if related cpu states allow transition
+        into this state, ``False`` if related cpu states don't allow transition into this
+        state, and ``None`` if at least one of the related cpus is in an unknown state
+        (so the decision of whether a transition is possible cannot be made).
+
+        An empty ``related_ids`` always yields ``True``.
+
+        """
+        for rid in related_ids:
+            rid_requested_state = self.requested_states[rid]
+            rid_current_state = self.cpu_states[rid].idle_state
+            if rid_current_state is None:
+                return None
+            # Blocked when the related cpu is neither in, nor has requested,
+            # a state at least as deep as ``state``.
+            if rid_current_state < state and rid_requested_state < state:
+                return False
+        return True
+
+
+def stream_cpu_power_transitions(events):
+    """
+    Generator translating trace events into power events.
+
+    ``cpu_idle`` and ``cpu_frequency`` events become
+    ``CorePowerTransitionEvent`` objects and ``DROPPED EVENTS DETECTED``
+    becomes ``CorePowerDroppedEvents``. Events with any other name are
+    ignored.
+
+    """
+    for event in events:
+        name = event.name
+        if name == 'cpu_idle':
+            # Reinterpret the value as a signed 32-bit integer, so that
+            # 0xFFFFFFFF reads as -1.
+            idle_state = c_int32(event.state).value
+            yield CorePowerTransitionEvent(event.timestamp, event.cpu_id,
+                                           idle_state=idle_state)
+        elif name == 'cpu_frequency':
+            yield CorePowerTransitionEvent(event.timestamp, event.cpu_id,
+                                           frequency=event.state)
+        elif name == 'DROPPED EVENTS DETECTED':
+            yield CorePowerDroppedEvents(event.cpu_id)
+
+
+def gather_core_states(system_state_stream, freq_dependent_idle_states=None):  # NOQA
+    """
+    Generator reducing each system state to ``(timestamp, core_states)``,
+    where ``core_states`` holds one ``(idle_state, frequency)`` tuple per cpu:
+
+    - an active cpu (idle state -1) gives ``(-1, frequency)``;
+    - a cpu in one of ``freq_dependent_idle_states`` (``[0]`` by default)
+      gives ``(idle_state, frequency)``, or ``(None, None)`` when its
+      frequency is not known;
+    - any other cpu gives ``(idle_state, None)``.
+
+    """
+    if freq_dependent_idle_states is None:
+        freq_dependent_idle_states = [0]
+
+    def describe(cpu):
+        idle = cpu.idle_state
+        if idle == -1:
+            return (-1, cpu.frequency)
+        if idle not in freq_dependent_idle_states:
+            return (idle, None)
+        if cpu.frequency is None:
+            return (None, None)
+        return (idle, cpu.frequency)
+
+    for system_state in system_state_stream:
+        core_states = [describe(cpu) for cpu in system_state.cpus]
+        yield (system_state.timestamp, core_states)
+
+
+class ParallelStats(object):
+    """
+    Accumulates, per cluster and for the system as a whole (cluster
+    ``'all'``), how much time was spent with each number of cores active.
+
+    Feed it ``(timestamp, core_states)`` pairs through ``update()`` and then
+    call ``report()``.
+
+    """
+
+    def __init__(self, core_clusters, use_ratios=False):
+        """
+        :core_clusters: one cluster id per core, indexed by cpu id.
+        :use_ratios: report proportions as ratios rather than percentages.
+
+        """
+        self.clusters = defaultdict(set)  # cluster id -> set of cpu ids
+        self.use_ratios = use_ratios
+        for i, clust in enumerate(core_clusters):
+            self.clusters[clust].add(i)
+        self.clusters['all'] = set(range(len(core_clusters)))
+
+        self.first_timestamp = None
+        self.last_timestamp = None
+        self.previous_states = None
+        # cluster -> number of active cores -> accumulated time
+        self.parallel_times = defaultdict(lambda: defaultdict(int))
+        # cluster -> accumulated time with at least one core active
+        self.running_times = defaultdict(int)
+
+    def update(self, timestamp, core_states):
+        """
+        Attribute the time elapsed since the previous update to the core
+        states recorded by that previous update, then remember
+        ``core_states`` for the next call. The first call only records the
+        starting timestamp.
+
+        """
+        if self.last_timestamp is not None:
+            delta = timestamp - self.last_timestamp
+            # A core counts as active when its idle state is -1.
+            active_cores = [i for i, c in enumerate(self.previous_states)
+                            if c and c[0] == -1]
+            for cluster, cluster_cores in self.clusters.iteritems():
+                clust_active_cores = len(cluster_cores.intersection(active_cores))
+                self.parallel_times[cluster][clust_active_cores] += delta
+                if clust_active_cores:
+                    self.running_times[cluster] += delta
+        else:  # initial update
+            self.first_timestamp = timestamp
+
+        self.last_timestamp = timestamp
+        self.previous_states = core_states
+
+    def report(self):  # NOQA
+        """
+        Return a ``ParallelReport`` with one row per cluster and number of
+        active cores: ``[cluster, n, time, %time, %running_time]``, the last
+        three formatted as strings. Returns ``None`` if ``update()`` was
+        never called.
+
+        """
+        if self.last_timestamp is None:
+            return None
+
+        report = ParallelReport()
+        total_time = self.last_timestamp - self.first_timestamp
+        scale = 1 if self.use_ratios else 100
+        precision = 3 if self.use_ratios else 1
+        fmt = '{{:.{}f}}'.format(precision)
+        for cluster in sorted(self.parallel_times):
+            running_time = self.running_times[cluster]
+            for n in xrange(len(self.clusters[cluster]) + 1):
+                time = self.parallel_times[cluster][n]
+                # total_time is zero when every update carried the same
+                # timestamp; report 0 rather than dividing by zero.
+                time_pc = time / total_time * scale if total_time else 0
+                # running_time is zero for a cluster that never had an active
+                # core; report 0 rather than dividing by zero.
+                if n and running_time:
+                    running_time_pc = time / running_time * scale
+                else:
+                    running_time_pc = 0
+                report.add([cluster, n,
+                            fmt.format(time),
+                            fmt.format(time_pc),
+                            fmt.format(running_time_pc),
+                            ])
+        return report
+
+
+class ParallelReport(object):
+    """Collects parallelism rows and writes them out as a CSV file."""
+
+    def __init__(self):
+        self.values = []
+
+    def add(self, value):
+        """Append one row to the report."""
+        self.values.append(value)
+
+    def write(self, filepath):
+        """Write a header row followed by every collected row to ``filepath``."""
+        header = ['cluster', 'number_of_cores', 'total_time', '%time', '%running_time']
+        with open(filepath, 'w') as wfh:
+            writer = csv.writer(wfh)
+            writer.writerow(header)
+            for row in self.values:
+                writer.writerow(row)
+
+
+class PowerStateStats(object):
+    """
+    Accumulates, for every core, the time spent in each power state.
+
+    Feed it ``(timestamp, core_states)`` pairs through ``update()`` and then
+    call ``report()``.
+
+    """
+
+    def __init__(self, core_names, idle_state_names=None, use_ratios=False):
+        """
+        :core_names: one name per core, indexed by cpu id.
+        :idle_state_names: optional names indexed by idle state id; when not
+                           given, idle states are labelled ``idle<N>``.
+        :use_ratios: report proportions as ratios rather than percentages.
+
+        """
+        self.core_names = core_names
+        self.idle_state_names = idle_state_names
+        self.use_ratios = use_ratios
+        self.first_timestamp = None
+        self.last_timestamp = None
+        self.previous_states = None
+        # cpu id -> state label -> accumulated time
+        self.cpu_states = defaultdict(lambda: defaultdict(int))
+
+    def update(self, timestamp, core_states):  # NOQA
+        """
+        Attribute the time elapsed since the previous update to the state
+        each core was in at that previous update, then remember
+        ``core_states`` (one ``(idle_state, frequency)`` tuple per core) for
+        the next call. The first call only records the starting timestamp.
+
+        """
+        if self.last_timestamp is not None:
+            delta = timestamp - self.last_timestamp
+            for cpu, (idle, freq) in enumerate(self.previous_states):
+                if idle == -1 and freq is not None:
+                    # Active core at a known frequency.
+                    state = '{:07}KHz'.format(freq)
+                elif freq:
+                    # Idle state reported together with a frequency.
+                    if self.idle_state_names:
+                        state = '{}-{:07}KHz'.format(self.idle_state_names[idle], freq)
+                    else:
+                        state = 'idle{}-{:07}KHz'.format(idle, freq)
+                elif idle not in (None, -1):
+                    if self.idle_state_names:
+                        state = self.idle_state_names[idle]
+                    else:
+                        state = 'idle{}'.format(idle)
+                else:
+                    # Unknown idle state, or active at an unknown frequency.
+                    state = 'unknown'
+                self.cpu_states[cpu][state] += delta
+        else:  # initial update
+            self.first_timestamp = timestamp
+
+        self.last_timestamp = timestamp
+        self.previous_states = core_states
+
+    def report(self):
+        """
+        Return a ``PowerStateStatsReport`` mapping each state label to a list
+        with one proportion per core (``None`` for a core that never entered
+        the state). Returns ``None`` if ``update()`` was never called.
+
+        """
+        if self.last_timestamp is None:
+            return None
+        total_time = self.last_timestamp - self.first_timestamp
+        scale = 1 if self.use_ratios else 100
+        state_stats = defaultdict(lambda: [None] * len(self.core_names))
+
+        for cpu, states in self.cpu_states.iteritems():
+            for state in states:
+                time = states[state]
+                # total_time is zero when every update carried the same
+                # timestamp; report 0 rather than dividing by zero.
+                time_pc = time / total_time * scale if total_time else 0
+                state_stats[state][cpu] = time_pc
+
+        precision = 3 if self.use_ratios else 1
+        return PowerStateStatsReport(state_stats, self.core_names, precision)
+
+
+class PowerStateStatsReport(object):
+    """
+    Per-core power state proportions, writable as a CSV file.
+
+    ``state_stats`` maps a state label to a list with one value per core;
+    ``None`` entries are written as 0.
+
+    """
+
+    def __init__(self, state_stats, core_names, precision=2):
+        self.state_stats = state_stats
+        self.core_names = core_names
+        self.precision = precision
+
+    def write(self, filepath):
+        """Write a header row and one row per state (sorted by label) to ``filepath``."""
+        value_format = '{{:.{}f}}'.format(self.precision)
+        core_columns = ['{} CPU{}'.format(core_name, cpu_id)
+                        for cpu_id, core_name in enumerate(self.core_names)]
+        with open(filepath, 'w') as wfh:
+            writer = csv.writer(wfh)
+            writer.writerow(['state'] + core_columns)
+            for state in sorted(self.state_stats):
+                row = [state]
+                for value in self.state_stats[state]:
+                    row.append(value_format.format(0 if value is None else value))
+                writer.writerow(row)
+
+
+def build_idle_domains(core_clusters,   # NOQA
+                       num_states,
+                       first_cluster_state=None,
+                       first_system_state=None):
+    """
+    Returns a list of idle domain groups, one for each idle state. Each group is
+    a list of domains, and a domain is a list of the cpu ids that share that idle
+    state. E.g.
+
+        [[[0], [1], [2]], [[0, 1], [2]], [[0, 1, 2]]]
+
+    describes three idle states for a machine with three cores. The first idle
+    state has three domains with one core in each; the second state has two
+    domains, with cores 0 and 1 sharing one of them; the final state has a single
+    domain shared by all cores.
+
+    The mapping is created based on the following assumptions:
+
+    - The device is an SMP or a big.LITTLE-like system with cores in one or
+      more clusters (for SMP systems, all cores are considered to be in a "cluster").
+    - Idle domains correspond to either individual cores, individual clusters, or
+      the compute subsystem as a whole.
+    - Cluster states are always deeper (higher index) than core states, and
+      system states are always deeper than cluster states.
+
+    parameters:
+
+        :core_clusters: a list indicating cluster "ID" of the corresponding core, e.g.
+                        ``[0, 0, 1]`` represents a three-core machine with cores 0
+                        and 1 on cluster 0, and core 2 on cluster 1.
+        :num_states: total number of idle states on a device.
+        :first_cluster_state: the ID of the first idle state shared by all cores in a
+                              cluster. ``None`` is treated as ``sys.maxint``.
+        :first_system_state: the ID of the first idle state shared by all cores.
+                             ``None`` is treated as ``sys.maxint``.
+
+    """
+    if first_cluster_state is None:
+        first_cluster_state = sys.maxint
+    if first_system_state is None:
+        first_system_state = sys.maxint
+
+    all_cpus = range(len(core_clusters))
+    cpus_by_cluster = defaultdict(list)
+    for cpu_id, cluster_id in enumerate(core_clusters):
+        cpus_by_cluster[cluster_id].append(cpu_id)
+    cluster_domains = [cpus_by_cluster[cluster_id] for cluster_id in sorted(cpus_by_cluster)]
+    core_domains = [[cpu_id] for cpu_id in all_cpus]
+
+    def domains_for(state_id):
+        # The system threshold is checked before the cluster one.
+        if state_id >= first_system_state:
+            return [all_cpus]
+        if state_id >= first_cluster_state:
+            return cluster_domains
+        return core_domains
+
+    return [domains_for(state_id) for state_id in xrange(num_states)]
+
+
+def report_power_stats(trace_file, idle_state_names, core_names, core_clusters,
+                       num_idle_states, first_cluster_state=sys.maxint,
+                       first_system_state=sys.maxint, use_ratios=False):
+    """
+    Parse ``trace_file`` and return ``(parallel_report, power_state_report)``.
+
+    The ``cpu_idle`` and ``cpu_frequency`` events of the trace are turned
+    into a timeline of per-core states, which is fed to both a
+    ``ParallelStats`` and a ``PowerStateStats`` collector. Each element of
+    the result is whatever the corresponding collector's ``report()``
+    returns, which is ``None`` when the timeline was empty.
+
+    """
+    # pylint: disable=too-many-locals
+    trace = TraceCmdTrace()
+    processor = PowerStateProcessor(core_clusters,
+                                    num_idle_states=num_idle_states,
+                                    first_cluster_state=first_cluster_state,
+                                    first_system_state=first_system_state)
+    parallel_stats = ParallelStats(core_clusters, use_ratios)
+    power_state_stats = PowerStateStats(core_names, idle_state_names, use_ratios)
+
+    # Each stage below is lazy; nothing is parsed until the loop runs.
+    events = trace.parse(trace_file, names=['cpu_idle', 'cpu_frequency'])
+    transitions = stream_cpu_power_transitions(events)
+    system_states = processor.process(transitions)
+
+    for timestamp, core_states in gather_core_states(system_states):
+        parallel_stats.update(timestamp, core_states)
+        power_state_stats.update(timestamp, core_states)
+
+    return (parallel_stats.report(), power_state_stats.report())
+
+
+def main():
+    """
+    Command-line entry point: parse the arguments, generate both reports
+    from the trace file and write them as ``parallel.csv`` and
+    ``cpustate.csv`` into the output directory.
+
+    Exits with status 1 if no reports could be generated from the trace.
+
+    """
+    args = parse_arguments()
+
+    parallel_report, powerstate_report = report_power_stats(
+        trace_file=args.infile,
+        idle_state_names=args.idle_state_names,
+        core_names=args.core_names,
+        core_clusters=args.core_clusters,
+        num_idle_states=args.num_idle_states,
+        first_cluster_state=args.first_cluster_state,
+        first_system_state=args.first_system_state,
+        use_ratios=args.ratios,
+    )
+    # Both reports are None when the trace contained no power events;
+    # fail with a message instead of an AttributeError on None.write().
+    if parallel_report is None or powerstate_report is None:
+        sys.stderr.write('No power state reports generated; are power '
+                         'events enabled in the trace?\n')
+        sys.exit(1)
+    parallel_report.write(os.path.join(args.output_directory, 'parallel.csv'))
+    powerstate_report.write(os.path.join(args.output_directory, 'cpustate.csv'))
+
+
+class SplitListAction(argparse.Action):
+    """
+    argparse action that stores a comma-separated option value as a list of
+    strings, with surrounding whitespace stripped from each item. ``nargs``
+    is not supported.
+
+    """
+
+    def __init__(self, option_strings, dest, nargs=None, **kwargs):
+        if nargs is not None:
+            raise ValueError('nargs not allowed')
+        super(SplitListAction, self).__init__(option_strings, dest, **kwargs)
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        items = []
+        for item in values.split(','):
+            items.append(item.strip())
+        setattr(namespace, self.dest, items)
+
+
+def parse_arguments():  # NOQA
+    parser = argparse.ArgumentParser(description="""
+                                     Produce CPU power activity statistics reports from
+                                     power trace.
+                                     """)
+    parser.add_argument('infile', metavar='TRACEFILE', help='''
+                        Path to the trace file to parse. This must be in the format generated
+                        by "trace-cmd report" command.
+                        ''')
+    parser.add_argument('-d', '--output-directory', default='.',
+                        help='''
+                        Output directory where reports will be placed.
+                        ''')
+    parser.add_argument('-c', '--core-names', action=SplitListAction,
+                        help='''
+                        Comma-separated list of core names for the device on which the trace
+                        was collected.
+                        ''')
+    parser.add_argument('-C', '--core-clusters', action=SplitListAction, default=[],
+                        help='''
+                        Comma-separated list of core cluster IDs for the device on which the
+                        trace was collected. If not specified, this will be generated from
+                        core names on the assumption that all cores with the same name are on the
+                        same cluster.
+                        ''')
+    parser.add_argument('-i', '--idle-state-names', type=SplitListAction,
+                        help='''
+                        Comma-separated list of idle state names. The number of names must match
+                        --num-idle-states if that was explicitly specified.
+                        ''')
+    parser.add_argument('-n', '--num-idle-states', type=int,
+                        help='''
+                        number of  idle states on the device
+                        ''')
+    parser.add_argument('-q', '--first-cluster-state', type=int,
+                        help='''
+                        ID of the first cluster state. Must be < --num-idle-states.
+                        ''')
+    parser.add_argument('-s', '--first-system-state', type=int,
+                        help='''
+                        ID of the first system state. Must be < --numb-idle-states, and
+                        > --first-cluster-state.
+                        ''')
+    parser.add_argument('-R', '--ratios', action='store_true',
+                        help='''
+                        By default proportional values will be reported as percentages, if this
+                        flag is enabled, they will be reported as ratios instead.
+                        ''')
+
+    args = parser.parse_args()
+
+    if not args.core_names:
+        raise ValueError('core names must be specified using -c or --core-names')
+    if not args.core_clusters:
+        logger.debug('core clusters not specified, inferring from core names')
+        core_cluster_map = {}
+        core_clusters = []
+        current_cluster = 0
+        for cn in args.core_names:
+            if cn not in core_cluster_map:
+                core_cluster_map[cn] = current_cluster
+                current_cluster += 1
+            core_clusters.append(core_cluster_map[cn])
+        args.core_clusters = core_clusters
+    if not args.num_idle_states and args.idle_state_names:
+        args.num_idle_states = len(args.idle_state_names)
+    elif args.num_idle_states and not args.idle_state_names:
+        args.idle_state_names = ['idle{}'.format(i) for i in xrange(args.num_idle_states)]
+    elif args.num_idle_states and args.idle_state_names:
+        if len(args.idle_state_names) != args.num_idle_states:
+            raise ValueError('Number of idle state names does not match --num-idle-states')
+    else:
+        raise ValueError('Either --num-idle-states or --idle-state-names must be specified')
+
+    if not args.first_cluster_state and len(set(args.core_clusters)) > 1:
+        if args.first_system_state:
+            logger.debug('First cluster idle state not specified; state previous to first system state')
+            args.first_cluster_state = args.first_system_state - 1
+        else:
+            logger.debug('First cluster idle state not specified; assuming last available state')
+            args.first_cluster_state = args.num_idle_states - 1
+
+    return args
+
+# Run the command line interface only when this module is executed as a script.
+if __name__ == '__main__':
+    main()
--- a/wlauto/utils/trace_cmd.py
+++ b/wlauto/utils/trace_cmd.py
@@ -31,7 +31,7 @@ TRACE_MARKER_STOP = 'TRACE_MARKER_STOP'

 class TraceCmdEvent(object):
    """
-    A single trace-cmd event. This will appear in the trace cmd report in the format
+    A single trace-cmd event. This will appear in the trace cmd report in the format ::

          <idle>-0     [000]  3284.126993: sched_rq_runnable_load: cpu=0 load=54
             |           |         |              |                |___________|