mirror of
https://github.com/ARM-software/workload-automation.git
synced 2025-01-18 12:06:08 +00:00
Adding cpustates result processor (and script)
This commit is contained in:
parent
9971041e45
commit
02af02f0cb
17
scripts/cpustates
Normal file
17
scripts/cpustates
Normal file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright 2015 ARM Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from wlauto.utils.power import main
|
||||
main()
|
186
wlauto/result_processors/cpustate.py
Normal file
186
wlauto/result_processors/cpustate.py
Normal file
@ -0,0 +1,186 @@
|
||||
# Copyright 2015 ARM Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import os
|
||||
import csv
|
||||
from collections import OrderedDict
|
||||
|
||||
from wlauto import ResultProcessor, Parameter
|
||||
from wlauto.exceptions import ConfigError
|
||||
from wlauto.instrumentation import instrument_is_installed
|
||||
from wlauto.utils.power import report_power_stats
|
||||
|
||||
|
||||
class CpuStatesProcessor(ResultProcessor):
    """
    Result processor that turns the text trace of each iteration into CPU
    power state and parallelism reports (see ``description`` for the report
    formats).

    """

    name = 'cpustates'
    description = '''
    Process power ftrace to produce CPU state and parallelism stats.

    Parses trace-cmd output to extract power events and uses those to generate
    statistics about parallelism and frequency/idle core residency.

    .. note:: trace-cmd instrument must be enabled and configured to collect
              at least ``power:cpu_idle`` and ``power:cpu_frequency`` events.
              Reporting should also be enabled (it is by default) as
              ``cpustates`` parses the text version of the trace.

    This generates two reports for the run:

    *parallel.csv*

    Shows what percentage of time was spent with N cores active (for N
    from 0 to the total number of cores), for a cluster or for a system as
    a whole. It contains the following columns:

        :workload: The workload label
        :iteration: iteration that was run
        :cluster: The cluster for which statistics are reported. The value of
                  ``"all"`` indicates that this row reports statistics for
                  the whole system.
        :number_of_cores: number of cores active. ``0`` indicates the cluster
                          was idle.
        :total_time: Total time spent in this state during workload execution
        :%time: Percentage of total workload execution time spent in this state
        :%running_time: Percentage of the time the cluster was active (i.e.
                        ignoring time the cluster was idling) spent in this
                        state.

    *cpustate.csv*

    Shows percentage of the time a core spent in a particular power state. The first
    column names the state and is followed by a column for each core. Power states include
    available DVFS frequencies (for heterogeneous systems, this is the union of
    frequencies supported by different core types) and idle states. Some shallow
    states (e.g. ARM WFI) will consume different amount of power depending on the
    current OPP. For such states, there will be an entry for each opp. ``"unknown"``
    indicates the percentage of time for which a state could not be established from the
    trace. This is usually due to core state being unknown at the beginning of the trace,
    but may also be caused by dropped events in the middle of the trace.

    '''

    parameters = [
        Parameter('first_cluster_state', kind=int, default=2,
                  description="""
                  The first idle state which is common to a cluster.
                  """),
        Parameter('first_system_state', kind=int, default=3,
                  description="""
                  The first idle state which is common to all cores.
                  """),
        Parameter('write_iteration_reports', kind=bool, default=False,
                  description="""
                  By default, this instrument will generate reports for the entire run
                  in the overall output directory. Enabling this option will, in addition,
                  create reports in each iteration's output directory. The formats of these
                  reports will be similar to the overall report, except they won't mention
                  the workload name or iteration number (as that is implied by their location).
                  """),
        # NOTE: this must be called ``use_ratios`` -- that is the attribute
        # process_iteration_result() reads when invoking report_power_stats().
        Parameter('use_ratios', kind=bool, default=False,
                  description="""
                  By default proportional values will be reported as percentages, if this
                  flag is enabled, they will be reported as ratios instead.
                  """),
    ]

    def validate(self):
        """Raise ``ConfigError`` unless the ``trace-cmd`` instrument is installed."""
        if not instrument_is_installed('trace-cmd'):
            message = '''
            {} requires "trace-cmd" instrument to be installed and the collection of at
            least "power:cpu_frequency" and "power:cpu_idle" events to be enabled during workload
            execution.
            '''
            raise ConfigError(message.format(self.name).strip())

    def initialize(self, context):
        """
        Cache the device's core layout and idle state names, and reset the
        per-iteration report store.

        Raises ``ConfigError`` if the device lacks the ``cpuidle`` capability
        or has no ``core_names`` configured.

        """
        # pylint: disable=attribute-defined-outside-init
        device = context.device
        if not device.has('cpuidle'):
            raise ConfigError('Device does not appear to have cpuidle capability; is the right module installed?')
        if not device.core_names:
            message = '{} requires "core_names" and "core_clusters" to be specified for the device.'
            raise ConfigError(message.format(self.name))
        self.core_names = device.core_names
        self.core_clusters = device.core_clusters
        idle_states = {s.id: s.desc for s in device.get_cpuidle_states()}
        # Names ordered by state id, so that list index == idle state id.
        self.idle_state_names = [idle_states[i] for i in sorted(idle_states.keys())]
        self.num_idle_states = len(self.idle_state_names)
        # (workload label, iteration) -> (parallel report, power state report)
        self.iteration_reports = OrderedDict()

    def process_iteration_result(self, result, context):
        """
        Generate the reports for the current iteration from its ``txttrace``
        artifact and remember them for the run-level summary. Iterations with
        no text trace, or with no power events in it, are skipped.

        """
        trace = context.get_artifact('txttrace')
        if not trace:
            self.logger.debug('Text trace does not appear to have been generated; skipping this iteration.')
            return
        self.logger.debug('Generating power state reports from trace...')
        parallel_report, powerstate_report = report_power_stats(
            trace_file=trace.path,
            idle_state_names=self.idle_state_names,
            core_names=self.core_names,
            core_clusters=self.core_clusters,
            num_idle_states=self.num_idle_states,
            first_cluster_state=self.first_cluster_state,
            first_system_state=self.first_system_state,
            use_ratios=self.use_ratios,
        )
        if parallel_report is None:
            self.logger.warning('No power state reports generated; are power '
                                'events enabled in the trace?')
            return
        self.logger.debug('Reports generated.')

        iteration_id = (context.spec.label, context.current_iteration)
        self.iteration_reports[iteration_id] = (parallel_report, powerstate_report)
        if self.write_iteration_reports:
            self.logger.debug('Writing iteration reports')
            parallel_report.write(os.path.join(context.output_directory, 'parallel.csv'))
            # Same file name as the run-level report and the one documented
            # in the description above.
            powerstate_report.write(os.path.join(context.output_directory, 'cpustate.csv'))

    def process_run_result(self, result, context):  # pylint: disable=too-many-locals
        """
        Combine the per-iteration reports into run-level ``parallel.csv`` and
        ``cpustate.csv`` files, prefixing each row with the workload label
        and iteration number.

        """
        if not self.iteration_reports:
            self.logger.warning('No power state reports generated.')
            return

        parallel_rows = []
        powerstate_rows = []
        # .items() rather than .iteritems() so this also runs on Python 3.
        for iteration_id, reports in self.iteration_reports.items():
            workload, iteration = iteration_id
            parallel_report, powerstate_report = reports
            for record in parallel_report.values:
                parallel_rows.append([workload, iteration] + record)
            for state in sorted(powerstate_report.state_stats):
                stats = powerstate_report.state_stats[state]
                # A core that never entered this state is reported as 0.
                powerstate_rows.append([workload, iteration, state] +
                                       ['{:.3f}'.format(s if s is not None else 0)
                                        for s in stats])

        with open(os.path.join(context.output_directory, 'parallel.csv'), 'w') as wfh:
            writer = csv.writer(wfh)
            writer.writerow(['workload', 'iteration', 'cluster',
                             'number_of_cores', 'total_time',
                             '%time', '%running_time'])
            writer.writerows(parallel_rows)

        with open(os.path.join(context.output_directory, 'cpustate.csv'), 'w') as wfh:
            writer = csv.writer(wfh)
            headers = ['workload', 'iteration', 'state']
            # self.core_names is what every report was generated with, so
            # there is no need to reach into the loop variable left over above.
            headers += ['{} CPU{}'.format(c, i)
                        for i, c in enumerate(self.core_names)]
            writer.writerow(headers)
            writer.writerows(powerstate_rows)
|
||||
|
636
wlauto/utils/power.py
Normal file
636
wlauto/utils/power.py
Normal file
@ -0,0 +1,636 @@
|
||||
# Copyright 2015 ARM Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import division
|
||||
import os
|
||||
import sys
|
||||
import csv
|
||||
import logging
|
||||
from ctypes import c_int32
|
||||
from collections import defaultdict
|
||||
import argparse
|
||||
|
||||
from wlauto.utils.trace_cmd import TraceCmdTrace
|
||||
|
||||
|
||||
logger = logging.getLogger('power')
|
||||
|
||||
|
||||
class CorePowerTransitionEvent(object):
    """
    A change of either the frequency or the idle state of a single core at a
    particular point in time. Exactly one of the two must be given.

    """

    kind = 'transition'
    __slots__ = ['timestamp', 'cpu_id', 'frequency', 'idle_state']

    def __init__(self, timestamp, cpu_id, frequency=None, idle_state=None):
        has_frequency = frequency is not None
        has_idle_state = idle_state is not None
        # Both given or both missing -- either way the event is ambiguous.
        if has_frequency == has_idle_state:
            raise ValueError('Power transition must specify a frequency or an idle_state, but not both.')
        self.timestamp = timestamp
        self.cpu_id = cpu_id
        self.frequency = frequency
        self.idle_state = idle_state

    def __str__(self):
        template = 'cpu {} @ {} -> freq: {} idle: {}'
        return template.format(self.cpu_id, self.timestamp,
                               self.frequency, self.idle_state)

    def __repr__(self):
        template = 'CPTE(c:{} t:{} f:{} i:{})'
        return template.format(self.cpu_id, self.timestamp,
                               self.frequency, self.idle_state)
|
||||
|
||||
|
||||
class CorePowerDroppedEvents(object):
    """Marker event recording that trace events were dropped on a core."""

    kind = 'dropped_events'
    __slots__ = ['cpu_id']

    def __init__(self, cpu_id):
        self.cpu_id = cpu_id

    def __str__(self):
        template = 'DROPPED EVENTS on CPU{}'
        return template.format(self.cpu_id)

    # The repr is deliberately the same text as the str.
    __repr__ = __str__
|
||||
|
||||
|
||||
class CpuPowerState(object):
    """
    Power state of a single core: its frequency and its idle state. An idle
    state of ``-1`` means the core is active, ``None`` means it is unknown.

    """

    __slots__ = ['frequency', 'idle_state']

    def __init__(self, frequency=None, idle_state=None):
        self.frequency = frequency
        self.idle_state = idle_state

    @property
    def is_idling(self):
        """``True`` only when the idle state is known and non-negative."""
        state = self.idle_state
        if state is None:
            return False
        return state >= 0

    @property
    def is_active(self):
        """``True`` only when the idle state is ``-1``."""
        return self.idle_state == -1

    def __str__(self):
        template = 'CP(f:{} i:{})'
        return template.format(self.frequency, self.idle_state)

    __repr__ = __str__
|
||||
|
||||
|
||||
class SystemPowerState(object):
    """
    Snapshot of the power state of every core in the system at a given
    timestamp. ``cpus`` holds one ``CpuPowerState`` per core, indexed by cpu id.

    """

    __slots__ = ['timestamp', 'cpus']

    @property
    def num_cores(self):
        return len(self.cpus)

    def __init__(self, num_cores):
        self.timestamp = None
        # range() (rather than the Python 2-only xrange()) keeps this usable
        # on both Python 2 and Python 3.
        self.cpus = [CpuPowerState() for _ in range(num_cores)]

    def copy(self):
        """Return an independent copy; the per-core states are duplicated, not shared."""
        new = SystemPowerState(self.num_cores)
        new.timestamp = self.timestamp
        for source, target in zip(self.cpus, new.cpus):
            target.frequency = source.frequency
            target.idle_state = source.idle_state
        return new

    def __str__(self):
        return 'SP(t:{} Cs:{})'.format(self.timestamp, self.cpus)

    __repr__ = __str__
|
||||
|
||||
|
||||
class PowerStateProcessor(object):
    """
    This takes a stream of power transition events and yields a timeline stream
    of system power states.

    The processor keeps a single mutable ``SystemPowerState`` that is updated
    for every event; a copy of it is handed out after each update.

    :param core_clusters: list giving the cluster ID of each core (index is
                          the cpu id).
    :param num_idle_states: total number of idle states on the device.
    :param first_cluster_state: ID of the first idle state shared by all cores
                                in a cluster. The default is a value larger
                                than any state ID, i.e. no cluster states.
    :param first_system_state: ID of the first idle state shared by all cores.
                               The default is a value larger than any state
                               ID, i.e. no system states.

    """

    @property
    def cpu_states(self):
        return self.power_state.cpus

    @property
    def current_time(self):
        return self.power_state.timestamp

    @current_time.setter
    def current_time(self, value):
        self.power_state.timestamp = value

    # sys.maxsize is used as the "larger than any state ID" sentinel because,
    # unlike sys.maxint, it exists on both Python 2 and Python 3.
    def __init__(self, core_clusters, num_idle_states,
                 first_cluster_state=sys.maxsize, first_system_state=sys.maxsize):
        self.power_state = SystemPowerState(len(core_clusters))
        self.requested_states = defaultdict(lambda: -1)  # cpu_id -> requested state

        idle_state_domains = build_idle_domains(core_clusters,
                                                num_states=num_idle_states,
                                                first_cluster_state=first_cluster_state,
                                                first_system_state=first_system_state)
        # This tells us what other cpus we need to update when we see an idle
        # state transition event
        self.idle_related_cpus = defaultdict(list)  # (cpu, idle_state) --> related_cpus_list
        for state_id, idle_state_domain in enumerate(idle_state_domains):
            for cpu_group in idle_state_domain:
                for cpu in cpu_group:
                    related = set(cpu_group) - set([cpu])
                    self.idle_related_cpus[(cpu, state_id)] = related

    def process(self, event_stream):
        """Lazily yield the updated system power state for each event in ``event_stream``."""
        for event in event_stream:
            yield self.update_power_state(event)

    def update_power_state(self, event):
        """
        Update the tracked power state based on the specified event and
        return updated power state.

        Raises ``ValueError`` for an event whose ``kind`` is not recognised.

        """
        if event.kind == 'transition':
            self._process_transition(event)
        elif event.kind == 'dropped_events':
            self._process_dropped_events(event)
        else:
            raise ValueError('Unexpected event type: {}'.format(event.kind))
        # Hand out a copy so later updates do not alter states already yielded.
        return self.power_state.copy()

    def _process_transition(self, event):
        self.current_time = event.timestamp
        if event.idle_state is None:
            self.cpu_states[event.cpu_id].frequency = event.frequency
        else:
            if event.idle_state == -1:
                self._process_idle_exit(event)
            else:
                self._process_idle_entry(event)

    def _process_dropped_events(self, event):
        # After dropped events nothing is known about the core any more, nor
        # about the idle state of the cores it shared its idle state with.
        self.cpu_states[event.cpu_id].frequency = None
        old_idle_state = self.cpu_states[event.cpu_id].idle_state
        self.cpu_states[event.cpu_id].idle_state = None

        related_ids = self.idle_related_cpus[(event.cpu_id, old_idle_state)]
        for rid in related_ids:
            self.cpu_states[rid].idle_state = None

    def _process_idle_entry(self, event):
        if self.cpu_states[event.cpu_id].is_idling:
            raise ValueError('Got idle state entry event for an idling core: {}'.format(event))
        self._try_transition_to_idle_state(event.cpu_id, event.idle_state)

    def _process_idle_exit(self, event):
        if self.cpu_states[event.cpu_id].is_active:
            raise ValueError('Got idle state exit event for an active core: {}'.format(event))
        self.requested_states.pop(event.cpu_id, None)  # remove outstanding request if there is one
        old_state = self.cpu_states[event.cpu_id].idle_state
        self.cpu_states[event.cpu_id].idle_state = -1

        related_ids = self.idle_related_cpus[(event.cpu_id, old_state)]
        if old_state is not None:
            # The cores that shared the state being exited can now be in, at
            # most, the next shallower state.
            new_state = old_state - 1
            for rid in related_ids:
                rid_state = self.cpu_states[rid].idle_state
                # A related core in an unknown state is left alone. Checking
                # for None explicitly avoids the None-vs-int comparison, which
                # is False on Python 2 but a TypeError on Python 3.
                if rid_state is not None and rid_state > new_state:
                    self._try_transition_to_idle_state(rid, new_state)

    def _try_transition_to_idle_state(self, cpu_id, idle_state):
        related_ids = self.idle_related_cpus[(cpu_id, idle_state)]

        # Tristate: True - can transition, False - can't transition,
        #           None - unknown idle state on at least one related cpu
        transition_check = self._can_enter_state(related_ids, idle_state)

        if not transition_check:
            # If we can't enter an idle state right now, record that we've
            # requested it, so that we may enter it later (once all related
            # cpus also want a state at least as deep).
            self.requested_states[cpu_id] = idle_state

        if transition_check is None:
            # Unknown state on a related cpu means we're not sure whether we're
            # entering requested state or a shallower one
            self.cpu_states[cpu_id].idle_state = None
            return

        # Keep trying shallower states until all related cpus allow the
        # transition.
        while not self._can_enter_state(related_ids, idle_state):
            idle_state -= 1
            related_ids = self.idle_related_cpus[(cpu_id, idle_state)]

        self.cpu_states[cpu_id].idle_state = idle_state
        for rid in related_ids:
            self.cpu_states[rid].idle_state = idle_state
            if self.requested_states[rid] == idle_state:
                del self.requested_states[rid]  # request satisfied, so remove

    def _can_enter_state(self, related_ids, state):
        """
        This is a tri-state check. Returns ``True`` if related cpu states allow transition
        into this state, ``False`` if related cpu states don't allow transition into this
        state, and ``None`` if at least one of the related cpus is in an unknown state
        (so the decision of whether a transition is possible cannot be made).

        """
        for rid in related_ids:
            rid_requested_state = self.requested_states[rid]
            rid_current_state = self.cpu_states[rid].idle_state
            if rid_current_state is None:
                return None
            if rid_current_state < state and rid_requested_state < state:
                return False
        return True
|
||||
|
||||
|
||||
def stream_cpu_power_transitions(events):
    """
    Translate trace events into power events. ``cpu_idle`` and
    ``cpu_frequency`` events become ``CorePowerTransitionEvent``s,
    ``DROPPED EVENTS DETECTED`` becomes ``CorePowerDroppedEvents``; any other
    event is ignored.

    """
    for trace_event in events:
        event_name = trace_event.name
        if event_name == 'cpu_idle':
            # Reinterpret the state as a signed 32-bit value, so that e.g.
            # 0xFFFFFFFF is seen as -1.
            idle_state = c_int32(trace_event.state).value
            yield CorePowerTransitionEvent(trace_event.timestamp, trace_event.cpu_id,
                                           idle_state=idle_state)
            continue
        if event_name == 'cpu_frequency':
            yield CorePowerTransitionEvent(trace_event.timestamp, trace_event.cpu_id,
                                           frequency=trace_event.state)
            continue
        if event_name == 'DROPPED EVENTS DETECTED':
            yield CorePowerDroppedEvents(trace_event.cpu_id)
|
||||
|
||||
|
||||
def gather_core_states(system_state_stream, freq_dependent_idle_states=None):  # NOQA
    """
    Reduce each system power state to a ``(timestamp, core_states)`` pair,
    where ``core_states`` has one ``(idle_state, frequency)`` tuple per core:

        - active cores (idle state ``-1``) keep their frequency;
        - cores in one of ``freq_dependent_idle_states`` (``[0]`` when not
          specified) keep their frequency if it is known, and are reported as
          ``(None, None)`` if it is not;
        - cores in any other state are reported without a frequency.

    """
    if freq_dependent_idle_states is None:
        freq_dependent = [0]
    else:
        freq_dependent = freq_dependent_idle_states

    def summarise(core):
        idle = core.idle_state
        if idle == -1:
            return (-1, core.frequency)
        if idle not in freq_dependent:
            return (idle, None)
        if core.frequency is None:
            return (None, None)
        return (idle, core.frequency)

    for snapshot in system_state_stream:
        yield (snapshot.timestamp, [summarise(core) for core in snapshot.cpus])
|
||||
|
||||
|
||||
class ParallelStats(object):
    """
    Accumulates, for each cluster and for the system as a whole (cluster
    ``'all'``), how much time was spent with N cores active.

    :param core_clusters: list giving the cluster ID of each core (index is
                          the cpu id).
    :param use_ratios: report proportions as ratios (3 decimal places) rather
                       than percentages (1 decimal place).

    """

    def __init__(self, core_clusters, use_ratios=False):
        self.clusters = defaultdict(set)
        self.use_ratios = use_ratios
        for i, clust in enumerate(core_clusters):
            self.clusters[clust].add(i)
        self.clusters['all'] = set(range(len(core_clusters)))

        self.first_timestamp = None
        self.last_timestamp = None
        self.previous_states = None
        self.parallel_times = defaultdict(lambda: defaultdict(int))
        self.running_times = defaultdict(int)

    def update(self, timestamp, core_states):
        """
        Account the time elapsed since the previous update to the states that
        were in effect during it (i.e. the states passed to the previous
        update), then remember ``core_states`` for the next call.

        """
        if self.last_timestamp is not None:
            delta = timestamp - self.last_timestamp
            active_cores = [i for i, c in enumerate(self.previous_states)
                            if c and c[0] == -1]
            # .items() rather than .iteritems() so this also runs on Python 3.
            for cluster, cluster_cores in self.clusters.items():
                clust_active_cores = len(cluster_cores.intersection(active_cores))
                self.parallel_times[cluster][clust_active_cores] += delta
                if clust_active_cores:
                    self.running_times[cluster] += delta
        else:  # initial update
            self.first_timestamp = timestamp

        self.last_timestamp = timestamp
        self.previous_states = core_states

    @staticmethod
    def _cluster_sort_key(cluster):
        # Non-string cluster IDs first, then strings (including 'all'). This
        # is the order Python 2 produces for mixed int/str keys, and it avoids
        # comparing an int to a str, which Python 3 does not allow.
        return (isinstance(cluster, str), cluster)

    def _build_rows(self):
        # Produce the report rows: one per (cluster, number of active cores).
        rows = []
        total_time = self.last_timestamp - self.first_timestamp
        precision = 3 if self.use_ratios else 1
        fmt = '{{:.{}f}}'.format(precision)
        scale = 1 if self.use_ratios else 100
        for cluster in sorted(self.parallel_times, key=self._cluster_sort_key):
            running_time = self.running_times[cluster]
            for n in range(len(self.clusters[cluster]) + 1):
                time = self.parallel_times[cluster][n]
                # Zero-length traces and clusters that were never active must
                # not cause a division by zero; report 0 for them instead.
                time_pc = time / total_time * scale if total_time else 0
                if n and running_time:
                    running_time_pc = time / running_time * scale
                else:
                    running_time_pc = 0
                rows.append([cluster, n,
                             fmt.format(time),
                             fmt.format(time_pc),
                             fmt.format(running_time_pc),
                             ])
        return rows

    def report(self):  # NOQA
        """
        Return a ``ParallelReport`` with one row per cluster and number of
        active cores, or ``None`` if ``update()`` was never called.

        """
        if self.last_timestamp is None:
            return None

        report = ParallelReport()
        for row in self._build_rows():
            report.add(row)
        return report
|
||||
|
||||
|
||||
class ParallelReport(object):
    """A list of parallelism rows that can be written out as a CSV file."""

    def __init__(self):
        self.values = []

    def add(self, value):
        """Append one row (a list of column values) to the report."""
        self.values.append(value)

    def write(self, filepath):
        """Write the header followed by all the rows to ``filepath`` as CSV."""
        header = ['cluster', 'number_of_cores', 'total_time', '%time', '%running_time']
        with open(filepath, 'w') as wfh:
            csv.writer(wfh).writerows([header] + self.values)
|
||||
|
||||
|
||||
class PowerStateStats(object):
    """
    Accumulates, for each core, the time spent in each power state (a
    frequency, an idle state, an idle state at a frequency, or ``unknown``).

    :param core_names: list of core names, indexed by cpu id.
    :param idle_state_names: optional list of idle state names indexed by
                             state id; ``idle<N>`` is used when not given.
    :param use_ratios: report proportions as ratios (3 decimal places) rather
                       than percentages (1 decimal place).

    """

    def __init__(self, core_names, idle_state_names=None, use_ratios=False):
        self.core_names = core_names
        self.idle_state_names = idle_state_names
        self.use_ratios = use_ratios
        self.first_timestamp = None
        self.last_timestamp = None
        self.previous_states = None
        self.cpu_states = defaultdict(lambda: defaultdict(int))

    def update(self, timestamp, core_states):  # NOQA
        """
        Account the time elapsed since the previous update to the state each
        core was in during it (i.e. the states passed to the previous update),
        then remember ``core_states`` for the next call.

        """
        if self.last_timestamp is not None:
            delta = timestamp - self.last_timestamp
            for cpu, (idle, freq) in enumerate(self.previous_states):
                if idle == -1 and freq is not None:
                    state = '{:07}KHz'.format(freq)
                elif freq:
                    if self.idle_state_names:
                        state = '{}-{:07}KHz'.format(self.idle_state_names[idle], freq)
                    else:
                        state = 'idle{}-{:07}KHz'.format(idle, freq)
                elif idle not in (None, -1):
                    if self.idle_state_names:
                        state = self.idle_state_names[idle]
                    else:
                        state = 'idle{}'.format(idle)
                else:
                    # Either the idle state is unknown, or the core is active
                    # at an unknown frequency.
                    state = 'unknown'
                self.cpu_states[cpu][state] += delta
        else:  # initial update
            self.first_timestamp = timestamp

        self.last_timestamp = timestamp
        self.previous_states = core_states

    def _compute_state_stats(self):
        # state name -> per-core list of proportions; None marks a core that
        # never entered the state.
        total_time = self.last_timestamp - self.first_timestamp
        scale = 1 if self.use_ratios else 100
        state_stats = defaultdict(lambda: [None] * len(self.core_names))
        # .items() rather than .iteritems() so this also runs on Python 3.
        for cpu, states in self.cpu_states.items():
            for state, time in states.items():
                # A zero-length trace must not cause a division by zero.
                state_stats[state][cpu] = time / total_time * scale if total_time else 0
        return state_stats

    def report(self):
        """
        Return a ``PowerStateStatsReport`` of the proportion of time each core
        spent in each state, or ``None`` if ``update()`` was never called.

        """
        if self.last_timestamp is None:
            return None
        precision = 3 if self.use_ratios else 1
        return PowerStateStatsReport(self._compute_state_stats(), self.core_names, precision)
|
||||
|
||||
|
||||
class PowerStateStatsReport(object):
    """
    Per-state, per-core residency figures that can be written out as CSV.
    ``state_stats`` maps a state name to a list with one value per core
    (``None`` where there is no value for that core).

    """

    def __init__(self, state_stats, core_names, precision=2):
        self.state_stats = state_stats
        self.core_names = core_names
        self.precision = precision

    def write(self, filepath):
        """
        Write one row per state (sorted by state name) to ``filepath``, with
        one column per core; missing values are written as 0.

        """
        number_format = '{{:.{}f}}'.format(self.precision)
        core_columns = ['{} CPU{}'.format(core_name, cpu_id)
                        for cpu_id, core_name in enumerate(self.core_names)]
        rows = [['state'] + core_columns]
        for state_name in sorted(self.state_stats):
            cells = [state_name]
            for value in self.state_stats[state_name]:
                cells.append(number_format.format(value if value is not None else 0))
            rows.append(cells)
        with open(filepath, 'w') as wfh:
            csv.writer(wfh).writerows(rows)
|
||||
|
||||
|
||||
def build_idle_domains(core_clusters,   # NOQA
                       num_states,
                       first_cluster_state=None,
                       first_system_state=None):
    """
    Returns a list of idle domain groups (one for each idle state). Each group is a
    list of domains, and a domain is a list of cpu ids for which that idle state is
    common. E.g.

        [[[0], [1], [2]], [[0, 1], [2]], [[0, 1, 2]]]

    This defines three idle states for a machine with three cores. The first idle state
    has three domains with one core in each domain; the second state has two domains,
    with cores 0 and 1 sharing one domain; the final state has only one domain shared
    by all cores.

    This mapping is created based on the following assumptions:

    - The device is an SMP or a big.LITTLE-like system with cores in one or
      more clusters (for SMP systems, all cores are considered to be in a "cluster").
    - Idle domains correspond to either individual cores, individual clusters, or
      the compute subsystem as a whole.
    - Cluster states are always deeper (higher index) than core states, and
      system states are always deeper than cluster states.

    parameters:

        :core_clusters: a list indicating cluster "ID" of the corresponding core, e.g.
                        ``[0, 0, 1]`` represents a three-core machine with cores 0
                        and 1 on cluster 0, and core 2 on cluster 1.
        :num_states: total number of idle states on a device.
        :first_cluster_state: the ID of the first idle state shared by all cores in a
                              cluster. ``None`` means there are no cluster states.
        :first_system_state: the ID of the first idle state shared by all cores.
                             ``None`` means there are no system states.

    """
    # sys.maxsize is larger than any state ID and, unlike sys.maxint, exists
    # on both Python 2 and Python 3.
    if first_cluster_state is None:
        first_cluster_state = sys.maxsize
    if first_system_state is None:
        first_system_state = sys.maxsize
    # An actual list, as documented above (range() is not a list on Python 3).
    all_cpus = list(range(len(core_clusters)))
    cluster_cpus = defaultdict(list)
    for cpu, cluster in enumerate(core_clusters):
        cluster_cpus[cluster].append(cpu)
    cluster_domains = [cluster_cpus[c] for c in sorted(cluster_cpus)]
    core_domains = [[c] for c in all_cpus]

    idle_state_domains = []
    for state_id in range(num_states):
        # The system check comes first, so a state that is at or beyond both
        # thresholds is treated as a system state.
        if state_id >= first_system_state:
            idle_state_domains.append([all_cpus])
        elif state_id >= first_cluster_state:
            idle_state_domains.append(cluster_domains)
        else:
            idle_state_domains.append(core_domains)

    return idle_state_domains
|
||||
|
||||
|
||||
# sys.maxsize is used for the defaults below as the "larger than any state ID"
# sentinel because, unlike sys.maxint, it exists on both Python 2 and Python 3.
def report_power_stats(trace_file, idle_state_names, core_names, core_clusters,
                       num_idle_states, first_cluster_state=sys.maxsize,
                       first_system_state=sys.maxsize, use_ratios=False):
    """
    Parse the ``cpu_idle`` and ``cpu_frequency`` events out of ``trace_file``
    and return a ``(parallel_report, power_state_report)`` tuple. Both are
    ``None`` if no power state was produced from the trace.

        :trace_file: path to the trace to be parsed.
        :idle_state_names: list of idle state names, indexed by state ID.
        :core_names: list of core names, indexed by cpu id.
        :core_clusters: list giving the cluster ID of each core.
        :num_idle_states: total number of idle states on the device.
        :first_cluster_state: ID of the first idle state shared by all cores
                              in a cluster.
        :first_system_state: ID of the first idle state shared by all cores.
        :use_ratios: report proportions as ratios rather than percentages.

    """
    # pylint: disable=too-many-locals
    trace = TraceCmdTrace()
    ps_processor = PowerStateProcessor(core_clusters,
                                       num_idle_states=num_idle_states,
                                       first_cluster_state=first_cluster_state,
                                       first_system_state=first_system_state)
    parallel_stats = ParallelStats(core_clusters, use_ratios)
    power_state_stats = PowerStateStats(core_names, idle_state_names, use_ratios)

    # Pipeline: trace events -> power transitions -> system power states ->
    # per-core (idle state, frequency) summaries.
    event_stream = trace.parse(trace_file, names=['cpu_idle', 'cpu_frequency'])
    transition_stream = stream_cpu_power_transitions(event_stream)
    power_state_stream = ps_processor.process(transition_stream)
    core_state_stream = gather_core_states(power_state_stream)

    for timestamp, states in core_state_stream:
        parallel_stats.update(timestamp, states)
        power_state_stats.update(timestamp, states)

    parallel_report = parallel_stats.report()
    ps_report = power_state_stats.report()

    return (parallel_report, ps_report)
|
||||
|
||||
|
||||
def main():
    """
    Command line entry point: parse the arguments, generate the reports from
    the specified trace and write them into the output directory as
    ``parallel.csv`` and ``cpustate.csv``.

    """
    args = parse_arguments()

    parallel_report, powerstate_report = report_power_stats(
        trace_file=args.infile,
        idle_state_names=args.idle_state_names,
        core_names=args.core_names,
        core_clusters=args.core_clusters,
        num_idle_states=args.num_idle_states,
        first_cluster_state=args.first_cluster_state,
        first_system_state=args.first_system_state,
        use_ratios=args.ratios,
    )
    # No reports are produced when the trace contained no power events; say
    # so rather than failing with an AttributeError on None below.
    if parallel_report is None or powerstate_report is None:
        logger.warning('No power state reports generated; are power '
                       'events enabled in the trace?')
        return
    parallel_report.write(os.path.join(args.output_directory, 'parallel.csv'))
    powerstate_report.write(os.path.join(args.output_directory, 'cpustate.csv'))
|
||||
|
||||
|
||||
class SplitListAction(argparse.Action):
    """
    argparse action that stores a comma-separated option value as a list of
    strings, with surrounding whitespace removed from each item.

    """

    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        # The option always takes exactly one (comma-separated) value.
        if nargs is not None:
            raise ValueError('nargs not allowed')
        super(SplitListAction, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        items = []
        for raw_item in values.split(','):
            items.append(raw_item.strip())
        setattr(namespace, self.dest, items)
|
||||
|
||||
|
||||
def parse_arguments(argv=None):  # NOQA
    """
    Parse and validate the command line, returning the populated namespace
    with missing values filled in:

        - ``core_clusters`` is inferred from ``core_names`` when not given
          (cores with the same name are assumed to share a cluster);
        - whichever of ``num_idle_states`` and ``idle_state_names`` is not
          given is derived from the other;
        - on multi-cluster systems, ``first_cluster_state`` defaults to the
          state before the first system state, or to the last state if there
          is no system state.

    :param argv: list of arguments to parse; ``sys.argv[1:]`` is used when
                 not specified.

    Raises ``ValueError`` if core names are missing, if neither the number
    nor the names of idle states are given, or if the two disagree.

    """
    parser = argparse.ArgumentParser(description="""
                                     Produce CPU power activity statistics reports from
                                     power trace.
                                     """)
    parser.add_argument('infile', metavar='TRACEFILE', help='''
                        Path to the trace file to parse. This must be in the format generated
                        by "trace-cmd report" command.
                        ''')
    parser.add_argument('-d', '--output-directory', default='.',
                        help='''
                        Output directory where reports will be placed.
                        ''')
    parser.add_argument('-c', '--core-names', action=SplitListAction,
                        help='''
                        Comma-separated list of core names for the device on which the trace
                        was collected.
                        ''')
    parser.add_argument('-C', '--core-clusters', action=SplitListAction, default=[],
                        help='''
                        Comma-separated list of core cluster IDs for the device on which the
                        trace was collected. If not specified, this will be generated from
                        core names on the assumption that all cores with the same name are on the
                        same cluster.
                        ''')
    # SplitListAction is an argparse *action*; passing it as ``type`` would
    # make argparse call it with just the option's string value and fail.
    parser.add_argument('-i', '--idle-state-names', action=SplitListAction,
                        help='''
                        Comma-separated list of idle state names. The number of names must match
                        --num-idle-states if that was explicitly specified.
                        ''')
    parser.add_argument('-n', '--num-idle-states', type=int,
                        help='''
                        number of idle states on the device
                        ''')
    parser.add_argument('-q', '--first-cluster-state', type=int,
                        help='''
                        ID of the first cluster state. Must be < --num-idle-states.
                        ''')
    parser.add_argument('-s', '--first-system-state', type=int,
                        help='''
                        ID of the first system state. Must be < --num-idle-states, and
                        > --first-cluster-state.
                        ''')
    parser.add_argument('-R', '--ratios', action='store_true',
                        help='''
                        By default proportional values will be reported as percentages, if this
                        flag is enabled, they will be reported as ratios instead.
                        ''')

    args = parser.parse_args(argv)

    if not args.core_names:
        raise ValueError('core names must be specified using -c or --core-names')
    if not args.core_clusters:
        logger.debug('core clusters not specified, inferring from core names')
        # Each distinct core name gets the next cluster ID, in order of first
        # appearance.
        core_cluster_map = {}
        core_clusters = []
        current_cluster = 0
        for cn in args.core_names:
            if cn not in core_cluster_map:
                core_cluster_map[cn] = current_cluster
                current_cluster += 1
            core_clusters.append(core_cluster_map[cn])
        args.core_clusters = core_clusters
    if not args.num_idle_states and args.idle_state_names:
        args.num_idle_states = len(args.idle_state_names)
    elif args.num_idle_states and not args.idle_state_names:
        args.idle_state_names = ['idle{}'.format(i) for i in range(args.num_idle_states)]
    elif args.num_idle_states and args.idle_state_names:
        if len(args.idle_state_names) != args.num_idle_states:
            raise ValueError('Number of idle state names does not match --num-idle-states')
    else:
        raise ValueError('Either --num-idle-states or --idle-state-names must be specified')

    # ``is None`` rather than truthiness: 0 is a valid state ID and must not
    # be mistaken for "not specified".
    if args.first_cluster_state is None and len(set(args.core_clusters)) > 1:
        if args.first_system_state is not None:
            logger.debug('First cluster idle state not specified; state previous to first system state')
            args.first_cluster_state = args.first_system_state - 1
        else:
            logger.debug('First cluster idle state not specified; assuming last available state')
            args.first_cluster_state = args.num_idle_states - 1

    return args
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -31,7 +31,7 @@ TRACE_MARKER_STOP = 'TRACE_MARKER_STOP'
|
||||
|
||||
class TraceCmdEvent(object):
|
||||
"""
|
||||
A single trace-cmd event. This will appear in the trace cmd report in the format
|
||||
A single trace-cmd event. This will appear in the trace cmd report in the format ::
|
||||
|
||||
<idle>-0 [000] 3284.126993: sched_rq_runnable_load: cpu=0 load=54
|
||||
| | | | |___________|
|
||||
|
Loading…
Reference in New Issue
Block a user