mirror of
https://github.com/ARM-software/workload-automation.git
synced 2025-02-20 20:09:11 +00:00
processors: add cpustates
Add cpustates output processor. This is more-or-less a port of the cpustates processor from WA, however there are some differences: - Idle states are now tracked individually per-CPU. This will fix processing traces from targets that have different number of idle states on different clusters. - Simplify the parameter list for report_power_stats: - Replace paths to individual report files with a path to a single directory. A subdirectory will be created under it which will contain all the reports. - Replace the individual bits of information about CPUs (core names, idle states, etc) with a list of CpuInfo objects. - Clean up and simplify the code a bit: - Make all reports mandatory -- the marginal cost of generating an additional report is minimal compared to tracking power states in the first place. - Standardize the interface for Reporters and Reports. - Rename some of the reports to something a bit more meaningful. - The stand-alone command line interface is not ported for now, as it is now possible to run this offline on existing results using "wa process".
This commit is contained in:
parent
e7e272cd03
commit
a2b0705ff0
151
wa/output_processors/cpustates.py
Executable file
151
wa/output_processors/cpustates.py
Executable file
@ -0,0 +1,151 @@
|
||||
# Copyright 2015-2018 ARM Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import os
|
||||
import csv
|
||||
from collections import OrderedDict
|
||||
|
||||
from wa import OutputProcessor, Parameter
|
||||
from wa.utils.types import list_of_strings
|
||||
from wa.utils.cpustates import report_power_stats
|
||||
|
||||
|
||||
def _get_cpustates_description():
    """
    Reuse the description for report_power_stats() but strip away its
    parameter docs, as they are not relevant to the OutputProcessor.

    A parameter section starts at a line beginning with ``:param`` and runs
    up to (but not including) the next blank line; everything in it is
    dropped. All other lines are kept verbatim.

    :returns: The filtered docstring as a single string. An empty string is
              returned if the docstring is unavailable (e.g. when running
              under ``python -OO``).
    """
    docstring = report_power_stats.__doc__ or ''
    output_lines = []
    skipping_params = False
    for line in docstring.split('\n'):
        stripped = line.strip()
        if skipping_params:
            if stripped:
                # still inside the parameter section
                continue
            # the blank line terminating the section is kept
            skipping_params = False
        elif stripped.startswith(':param'):
            skipping_params = True
            continue
        output_lines.append(line)
    return '\n'.join(output_lines)
|
||||
|
||||
|
||||
class CpuStatesProcessor(OutputProcessor):
    """
    Output processor that generates CPU power state reports from the text
    trace of each job, and then aggregates the per-job statistics into two
    run-level CSV files (``parallel-stats.csv`` and ``power-state-stats.csv``).
    """

    name = 'cpustates'

    description = _get_cpustates_description()

    parameters = [
        Parameter('use_ratios', kind=bool, default=False,
                  description="""
                  By default proportional values will be reported as
                  percentages, if this flag is enabled, they will be reported
                  as ratios instead.
                  """),
        Parameter('no_idle', kind=bool, default=False,
                  description="""
                  Indicate that there will be no idle transitions in the trace.
                  By default, a core will be reported as being in an "unknown"
                  state until the first idle transtion for that core. Normally,
                  this is not an issue, as cores are "nudged" as part of the
                  setup to ensure that there is an idle transtion before the
                  meassured region. However, if all idle states for the core
                  have been disabled, or if the kernel does not have cpuidle,
                  the nudge will not result in an idle transition, which would
                  cause the cores to be reported to be in "unknown" state for
                  the entire execution.

                  If this parameter is set to ``True``, the processor will
                  assume that cores are running prior to the begining of the
                  issue, and they will leave unknown state on the first
                  frequency transition.
                  """),
        Parameter('split_wfi_states', kind=bool, default=False,
                  description="""
                  WFI is a very shallow idle state. The core remains powered on
                  when in this state, which means the power usage while in this
                  state will depend on the current voltage, and therefore current
                  frequency.

                  Setting this to ``True`` will track time spent in WFI at
                  each frequency separately, allowing to gain the most accurate
                  picture of energy usage.
                  """),
    ]

    def initialize(self):
        """Reset the per-run store of generated reports."""
        # (job id, label, iteration) -> {report name: report}, kept in the
        # order in which the jobs were processed.
        self.iteration_reports = OrderedDict()

    def process_job_output(self, output, target_info, run_output):
        """
        Generate the power state reports for a single job and register each
        of them as a ``data`` artifact of that job's output.

        The job is skipped (with a warning) if it has no ``trace-cmd-txt``
        artifact.
        """
        trace_file = output.get_artifact_path('trace-cmd-txt')
        if not trace_file:
            self.logger.warning('Text trace does not appear to have been generated; skipping this iteration.')
            return

        self.logger.info('Generating power state reports from trace...')
        reports = report_power_stats(  # pylint: disable=unbalanced-tuple-unpacking
            trace_file=trace_file,
            output_basedir=output.basepath,
            cpus=target_info.cpus,
            use_ratios=self.use_ratios,
            no_idle=self.no_idle,
            split_wfi_states=self.split_wfi_states,
        )

        # .values() rather than .itervalues() so this works on Python 2 and 3.
        for report in reports.values():
            output.add_artifact(report.name, report.filepath, kind='data')

        iteration_id = (output.id, output.label, output.iteration)
        self.iteration_reports[iteration_id] = reports

    def process_run_output(self, output, target_info):
        """
        Combine the parallel and power state statistics of all processed jobs
        into run-level CSV files and register them as ``export`` artifacts.

        Jobs for which a particular report is not available contribute no
        rows to the corresponding file.
        """
        if not self.iteration_reports:
            self.logger.warning('No power state reports generated.')
            return

        parallel_rows = []
        powerstate_rows = []
        core_names = []  # taken from the last available power state report
        for iteration_id, reports in self.iteration_reports.items():
            job_id, workload, iteration = iteration_id
            row_prefix = [job_id, workload, iteration]

            parallel_report = reports.get('parallel-stats')
            if parallel_report is not None:
                for record in parallel_report.values:
                    parallel_rows.append(row_prefix + record)

            powerstate_report = reports.get('power-state-stats')
            if powerstate_report is not None:
                core_names = powerstate_report.core_names
                for state in sorted(powerstate_report.state_stats):
                    stats = powerstate_report.state_stats[state]
                    # a CPU that never entered the state has no entry (None)
                    powerstate_rows.append(row_prefix + [state] +
                                           ['{:.3f}'.format(s if s is not None else 0)
                                            for s in stats])

        outpath = output.get_path('parallel-stats.csv')
        with open(outpath, 'w') as wfh:
            writer = csv.writer(wfh)
            writer.writerow(['id', 'workload', 'iteration', 'cluster',
                             'number_of_cores', 'total_time',
                             '%time', '%running_time'])
            writer.writerows(parallel_rows)
        output.add_artifact('run-parallel-stats', outpath, kind='export')

        outpath = output.get_path('power-state-stats.csv')
        with open(outpath, 'w') as wfh:
            writer = csv.writer(wfh)
            headers = ['id', 'workload', 'iteration', 'state']
            headers += ['{} CPU{}'.format(c, i)
                        for i, c in enumerate(core_names)]
            writer.writerow(headers)
            writer.writerows(powerstate_rows)
        output.add_artifact('run-power-state-stats', outpath, kind='export')
|
713
wa/utils/cpustates.py
Executable file
713
wa/utils/cpustates.py
Executable file
@ -0,0 +1,713 @@
|
||||
# Copyright 2015-2018 ARM Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import division
|
||||
import os
|
||||
import sys
|
||||
import csv
|
||||
import re
|
||||
import logging
|
||||
from ctypes import c_int32
|
||||
from collections import defaultdict
|
||||
import argparse
|
||||
|
||||
from wa.utils.trace_cmd import TraceCmdParser, trace_has_marker, TRACE_MARKER_START, TRACE_MARKER_STOP
|
||||
|
||||
|
||||
# Logger used by the power state processing code in this module.
logger = logging.getLogger('cpustates')

# Applied to the text of 'print' trace events that do not mention
# "cpu_frequency"; captures the cpu number and its frequency.
INIT_CPU_FREQ_REGEX = re.compile(r'CPU (?P<cpu>\d+) FREQUENCY: (?P<freq>\d+) kHZ')
# Applied to the text of 'print' trace events that mention "cpu_frequency";
# captures the frequency ("state") and the cpu number ("cpu_id").
DEVLIB_CPU_FREQ_REGEX = re.compile(r'cpu_frequency(?:_devlib):\s+state=(?P<freq>\d+)\s+cpu_id=(?P<cpu>\d+)')
|
||||
|
||||
|
||||
class CorePowerTransitionEvent(object):
    """
    A single CPU changing either its frequency or its idle state.

    Exactly one of ``frequency`` and ``idle_state`` must be given; a
    ``ValueError`` is raised when both or neither are supplied.
    """

    kind = 'transition'
    __slots__ = ['timestamp', 'cpu_id', 'frequency', 'idle_state']

    def __init__(self, timestamp, cpu_id, frequency=None, idle_state=None):
        has_frequency = frequency is not None
        has_idle_state = idle_state is not None
        # both set or both missing -- either way the event is ambiguous
        if has_frequency == has_idle_state:
            raise ValueError('Power transition must specify a frequency or an idle_state, but not both.')
        self.timestamp, self.cpu_id = timestamp, cpu_id
        self.frequency, self.idle_state = frequency, idle_state

    def __str__(self):
        template = 'cpu {} @ {} -> freq: {} idle: {}'
        return template.format(self.cpu_id, self.timestamp,
                               self.frequency, self.idle_state)

    def __repr__(self):
        template = 'CPTE(c:{} t:{} f:{} i:{})'
        return template.format(self.cpu_id, self.timestamp,
                               self.frequency, self.idle_state)
|
||||
|
||||
|
||||
class CorePowerDroppedEvents(object):
    """Signals that trace events were dropped for the CPU ``cpu_id``."""

    kind = 'dropped_events'
    __slots__ = ['cpu_id']

    def __init__(self, cpu_id):
        self.cpu_id = cpu_id

    def __repr__(self):
        text = 'DROPPED EVENTS on CPU{}'
        return text.format(self.cpu_id)

    # str() and repr() produce the same text
    __str__ = __repr__
|
||||
|
||||
|
||||
class TraceMarkerEvent(object):
    """A named marker found in the trace (e.g. ``'START'`` or ``'STOP'``)."""

    __slots__ = ['name']
    kind = 'marker'

    def __init__(self, name):
        self.name = name

    def __str__(self):
        text = 'MARKER: {}'
        return text.format(self.name)
|
||||
|
||||
|
||||
class CpuPowerState(object):
    """
    Power state of a single CPU: its frequency and its idle state.

    An ``idle_state`` of ``-1`` means the CPU is active, a value ``>= 0``
    means it is idling, and ``None`` means the state is not known.
    """

    __slots__ = ['frequency', 'idle_state']

    def __init__(self, frequency=None, idle_state=None):
        self.idle_state = idle_state
        self.frequency = frequency

    @property
    def is_idling(self):
        state = self.idle_state
        if state is None:
            return False
        return state >= 0

    @property
    def is_active(self):
        return -1 == self.idle_state

    def __repr__(self):
        text = 'CP(f:{} i:{})'
        return text.format(self.frequency, self.idle_state)

    # str() and repr() produce the same text
    __str__ = __repr__
|
||||
|
||||
|
||||
class SystemPowerState(object):
    """
    Snapshot of the power state of every CPU in the system at ``timestamp``.

    :param num_cores: Number of CPUs to track.
    :param no_idle: If ``True``, every CPU starts out as active (idle state
                    ``-1``); otherwise every CPU starts out with an unknown
                    (``None``) idle state.
    """

    __slots__ = ['timestamp', 'cpus']

    @property
    def num_cores(self):
        return len(self.cpus)

    def __init__(self, num_cores, no_idle=False):
        self.timestamp = None
        idle_state = -1 if no_idle else None
        # range() rather than xrange() so this works on Python 2 and 3.
        self.cpus = [CpuPowerState(idle_state=idle_state)
                     for _ in range(num_cores)]

    def copy(self):
        """Return an independent copy of this state (per-CPU states are duplicated, not shared)."""
        new = SystemPowerState(self.num_cores)
        new.timestamp = self.timestamp
        for i, c in enumerate(self.cpus):
            new.cpus[i].frequency = c.frequency
            new.cpus[i].idle_state = c.idle_state
        return new

    def __str__(self):
        return 'SP(t:{} Cs:{})'.format(self.timestamp, self.cpus)

    __repr__ = __str__
|
||||
|
||||
|
||||
class PowerStateProcessor(object):
    """
    This takes a stream of power transition events and yields a timeline stream
    of system power states.

    :param cpus: Sequence of objects describing the CPUs; each is expected to
                 provide ``cpuidle`` and ``cpufreq`` attributes (see
                 ``build_idle_state_map``).
    :param wait_for_marker: If ``True``, states are only yielded once a START
                            marker has been seen.
    :param no_idle: If ``True``, CPUs are assumed to be active initially;
                    if ``False``, their initial state is unknown. If ``None``,
                    this is guessed: CPUs are assumed active only when the
                    first CPU has no cpuidle states.
    """

    @property
    def cpu_states(self):
        return self.power_state.cpus

    @property
    def current_time(self):
        return self.power_state.timestamp

    @current_time.setter
    def current_time(self, value):
        self.power_state.timestamp = value

    def __init__(self, cpus, wait_for_marker=True, no_idle=None):
        if no_idle is None:
            # Idle transitions can only be expected when idle states exist,
            # so "no idle" is assumed only in their absence.
            cpuidle = cpus[0].cpuidle
            no_idle = not (cpuidle and cpuidle.states)
        self.power_state = SystemPowerState(len(cpus), no_idle=no_idle)
        self.requested_states = {}  # cpu_id -> requested state
        self.wait_for_marker = wait_for_marker
        self._saw_start_marker = False
        self._saw_stop_marker = False
        self.exceptions = []

        self.idle_related_cpus = build_idle_state_map(cpus)

    def process(self, event_stream):
        """
        Consume ``event_stream`` and yield a copy of the system power state
        after each event.

        When waiting for markers, nothing is yielded before the START marker,
        and processing ends at the STOP marker. Errors raised while handling
        an event are collected in ``self.exceptions`` rather than propagated.
        """
        for event in event_stream:
            try:
                next_state = self.update_power_state(event)
                if self._saw_start_marker or not self.wait_for_marker:
                    yield next_state
                if self._saw_stop_marker:
                    break
            except Exception as e:  # pylint: disable=broad-except
                self.exceptions.append(e)
        else:
            # only reached when the stream ran out without a STOP marker
            if self.wait_for_marker:
                logger.warning("Did not see a STOP marker in the trace")

    def update_power_state(self, event):
        """
        Update the tracked power state based on the specified event and
        return updated power state.

        :raises ValueError: if the event's ``kind`` is not recognised.
        """
        if event.kind == 'transition':
            self._process_transition(event)
        elif event.kind == 'dropped_events':
            self._process_dropped_events(event)
        elif event.kind == 'marker':
            # markers with any other name are ignored
            if event.name == 'START':
                self._saw_start_marker = True
            elif event.name == 'STOP':
                self._saw_stop_marker = True
        else:
            raise ValueError('Unexpected event type: {}'.format(event.kind))
        return self.power_state.copy()

    def _process_transition(self, event):
        self.current_time = event.timestamp
        if event.idle_state is None:
            # frequency transition
            self.cpu_states[event.cpu_id].frequency = event.frequency
        else:
            if event.idle_state == -1:
                self._process_idle_exit(event)
            else:
                self._process_idle_entry(event)

    def _process_dropped_events(self, event):
        # Nothing is known about the cpu (or the cpus related to it in its
        # previous idle state) once events have been dropped.
        self.cpu_states[event.cpu_id].frequency = None
        old_idle_state = self.cpu_states[event.cpu_id].idle_state
        self.cpu_states[event.cpu_id].idle_state = None

        related_ids = self.idle_related_cpus[(event.cpu_id, old_idle_state)]
        for rid in related_ids:
            self.cpu_states[rid].idle_state = None

    def _process_idle_entry(self, event):
        if self.cpu_states[event.cpu_id].is_idling:
            raise ValueError('Got idle state entry event for an idling core: {}'.format(event))
        self.requested_states[event.cpu_id] = event.idle_state
        self._try_transition_to_idle_state(event.cpu_id, event.idle_state)

    def _process_idle_exit(self, event):
        if self.cpu_states[event.cpu_id].is_active:
            raise ValueError('Got idle state exit event for an active core: {}'.format(event))
        self.requested_states.pop(event.cpu_id, None)  # remove outstanding request if there is one
        old_state = self.cpu_states[event.cpu_id].idle_state
        self.cpu_states[event.cpu_id].idle_state = -1

        related_ids = self.idle_related_cpus[(event.cpu_id, old_state)]
        if old_state is not None:
            new_state = old_state - 1
            for rid in related_ids:
                rid_state = self.cpu_states[rid].idle_state
                # An unknown (None) state is left alone. The explicit check
                # keeps the Python 2 outcome of ``None > int`` (False) while
                # avoiding the TypeError that comparison raises on Python 3.
                if rid_state is not None and rid_state > new_state:
                    self._try_transition_to_idle_state(rid, new_state)

    def _try_transition_to_idle_state(self, cpu_id, idle_state):
        related_ids = self.idle_related_cpus[(cpu_id, idle_state)]

        # Tristate: True - can transition, False - can't transition,
        # None - unknown idle state on at least one related cpu
        transition_check = self._can_enter_state(related_ids, idle_state)

        if transition_check is None:
            # Unknown state on a related cpu means we're not sure whether we're
            # entering requested state or a shallower one
            self.cpu_states[cpu_id].idle_state = None
            return

        # Keep trying shallower states until all related cpus allow the
        # transition (a None result is treated like False here).
        while not self._can_enter_state(related_ids, idle_state):
            idle_state -= 1
            related_ids = self.idle_related_cpus[(cpu_id, idle_state)]

        self.cpu_states[cpu_id].idle_state = idle_state
        for rid in related_ids:
            self.cpu_states[rid].idle_state = idle_state

    def _can_enter_state(self, related_ids, state):
        """
        This is a tri-state check. Returns ``True`` if related cpu states allow transition
        into this state, ``False`` if related cpu states don't allow transition into this
        state, and ``None`` if at least one of the related cpus is in an unknown state
        (so the decision of whether a transition is possible cannot be made).

        """
        for rid in related_ids:
            rid_requested_state = self.requested_states.get(rid, None)
            rid_current_state = self.cpu_states[rid].idle_state
            if rid_current_state is None:
                return None
            if rid_current_state < state:
                if rid_requested_state is None or rid_requested_state < state:
                    return False
        return True
|
||||
|
||||
|
||||
def stream_cpu_power_transitions(events):
    """
    Filter a stream of parsed trace events down to power-related events.

    Yields ``CorePowerTransitionEvent`` for idle and frequency changes
    (including frequencies reported through 'print' events),
    ``CorePowerDroppedEvents`` when dropped events are reported, and
    ``TraceMarkerEvent`` for the START/STOP markers. Everything else is
    discarded.
    """
    for trace_event in events:
        event_name = trace_event.name

        if event_name == 'cpu_idle':
            # Reinterpret the raw value as a signed 32-bit integer, so that
            # e.g. 4294967295 becomes -1 (the value PowerStateProcessor
            # treats as an idle exit).
            signed_state = c_int32(trace_event.state).value
            yield CorePowerTransitionEvent(trace_event.timestamp, trace_event.cpu_id,
                                           idle_state=signed_state)
            continue

        if event_name == 'cpu_frequency':
            yield CorePowerTransitionEvent(trace_event.timestamp, trace_event.cpu_id,
                                           frequency=trace_event.state)
            continue

        if event_name == 'DROPPED EVENTS DETECTED':
            yield CorePowerDroppedEvents(trace_event.cpu_id)
            continue

        if event_name != 'print':
            continue

        text = trace_event.text
        if TRACE_MARKER_START in text:
            yield TraceMarkerEvent('START')
        elif TRACE_MARKER_STOP in text:
            yield TraceMarkerEvent('STOP')
        else:
            # a frequency reported via a print event, in one of two formats
            pattern = DEVLIB_CPU_FREQ_REGEX if 'cpu_frequency' in text else INIT_CPU_FREQ_REGEX
            found = pattern.search(text)
            if found:
                yield CorePowerTransitionEvent(trace_event.timestamp,
                                               int(found.group('cpu')),
                                               frequency=int(found.group('freq')))
|
||||
|
||||
|
||||
def gather_core_states(system_state_stream, freq_dependent_idle_states=None):  # NOQA
    """
    Collapse each system power state into ``(timestamp, core_states)``, where
    ``core_states`` holds one ``(idle_state, frequency)`` tuple per cpu.

    The frequency is only reported for active cpus (idle state ``-1``) and
    for cpus in one of the ``freq_dependent_idle_states``; for any other
    state it is ``None``. A cpu in a frequency-dependent idle state whose
    frequency is unknown is reported as ``(None, None)``.
    """
    dependent_states = freq_dependent_idle_states
    if dependent_states is None:
        dependent_states = []

    def collapse(cpu):
        idle = cpu.idle_state
        if idle == -1:
            return (-1, cpu.frequency)
        if idle not in dependent_states:
            return (idle, None)
        if cpu.frequency is None:
            return (None, None)
        return (idle, cpu.frequency)

    for snapshot in system_state_stream:
        per_core = [collapse(cpu) for cpu in snapshot.cpus]
        yield (snapshot.timestamp, per_core)
|
||||
|
||||
|
||||
def record_state_transitions(reporter, stream):
    """
    Pass every event of ``stream`` through unchanged, handing those of kind
    ``'transition'`` to ``reporter.record_transition()`` on the way.
    """
    for item in stream:
        is_transition = item.kind == 'transition'
        if is_transition:
            reporter.record_transition(item)
        yield item
|
||||
|
||||
|
||||
class PowerStateTransitions(object):
    """
    Reporter that logs every recorded transition as a row of
    ``state-transitions-timeline.csv`` inside ``output_directory``.

    The file is opened on construction and closed by ``write()``.
    """

    name = 'transitions-timeline'

    def __init__(self, output_directory):
        self.filepath = os.path.join(output_directory, 'state-transitions-timeline.csv')
        self._wfh = open(self.filepath, 'w')
        self.writer = csv.writer(self._wfh)
        self.writer.writerow(['timestamp', 'cpu_id', 'frequency', 'idle_state'])

    def update(self, timestamp, core_states):  # NOQA
        # This reporter only cares about transitions; states are ignored.
        return None

    def record_transition(self, transition):
        self.writer.writerow([
            transition.timestamp,
            transition.cpu_id,
            transition.frequency,
            transition.idle_state,
        ])

    def report(self):
        # the reporter acts as its own report
        return self

    def write(self):
        self._wfh.close()
|
||||
|
||||
|
||||
class PowerStateTimeline(object):
    """
    Reporter that writes one row per update to ``power-state-timeline.csv``
    inside ``output_directory``, describing the state of each cpu at that
    timestamp.

    The file is opened on construction and closed by ``write()``.
    """

    name = 'state-timeline'

    def __init__(self, output_directory, cpus):
        self.filepath = os.path.join(output_directory, 'power-state-timeline.csv')
        # cpu id -> names of its idle states, indexed by idle state number
        self.idle_state_names = {}
        for cpu in cpus:
            self.idle_state_names[cpu.id] = [s.name for s in cpu.cpuidle.states]
        self._wfh = open(self.filepath, 'w')
        self.writer = csv.writer(self._wfh)
        column_names = ['{} CPU{}'.format(cpu.name, cpu.id) for cpu in cpus]
        self.writer.writerow(['ts'] + column_names)

    def _describe(self, cpu_idx, idle_state, frequency):
        # Produce the cell value for a single cpu.
        if idle_state is None:
            return 'unknown'
        if idle_state == -1:
            if frequency is None:
                return 'Running (unknown kHz)'
            return frequency
        state_name = self.idle_state_names[cpu_idx][idle_state]
        if frequency is None:
            return state_name
        return '{} ({})'.format(state_name, frequency)

    def update(self, timestamp, core_states):  # NOQA
        cells = [self._describe(cpu_idx, idle_state, frequency)
                 for cpu_idx, (idle_state, frequency) in enumerate(core_states)]
        self.writer.writerow([timestamp] + cells)

    def report(self):
        # the reporter acts as its own report
        return self

    def write(self):
        self._wfh.close()
|
||||
|
||||
|
||||
class ParallelStats(object):
    """
    Reporter that accumulates, for each cluster and for the system as a whole
    (cluster ``'all'``), how much time was spent with each number of cores
    active simultaneously.

    :param output_directory: Directory the resulting ``parallel-stats.csv``
                             will be located in.
    :param cpus: Objects describing the cpus; ``id`` and
                 ``cpufreq.related_cpus`` are used to group them into
                 clusters.
    :param use_ratios: Report proportions as ratios rather than percentages.
    """

    def __init__(self, output_directory, cpus, use_ratios=False):
        self.filepath = os.path.join(output_directory, 'parallel-stats.csv')
        self.clusters = defaultdict(set)
        self.use_ratios = use_ratios

        # each distinct set of related cpus forms a cluster
        clusters = []
        for cpu in cpus:
            if cpu.cpufreq.related_cpus not in clusters:
                clusters.append(cpu.cpufreq.related_cpus)

        for i, clust in enumerate(clusters):
            self.clusters[i] = set(clust)
        self.clusters['all'] = set([cpu.id for cpu in cpus])

        self.first_timestamp = None
        self.last_timestamp = None
        self.previous_states = None
        # cluster -> number of active cores -> accumulated time
        self.parallel_times = defaultdict(lambda: defaultdict(int))
        # cluster -> accumulated time with at least one core active
        self.running_times = defaultdict(int)

    def update(self, timestamp, core_states):
        """
        Attribute the time elapsed since the previous update to the states
        recorded at that previous update, then remember the new states.
        """
        if self.last_timestamp is not None:
            delta = timestamp - self.last_timestamp
            active_cores = [i for i, c in enumerate(self.previous_states)
                            if c and c[0] == -1]
            # .items() rather than .iteritems() so this works on Python 2 and 3.
            for cluster, cluster_cores in self.clusters.items():
                clust_active_cores = len(cluster_cores.intersection(active_cores))
                self.parallel_times[cluster][clust_active_cores] += delta
                if clust_active_cores:
                    self.running_times[cluster] += delta
        else:  # initial update
            self.first_timestamp = timestamp

        self.last_timestamp = timestamp
        self.previous_states = core_states

    def report(self):  # NOQA
        """
        Build the ``ParallelReport`` from the accumulated times.

        :returns: The report, or ``None`` if ``update()`` was never called
                  with a usable timestamp.
        """
        if self.last_timestamp is None:
            return None

        report = ParallelReport(self.filepath)
        total_time = self.last_timestamp - self.first_timestamp
        precision = 3 if self.use_ratios else 1
        fmt = '{{:.{}f}}'.format(precision)

        # Cluster keys are ints plus the string 'all'. Python 3 cannot order
        # those directly, so sort numbered clusters first and 'all' last
        # (which is also the order Python 2 produced).
        def cluster_order(cluster):
            return (not isinstance(cluster, int), cluster)

        for cluster in sorted(self.parallel_times, key=cluster_order):
            running_time = self.running_times[cluster]
            for n in range(len(self.clusters[cluster]) + 1):
                time = self.parallel_times[cluster][n]
                # no time elapsed overall means no time in any state
                time_pc = time / total_time if total_time else 0
                if not self.use_ratios:
                    time_pc *= 100
                if n and running_time:
                    running_time_pc = time / running_time
                    if not self.use_ratios:
                        running_time_pc *= 100
                else:
                    # "zero cores active" is by definition not running time
                    running_time_pc = 0
                report.add([cluster, n,
                            fmt.format(time),
                            fmt.format(time_pc),
                            fmt.format(running_time_pc),
                            ])
        return report
|
||||
|
||||
|
||||
class ParallelReport(object):
    """
    Rows of parallel execution statistics, written out as CSV to ``filepath``
    by ``write()``.
    """

    name = 'parallel-stats'

    def __init__(self, filepath):
        self.values = []
        self.filepath = filepath

    def add(self, value):
        # ``value`` is one complete row of the report
        self.values.append(value)

    def write(self):
        header = ['cluster', 'number_of_cores', 'total_time', '%time', '%running_time']
        with open(self.filepath, 'w') as out_file:
            csv_out = csv.writer(out_file)
            csv_out.writerow(header)
            csv_out.writerows(self.values)
|
||||
|
||||
|
||||
class PowerStateStats(object):
    """
    Reporter that accumulates the time each cpu spends in each power state
    and reports it as a proportion of the total time observed.

    :param output_directory: Directory the resulting ``power-state-stats.csv``
                             will be located in.
    :param cpus: Objects describing the cpus; ``id``, ``name`` and
                 ``cpuidle.states`` (each with a ``name``) are used.
    :param use_ratios: Report proportions as ratios rather than percentages.
    """

    def __init__(self, output_directory, cpus, use_ratios=False):
        self.filepath = os.path.join(output_directory, 'power-state-stats.csv')
        self.core_names = [cpu.name for cpu in cpus]
        # cpu id -> names of its idle states, indexed by idle state number
        self.idle_state_names = {cpu.id: [s.name for s in cpu.cpuidle.states] for cpu in cpus}
        self.use_ratios = use_ratios
        self.first_timestamp = None
        self.last_timestamp = None
        self.previous_states = None
        # cpu index -> state label -> accumulated time
        self.cpu_states = defaultdict(lambda: defaultdict(int))

    def update(self, timestamp, core_states):  # NOQA
        """
        Attribute the time elapsed since the previous update to the states
        recorded at that previous update, then remember the new states.
        """
        if self.last_timestamp is not None:
            delta = timestamp - self.last_timestamp
            for cpu, (idle, freq) in enumerate(self.previous_states):
                if idle == -1:
                    if freq is not None:
                        state = '{:07}KHz'.format(freq)
                    else:
                        state = 'Running (unknown KHz)'
                elif freq:
                    # idle state tracked separately for each frequency
                    state = '{}-{:07}KHz'.format(self.idle_state_names[cpu][idle], freq)
                elif idle is not None:
                    state = self.idle_state_names[cpu][idle]
                else:
                    state = 'unknown'
                self.cpu_states[cpu][state] += delta
        else:  # initial update
            self.first_timestamp = timestamp

        self.last_timestamp = timestamp
        self.previous_states = core_states

    def report(self):
        """
        Build the ``PowerStateStatsReport`` from the accumulated times.

        :returns: The report, or ``None`` if ``update()`` was never called
                  with a usable timestamp.
        """
        if self.last_timestamp is None:
            return None
        total_time = self.last_timestamp - self.first_timestamp
        # state label -> per-cpu proportion (None where the cpu never was in
        # that state)
        state_stats = defaultdict(lambda: [None] * len(self.core_names))

        # .items() rather than .iteritems() so this works on Python 2 and 3.
        for cpu, states in self.cpu_states.items():
            for state, time in states.items():
                # no time elapsed overall means no time in any state
                time_pc = time / total_time if total_time else 0
                if not self.use_ratios:
                    time_pc *= 100
                state_stats[state][cpu] = time_pc

        precision = 3 if self.use_ratios else 1
        return PowerStateStatsReport(self.filepath, state_stats, self.core_names, precision)
|
||||
|
||||
|
||||
class PowerStateStatsReport(object):
    """
    Per-state, per-cpu statistics, written out as CSV to ``filepath`` by
    ``write()``.

    ``state_stats`` maps a state label to a list with one value per cpu;
    ``None`` entries are written as zero. Values are formatted with
    ``precision`` decimal places.
    """

    name = 'power-state-stats'

    def __init__(self, filepath, state_stats, core_names, precision=2):
        self.filepath = filepath
        self.core_names = core_names
        self.state_stats = state_stats
        self.precision = precision

    def write(self):
        with open(self.filepath, 'w') as out_file:
            csv_out = csv.writer(out_file)
            columns = ['{} CPU{}'.format(core_name, idx)
                       for idx, core_name in enumerate(self.core_names)]
            csv_out.writerow(['state'] + columns)
            for state_label in sorted(self.state_stats):
                per_cpu = self.state_stats[state_label]
                number_format = '{{:.{}f}}'.format(self.precision)
                cells = []
                for value in per_cpu:
                    if value is None:
                        value = 0
                    cells.append(number_format.format(value))
                csv_out.writerow([state_label] + cells)
|
||||
|
||||
|
||||
class CpuUtilizationTimeline(object):
    """
    Reporter that writes one row per update to ``utilization-timeline.csv``
    inside ``output_directory``, giving each cpu's reported frequency as a
    fraction of the last entry of its ``cpufreq.available_frequencies``.

    The file is opened on construction and closed by ``write()``.
    """

    name = 'utilization-timeline'

    def __init__(self, output_directory, cpus):
        self.filepath = os.path.join(output_directory, 'utilization-timeline.csv')
        self._wfh = open(self.filepath, 'w')
        self.writer = csv.writer(self._wfh)

        column_names = ['{} CPU{}'.format(cpu.name, cpu.id) for cpu in cpus]
        self.writer.writerow(['ts'] + column_names)
        # the frequency each cpu's values are normalised against
        self._max_freq_list = [cpu.cpufreq.available_frequencies[-1] for cpu in cpus]

    def update(self, timestamp, core_states):  # NOQA
        cells = [timestamp]
        for core, [_idle_state, frequency] in enumerate(core_states):
            if frequency is None:
                # no frequency reported for this cpu
                cells.append(None)
                continue
            cells.append(frequency / float(self._max_freq_list[core]))
        self.writer.writerow(cells)

    def report(self):
        # the reporter acts as its own report
        return self

    def write(self):
        self._wfh.close()
|
||||
|
||||
|
||||
def build_idle_state_map(cpus):
    """
    Map ``(cpu index, idle state index)`` to the list of other cpus affected
    when that cpu is in that idle state.

    Only a cpu's last idle state (index ``num_states - 1``) is associated
    with the other cpus listed in its ``cpufreq.related_cpus``; all of its
    other states map to an empty list. The result is a ``defaultdict``, so
    unknown keys also yield an empty list.
    """
    mapping = defaultdict(list)
    for cpu_idx, cpu in enumerate(cpus):
        siblings = set(cpu.cpufreq.related_cpus) - set([cpu_idx])
        cluster_state_start = cpu.cpuidle.num_states - 1
        for state_idx, _ in enumerate(cpu.cpuidle.states):
            key = (cpu_idx, state_idx)
            if state_idx >= cluster_state_start:
                mapping[key] = list(siblings)
            else:
                mapping[key] = []
    return mapping
|
||||
|
||||
|
||||
def report_power_stats(trace_file, cpus, output_basedir, use_ratios=False, no_idle=None,
                       split_wfi_states=False):
    """
    Process trace-cmd output to generate timelines and statistics of CPU power
    state (a.k.a P- and C-state) transitions in the trace.

    The results will be written into a subdirectory called "power-states" under
    the specified ``output_basedir``.

    :param trace_file: trace-cmd's text trace to process.
    :param cpus: A list of ``CpuInfo`` objects describing a target's CPUs.
                 These are typically reported as part of ``TargetInfo`` in
                 WA output.
    :param output_basedir: Base location for the output. This directory must
                           exist and must not contain a file named
                           ``"power-states"``.
    :param use_ratios: By default, stats will be reported as percentages. Set
                       this to ``True`` to report stats as decimals in the
                       ``0 <= value <= 1`` instead.
    :param no_idle: ``False`` if cpuidle and at least one idle state per CPU are
                    enabled, should be ``True`` otherwise. This influences the
                    assumptions about CPU's initial states. If not explicitly
                    set, the value for this will be guessed based on whether
                    cpuidle states are present in the first ``CpuInfo``.
    :param split_wfi_states: If ``True``, time spent in idle state 0 is
                             tracked separately for each frequency.
    :returns: A dict mapping report names to the reports that were generated
              and written.

    The output directory will contain the following files:

    power-state-stats.csv
        Power state residency statistics for each CPU. Shows the percentage of
        time a CPU has spent in each of its available power states.

    parallel-stats.csv
        Parallel execution stats for each CPU cluster, and combined stats for
        the whole system.

    power-state-timeline.csv
        Timeline of CPU power states. Shows which power state each CPU is in at
        a point in time.

    state-transitions-timeline.csv
        Timeline of CPU power state transitions. Each entry shows a CPU's
        transition from one power state to another.

    utilization-timeline.csv
        Timeline of CPU utilizations.

    .. note:: Timeline entries aren't at regular intervals, but at times of
              power transition events.

    Stats are generated by assembling a pipeline consisting of the following
    stages:

        1. Parse trace into trace events
        2. Filter trace events into power state transition events
        3. Record power state transitions
        4. Convert transitions into a power states.
        5. Collapse the power states into timestamped ``(C state, P state)``
           tuples for each cpu.
        6. Update reporters/stats generators with cpu states.

    """
    output_directory = os.path.join(output_basedir, 'power-states')
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    freq_dependent_idle_states = []
    if split_wfi_states:
        freq_dependent_idle_states = [0]

    # init trace, processor, and reporters
    # note: filter_markers is False here, even though we *will* filter by them. The
    #       reason for this is that we want to observe events before the start
    #       marker in order to establish the initial power states.
    parser = TraceCmdParser(filter_markers=False,
                            events=['cpu_idle', 'cpu_frequency', 'print'])
    ps_processor = PowerStateProcessor(cpus, wait_for_marker=trace_has_marker(trace_file),
                                       no_idle=no_idle)
    transitions_reporter = PowerStateTransitions(output_directory)
    reporters = [
        ParallelStats(output_directory, cpus, use_ratios),
        PowerStateStats(output_directory, cpus, use_ratios),
        PowerStateTimeline(output_directory, cpus),
        CpuUtilizationTimeline(output_directory, cpus),
        transitions_reporter,
    ]

    # assemble the pipeline
    event_stream = parser.parse(trace_file)
    transition_stream = stream_cpu_power_transitions(event_stream)
    recorded_trans_stream = record_state_transitions(transitions_reporter, transition_stream)
    power_state_stream = ps_processor.process(recorded_trans_stream)
    core_state_stream = gather_core_states(power_state_stream, freq_dependent_idle_states)

    # execute the pipeline
    for timestamp, states in core_state_stream:
        for reporter in reporters:
            reporter.update(timestamp, states)

    # report any issues encountered while executing the pipeline
    if ps_processor.exceptions:
        logger.warning('There were errors while processing trace:')
        for e in ps_processor.exceptions:
            logger.warning(str(e))

    # generate reports
    reports = {}
    for reporter in reporters:
        report = reporter.report()
        if report is None:
            # The stats reporters return None when they never received an
            # update; there is nothing to write for them.
            logger.warning('No data collected by {}; skipping its report.'.format(
                type(reporter).__name__))
            continue
        report.write()
        reports[report.name] = report
    return reports
|
||||
|
Loading…
x
Reference in New Issue
Block a user