2015-06-03 16:15:44 +01:00
|
|
|
# Copyright 2015 ARM Limited
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
#
|
|
|
|
|
|
|
|
from __future__ import division
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import csv
|
2016-02-04 15:33:46 +00:00
|
|
|
import re
|
2015-06-03 16:15:44 +01:00
|
|
|
import logging
|
|
|
|
from ctypes import c_int32
|
|
|
|
from collections import defaultdict
|
|
|
|
import argparse
|
|
|
|
|
2016-02-04 15:33:46 +00:00
|
|
|
from wlauto.utils.trace_cmd import TraceCmdTrace, TRACE_MARKER_START, TRACE_MARKER_STOP
|
2016-06-06 16:07:01 +01:00
|
|
|
from wlauto.exceptions import DeviceError
|
2015-06-03 16:15:44 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Logger shared by the processing and reporting code in this module.
logger = logging.getLogger('power')

# Placeholder frequency assigned to a core that has left idle while its
# frequency is still unset (see PowerStateProcessor._process_idle_exit).
UNKNOWN_FREQUENCY = -1

# Searched in the text of 'print' trace events; matches text such as
# "CPU 0 FREQUENCY: 1000000 kHZ".
INIT_CPU_FREQ_REGEX = re.compile(r'CPU (?P<cpu>\d+) FREQUENCY: (?P<freq>\d+) kHZ')

# Searched in the text of 'print' trace events that mention 'cpu_frequency';
# matches text such as "cpu_frequency_devlib: state=1000000 cpu_id=0".
DEVLIB_CPU_FREQ_REGEX = re.compile(r'cpu_frequency(?:_devlib):\s+state=(?P<freq>\d+)\s+cpu_id=(?P<cpu>\d+)')
|
2016-02-04 15:33:46 +00:00
|
|
|
|
2015-06-03 16:15:44 +01:00
|
|
|
|
|
|
|
class CorePowerTransitionEvent(object):
    """
    A change of either the frequency or the idle state of a single core.

    Exactly one of ``frequency`` and ``idle_state`` must be supplied;
    ``ValueError`` is raised if both or neither are given.

    """

    kind = 'transition'
    __slots__ = ['timestamp', 'cpu_id', 'frequency', 'idle_state']

    def __init__(self, timestamp, cpu_id, frequency=None, idle_state=None):
        has_frequency = frequency is not None
        has_idle_state = idle_state is not None
        # Both set or both missing is equally invalid.
        if has_frequency == has_idle_state:
            raise ValueError('Power transition must specify a frequency or an idle_state, but not both.')
        self.timestamp = timestamp
        self.cpu_id = cpu_id
        self.frequency = frequency
        self.idle_state = idle_state

    def __str__(self):
        template = 'cpu {} @ {} -> freq: {} idle: {}'
        return template.format(self.cpu_id, self.timestamp,
                               self.frequency, self.idle_state)

    def __repr__(self):
        template = 'CPTE(c:{} t:{} f:{} i:{})'
        return template.format(self.cpu_id, self.timestamp,
                               self.frequency, self.idle_state)
|
|
|
|
|
|
|
|
|
|
|
|
class CorePowerDroppedEvents(object):
    """Signals that trace events were dropped on the core ``cpu_id``."""

    kind = 'dropped_events'
    __slots__ = ['cpu_id']

    def __init__(self, cpu_id):
        self.cpu_id = cpu_id

    def __str__(self):
        template = 'DROPPED EVENTS on CPU{}'
        return template.format(self.cpu_id)

    # The debugging representation is the same as the printable one.
    __repr__ = __str__
|
|
|
|
|
|
|
|
|
2016-02-04 15:33:46 +00:00
|
|
|
class TraceMarkerEvent(object):
    """A named marker found in the trace (e.g. ``'START'`` or ``'STOP'``)."""

    kind = 'marker'
    __slots__ = ['name']

    def __init__(self, name):
        self.name = name

    def __str__(self):
        return 'MARKER: {}'.format(self.name)

    # Give markers a readable repr, like the other event classes in this
    # module, so they are legible inside lists and log messages.
    __repr__ = __str__
|
|
|
|
|
|
|
|
|
2015-06-03 16:15:44 +01:00
|
|
|
class CpuPowerState(object):
    """
    Frequency and idle state of one core.

    An ``idle_state`` of ``-1`` means the core is active, a value of zero or
    above is an idle state index, and ``None`` means the state is not known.

    """

    __slots__ = ['frequency', 'idle_state']

    def __init__(self, frequency=None, idle_state=None):
        self.frequency = frequency
        self.idle_state = idle_state

    @property
    def is_idling(self):
        state = self.idle_state
        if state is None:
            return False
        return state >= 0

    @property
    def is_active(self):
        return self.idle_state == -1

    def __str__(self):
        template = 'CP(f:{} i:{})'
        return template.format(self.frequency, self.idle_state)

    __repr__ = __str__
|
|
|
|
|
|
|
|
|
|
|
|
class SystemPowerState(object):
    """
    Snapshot of the power state of every core at one point in time.

    ``cpus`` holds one ``CpuPowerState`` per core, indexed by cpu id, and
    ``timestamp`` is ``None`` until it is assigned by the owner.

    """

    __slots__ = ['timestamp', 'cpus']

    @property
    def num_cores(self):
        return len(self.cpus)

    def __init__(self, num_cores):
        self.timestamp = None
        # ``range`` rather than ``xrange`` so this works on Python 2 and 3.
        self.cpus = [CpuPowerState() for _ in range(num_cores)]

    def copy(self):
        """Return an independent copy, so later updates do not alter it."""
        new = SystemPowerState(self.num_cores)
        new.timestamp = self.timestamp
        for new_cpu, cpu in zip(new.cpus, self.cpus):
            new_cpu.frequency = cpu.frequency
            new_cpu.idle_state = cpu.idle_state
        return new

    def __str__(self):
        return 'SP(t:{} Cs:{})'.format(self.timestamp, self.cpus)

    __repr__ = __str__
|
|
|
|
|
|
|
|
|
|
|
|
class PowerStateProcessor(object):
    """
    This takes a stream of power transition events and yields a timeline stream
    of system power states.

    Errors raised while handling an individual event do not stop processing;
    they are appended to ``exceptions`` for the caller to inspect afterwards.

    """

    @property
    def cpu_states(self):
        return self.power_state.cpus

    @property
    def current_time(self):
        return self.power_state.timestamp

    @current_time.setter
    def current_time(self, value):
        self.power_state.timestamp = value

    def __init__(self, core_clusters, num_idle_states,
                 first_cluster_state=sys.maxsize, first_system_state=sys.maxsize,
                 wait_for_start_marker=False):
        # ``sys.maxsize`` (available on Python 2 and 3) stands for "no such
        # state": no idle state index will ever reach it.
        self.power_state = SystemPowerState(len(core_clusters))
        self.requested_states = defaultdict(lambda: -1)  # cpu_id -> requested state
        self.wait_for_start_marker = wait_for_start_marker
        self._saw_start_marker = False
        self._saw_stop_marker = False
        self.exceptions = []  # errors collected by process()

        idle_state_domains = build_idle_domains(core_clusters,
                                                num_states=num_idle_states,
                                                first_cluster_state=first_cluster_state,
                                                first_system_state=first_system_state)
        # This tells us what other cpus we need to update when we see an idle
        # state transition event
        self.idle_related_cpus = defaultdict(list)  # (cpu, idle_state) --> related cpus
        for state_id, idle_state_domain in enumerate(idle_state_domains):
            for cpu_group in idle_state_domain:
                for cpu in cpu_group:
                    related = set(cpu_group) - set([cpu])
                    self.idle_related_cpus[(cpu, state_id)] = related

    def process(self, event_stream):
        """
        Generate a copy of the system power state after each event.

        When ``wait_for_start_marker`` is set nothing is yielded until the
        START marker has been seen. Processing ends once a STOP marker is
        seen. If the stream ends without one and ``wait_for_start_marker``
        is set, a warning is logged.

        """
        for event in event_stream:
            # Only the state update is guarded, so that a failure on one event
            # is recorded and the remaining events are still processed.
            try:
                next_state = self.update_power_state(event)
            except Exception as e:  # pylint: disable=broad-except
                self.exceptions.append(e)
                continue
            if self._saw_start_marker or not self.wait_for_start_marker:
                yield next_state
            if self._saw_stop_marker:
                break
        else:
            # Reached only when the stream was exhausted without a STOP marker.
            if self.wait_for_start_marker:
                logger.warning("Did not see a STOP marker in the trace")

    def update_power_state(self, event):
        """
        Update the tracked power state based on the specified event and
        return updated power state.

        Raises ``ValueError`` for an event of an unexpected kind.

        """
        if event.kind == 'transition':
            self._process_transition(event)
        elif event.kind == 'dropped_events':
            self._process_dropped_events(event)
        elif event.kind == 'marker':
            if event.name == 'START':
                self._saw_start_marker = True
            elif event.name == 'STOP':
                self._saw_stop_marker = True
        else:
            raise ValueError('Unexpected event type: {}'.format(event.kind))
        return self.power_state.copy()

    def _process_transition(self, event):
        """Apply a frequency change, an idle exit (-1) or an idle entry."""
        self.current_time = event.timestamp
        if event.idle_state is None:
            self.cpu_states[event.cpu_id].frequency = event.frequency
        elif event.idle_state == -1:
            self._process_idle_exit(event)
        else:
            self._process_idle_entry(event)

    def _process_dropped_events(self, event):
        """Mark the cpu, and the cpus sharing its idle state, as unknown."""
        cpu_state = self.cpu_states[event.cpu_id]
        cpu_state.frequency = None
        old_idle_state = cpu_state.idle_state
        cpu_state.idle_state = None

        related_ids = self.idle_related_cpus[(event.cpu_id, old_idle_state)]
        for rid in related_ids:
            self.cpu_states[rid].idle_state = None

    def _process_idle_entry(self, event):
        if self.cpu_states[event.cpu_id].is_idling:
            raise ValueError('Got idle state entry event for an idling core: {}'.format(event))
        self._try_transition_to_idle_state(event.cpu_id, event.idle_state)

    def _process_idle_exit(self, event):
        cpu_state = self.cpu_states[event.cpu_id]
        if cpu_state.is_active:
            raise ValueError('Got idle state exit event for an active core: {}'.format(event))
        self.requested_states.pop(event.cpu_id, None)  # remove outstanding request if there is one
        old_state = cpu_state.idle_state
        cpu_state.idle_state = -1
        if cpu_state.frequency is None:
            cpu_state.frequency = UNKNOWN_FREQUENCY

        related_ids = self.idle_related_cpus[(event.cpu_id, old_state)]
        if old_state is not None:
            # Cpus that shared the state just left can be at most one level
            # shallower now.
            new_state = old_state - 1
            for rid in related_ids:
                rid_state = self.cpu_states[rid].idle_state
                # An unknown (None) state is skipped, as Python 2 ordering
                # did implicitly; comparing None with an int fails on Python 3.
                if rid_state is not None and rid_state > new_state:
                    self._try_transition_to_idle_state(rid, new_state)

    def _try_transition_to_idle_state(self, cpu_id, idle_state):
        related_ids = self.idle_related_cpus[(cpu_id, idle_state)]

        # Tristate: True - can transition, False - can't transition,
        #           None - unknown idle state on at least one related cpu
        transition_check = self._can_enter_state(related_ids, idle_state)

        if not transition_check:
            # If we can't enter an idle state right now, record that we've
            # requested it, so that we may enter it later (once all related
            # cpus also want a state at least as deep).
            self.requested_states[cpu_id] = idle_state

        if transition_check is None:
            # Unknown state on a related cpu means we're not sure whether we're
            # entering requested state or a shallower one
            self.cpu_states[cpu_id].idle_state = None
            return

        # Keep trying shallower states until all related cpus allow the
        # transition.
        while not self._can_enter_state(related_ids, idle_state):
            idle_state -= 1
            related_ids = self.idle_related_cpus[(cpu_id, idle_state)]

        self.cpu_states[cpu_id].idle_state = idle_state
        for rid in related_ids:
            self.cpu_states[rid].idle_state = idle_state
            if self.requested_states[rid] == idle_state:
                del self.requested_states[rid]  # request satisfied, so remove

    def _can_enter_state(self, related_ids, state):
        """
        This is a tri-state check. Returns ``True`` if related cpu states allow transition
        into this state, ``False`` if related cpu states don't allow transition into this
        state, and ``None`` if at least one of the related cpus is in an unknown state
        (so the decision of whether a transition is possible cannot be made).

        """
        for rid in related_ids:
            rid_requested_state = self.requested_states[rid]
            rid_current_state = self.cpu_states[rid].idle_state
            if rid_current_state is None:
                return None
            if rid_current_state < state and rid_requested_state < state:
                return False
        return True
|
|
|
|
|
|
|
|
|
|
|
|
def stream_cpu_power_transitions(events):
    """
    Convert parsed trace events into power events.

    ``cpu_idle`` and ``cpu_frequency`` events become
    ``CorePowerTransitionEvent``s, ``DROPPED EVENTS DETECTED`` becomes a
    ``CorePowerDroppedEvents``, and ``print`` events become either a
    ``TraceMarkerEvent`` or, when their text carries a frequency report, a
    frequency transition. Everything else is skipped.

    """
    for evt in events:
        name = evt.name
        if name == 'cpu_idle':
            # Reinterpret the state as a signed 32-bit value, so that
            # 4294967295 becomes -1.
            idle = c_int32(evt.state).value
            yield CorePowerTransitionEvent(evt.timestamp, evt.cpu_id, idle_state=idle)
            continue
        if name == 'cpu_frequency':
            yield CorePowerTransitionEvent(evt.timestamp, evt.cpu_id, frequency=evt.state)
            continue
        if name == 'DROPPED EVENTS DETECTED':
            yield CorePowerDroppedEvents(evt.cpu_id)
            continue
        if name != 'print':
            continue

        text = evt.text
        if TRACE_MARKER_START in text:
            yield TraceMarkerEvent('START')
        elif TRACE_MARKER_STOP in text:
            yield TraceMarkerEvent('STOP')
        else:
            if 'cpu_frequency' in text:
                pattern = DEVLIB_CPU_FREQ_REGEX
            else:
                pattern = INIT_CPU_FREQ_REGEX
            found = pattern.search(text)
            if found:
                cpu_id = int(found.group('cpu'))
                freq = int(found.group('freq'))
                yield CorePowerTransitionEvent(evt.timestamp, cpu_id, frequency=freq)
|
2015-06-03 16:15:44 +01:00
|
|
|
|
|
|
|
|
|
|
|
def gather_core_states(system_state_stream, freq_dependent_idle_states=None):  # NOQA
    """
    Reduce each system state to ``(timestamp, [(idle_state, frequency), ...])``.

    The frequency is reported for active cores (idle state ``-1``) and for
    cores in one of ``freq_dependent_idle_states`` (``[0]`` when not given).
    A core in a frequency dependent idle state whose frequency is ``None``
    is reported as ``(None, None)``; any other core is reported with a
    ``None`` frequency.

    """
    if freq_dependent_idle_states is None:
        freq_dependent = [0]
    else:
        freq_dependent = freq_dependent_idle_states

    for snapshot in system_state_stream:
        per_core = []
        for core in snapshot.cpus:
            idle, freq = core.idle_state, core.frequency
            if idle == -1:
                entry = (-1, freq)
            elif idle not in freq_dependent:
                entry = (idle, None)
            elif freq is None:
                entry = (None, None)
            else:
                entry = (idle, freq)
            per_core.append(entry)
        yield (snapshot.timestamp, per_core)
|
|
|
|
|
|
|
|
|
2015-06-16 11:04:25 +01:00
|
|
|
class PowerStateTimeline(object):
    """
    Writes one CSV row per update describing the state of every core.

    The output file is opened on construction and stays open until
    ``report()`` is called.

    """

    def __init__(self, filepath, core_names, idle_state_names):
        self.filepath = filepath
        self.idle_state_names = idle_state_names
        self._wfh = open(filepath, 'w')
        self.writer = csv.writer(self._wfh)
        if core_names:
            headers = ['ts']
            for index, core in enumerate(core_names):
                headers.append('{} CPU{}'.format(core, index))
            self.writer.writerow(headers)

    def update(self, timestamp, core_states):  # NOQA
        row = [timestamp]
        row.extend(self._describe(idle_state, frequency)
                   for idle_state, frequency in core_states)
        self.writer.writerow(row)

    def _describe(self, idle_state, frequency):
        # Cell value for one core; None means there is nothing to report.
        if idle_state is None:
            return None
        if idle_state == -1:
            if frequency is None:
                return None
            if frequency == UNKNOWN_FREQUENCY:
                return 'Running (Unknown Hz)'
            return frequency
        state_name = self.idle_state_names[idle_state]
        if frequency is None:
            return state_name
        if frequency == UNKNOWN_FREQUENCY:
            frequency = 'Unknown Hz'
        return '{} ({})'.format(state_name, frequency)

    def report(self):
        # There is no report object to return; this only closes the file.
        self._wfh.close()
|
|
|
|
|
|
|
|
|
2015-06-03 16:15:44 +01:00
|
|
|
class ParallelStats(object):
    """
    Accumulates, per cluster, how long each number of cores was active at
    the same time.

    ``core_clusters`` lists the cluster id of each core; an extra ``'all'``
    cluster covering every core is always added. A core counts as active
    when its ``(idle_state, frequency)`` entry has an idle state of ``-1``.

    """

    def __init__(self, core_clusters, use_ratios=False):
        self.clusters = defaultdict(set)
        self.use_ratios = use_ratios
        for i, clust in enumerate(core_clusters):
            self.clusters[clust].add(i)
        self.clusters['all'] = set(range(len(core_clusters)))

        self.first_timestamp = None
        self.last_timestamp = None
        self.previous_states = None
        # cluster -> number of active cores -> accumulated time
        self.parallel_times = defaultdict(lambda: defaultdict(int))
        # cluster -> time during which at least one core was active
        self.running_times = defaultdict(int)

    def update(self, timestamp, core_states):
        """Attribute the time since the last update to the previous states."""
        if self.last_timestamp is None:  # initial update
            self.first_timestamp = timestamp
        else:
            delta = timestamp - self.last_timestamp
            active_cores = [i for i, c in enumerate(self.previous_states)
                            if c and c[0] == -1]
            # ``items`` rather than ``iteritems`` works on Python 2 and 3.
            for cluster, cluster_cores in self.clusters.items():
                clust_active_cores = len(cluster_cores.intersection(active_cores))
                self.parallel_times[cluster][clust_active_cores] += delta
                if clust_active_cores:
                    self.running_times[cluster] += delta

        self.last_timestamp = timestamp
        self.previous_states = core_states

    @staticmethod
    def _cluster_sort_key(cluster):
        # Numeric cluster ids sort before string ones (such as 'all'), which
        # is the order Python 2 produced; Python 3 cannot compare them directly.
        if isinstance(cluster, (int, float)):
            return (0, cluster)
        return (1, str(cluster))

    def report(self):  # NOQA
        """
        Return a ``ParallelReport`` with one row per cluster and core count,
        or ``None`` if ``update()`` was never called.

        """
        if self.last_timestamp is None:
            return None

        report = ParallelReport()
        total_time = self.last_timestamp - self.first_timestamp
        scale = 1 if self.use_ratios else 100
        precision = 3 if self.use_ratios else 1
        fmt = '{{:.{}f}}'.format(precision)
        for cluster in sorted(self.parallel_times, key=self._cluster_sort_key):
            running_time = self.running_times[cluster]
            for n in range(len(self.clusters[cluster]) + 1):
                time = self.parallel_times[cluster][n]
                # A zero-length interval carries no proportions; report 0
                # instead of dividing by zero.
                time_pc = (time / total_time if total_time else 0) * scale
                if n and running_time:
                    running_time_pc = time / running_time * scale
                else:
                    running_time_pc = 0
                report.add([cluster, n,
                            fmt.format(time),
                            fmt.format(time_pc),
                            fmt.format(running_time_pc),
                            ])
        return report
|
|
|
|
|
|
|
|
|
|
|
|
class ParallelReport(object):
    """Collects parallelism rows and writes them out as a CSV file."""

    def __init__(self):
        self.values = []

    def add(self, value):
        self.values.append(value)

    def write(self, filepath):
        header = ['cluster', 'number_of_cores', 'total_time', '%time', '%running_time']
        with open(filepath, 'w') as wfh:
            writer = csv.writer(wfh)
            writer.writerow(header)
            for row in self.values:
                writer.writerow(row)
|
|
|
|
|
|
|
|
|
|
|
|
class PowerStateStats(object):
    """
    Accumulates how long each core spent in each power state.

    State labels are built from the core's ``(idle_state, frequency)``
    entry: a zero padded frequency for active cores, the idle state name
    (from ``idle_state_names`` when given, ``idle<N>`` otherwise) with the
    frequency appended when there is one, and ``'unkown'`` when neither is
    available.

    """

    def __init__(self, core_names, idle_state_names=None, use_ratios=False):
        self.core_names = core_names
        self.idle_state_names = idle_state_names
        self.use_ratios = use_ratios
        self.first_timestamp = None
        self.last_timestamp = None
        self.previous_states = None
        # cpu index -> state label -> accumulated time
        self.cpu_states = defaultdict(lambda: defaultdict(int))

    def _state_label(self, idle, freq):
        # Build the label under which time in this state is accumulated.
        if idle == -1 and freq is not None:
            return '{:07}KHz'.format(freq)
        if freq:
            if self.idle_state_names:
                return '{}-{:07}KHz'.format(self.idle_state_names[idle], freq)
            return 'idle{}-{:07}KHz'.format(idle, freq)
        if idle not in (None, -1):
            if self.idle_state_names:
                return self.idle_state_names[idle]
            return 'idle{}'.format(idle)
        # NOTE: the misspelling is part of the emitted report and is kept so
        # that existing consumers of the output keep working.
        return 'unkown'

    def update(self, timestamp, core_states):  # NOQA
        """Attribute the time since the last update to the previous states."""
        if self.last_timestamp is None:  # initial update
            self.first_timestamp = timestamp
        else:
            delta = timestamp - self.last_timestamp
            for cpu, (idle, freq) in enumerate(self.previous_states):
                self.cpu_states[cpu][self._state_label(idle, freq)] += delta

        self.last_timestamp = timestamp
        self.previous_states = core_states

    def report(self):
        """
        Return a ``PowerStateStatsReport`` of the proportion of time each
        core spent in each state, or ``None`` if ``update()`` was never
        called.

        """
        if self.last_timestamp is None:
            return None
        total_time = self.last_timestamp - self.first_timestamp
        scale = 1 if self.use_ratios else 100
        state_stats = defaultdict(lambda: [None] * len(self.core_names))

        # ``items`` rather than ``iteritems`` works on Python 2 and 3.
        for cpu, states in self.cpu_states.items():
            for state, time in states.items():
                # A zero-length interval carries no proportions; report 0
                # instead of dividing by zero.
                time_pc = (time / total_time if total_time else 0) * scale
                state_stats[state][cpu] = time_pc

        precision = 3 if self.use_ratios else 1
        return PowerStateStatsReport(state_stats, self.core_names, precision)
|
|
|
|
|
|
|
|
|
|
|
|
class PowerStateStatsReport(object):
    """
    Holds per-state, per-core proportions and writes them out as CSV.

    ``state_stats`` maps a state label to a list with one value per core;
    a value of ``None`` is written as zero.

    """

    def __init__(self, state_stats, core_names, precision=2):
        self.state_stats = state_stats
        self.core_names = core_names
        self.precision = precision

    def write(self, filepath):
        number_format = '{{:.{}f}}'.format(self.precision)
        header = ['state']
        for index, core in enumerate(self.core_names):
            header.append('{} CPU{}'.format(core, index))

        with open(filepath, 'w') as wfh:
            writer = csv.writer(wfh)
            writer.writerow(header)
            for state in sorted(self.state_stats):
                row = [state]
                for value in self.state_stats[state]:
                    if value is None:
                        value = 0
                    row.append(number_format.format(value))
                writer.writerow(row)
|
|
|
|
|
|
|
|
|
2016-04-06 10:22:55 +01:00
|
|
|
class CpuUtilisationTimeline(object):
    """
    Writes one CSV row per update with each core's frequency divided by
    that core's entry in ``max_freq_list``.

    A core contributes its frequency only while it is active (idle state
    ``-1``) with a known frequency; otherwise it is logged as zero. The
    output file is opened on construction and stays open until ``report()``
    is called.

    """

    def __init__(self, filepath, core_names, max_freq_list):
        self.filepath = filepath
        self._wfh = open(filepath, 'w')
        self.writer = csv.writer(self._wfh)
        if core_names:
            headers = ['ts'] + ['{} CPU{}'.format(c, i)
                                for i, c in enumerate(core_names)]
            self.writer.writerow(headers)
        self._max_freq_list = max_freq_list

    def update(self, timestamp, core_states):  # NOQA
        row = [timestamp]
        for core, (idle_state, frequency) in enumerate(core_states):
            # Idle cores, and active cores whose frequency is missing or
            # unknown, count as zero. The ``None`` check also prevents a
            # TypeError in the division below.
            if idle_state != -1 or frequency is None or frequency == UNKNOWN_FREQUENCY:
                frequency = 0
            if core < len(self._max_freq_list):
                row.append(frequency / float(self._max_freq_list[core]))
            else:
                logger.warning('Unable to detect max frequency for this core. Cannot log utilisation value')
        self.writer.writerow(row)

    def report(self):
        # There is no report object to return; this only closes the file.
        self._wfh.close()
|
|
|
|
|
|
|
|
|
2015-06-03 16:15:44 +01:00
|
|
|
def build_idle_domains(core_clusters,   # NOQA
                       num_states,
                       first_cluster_state=None,
                       first_system_state=None):
    """
    Returns a list of idle domain groups (one for each idle state). Each group is a
    list of domains, and a domain is a list of cpu ids for which that idle state is
    common. E.g.

        [[[0], [1], [2]], [[0, 1], [2]], [[0, 1, 2]]]

    This defines three idle states for a machine with three cores. The first idle state
    has three domains with one core in each domain; the second state has two domains,
    with cores 0 and 1 sharing one domain; the final state has only one domain shared
    by all cores.

    This mapping is created based on the following assumptions:

    - The device is an SMP or a big.LITTLE-like system with cores in one or
      more clusters (for SMP systems, all cores are considered to be in a "cluster").
    - Idle domains correspond to either individual cores, individual clusters, or
      the compute subsystem as a whole.
    - Cluster states are always deeper (higher index) than core states, and
      system states are always deeper than cluster states.

    parameters:

        :core_clusters: a list indicating cluster "ID" of the corresponding core, e.g.
                        ``[0, 0, 1]`` represents a three-core machine with cores 0
                        and 1 on cluster 0, and core 2 on cluster 1.
        :num_states: total number of idle states on a device.
        :first_cluster_state: the ID of the first idle state shared by all cores in a
                              cluster; ``None`` means there is no such state.
        :first_system_state: the ID of the first idle state shared by all cores;
                             ``None`` means there is no such state.

    Note that the domain lists are shared between the states that use them,
    so they should be treated as read-only.

    """
    # ``sys.maxsize`` exists on Python 2 and 3 and is larger than any state id.
    if first_cluster_state is None:
        first_cluster_state = sys.maxsize
    if first_system_state is None:
        first_system_state = sys.maxsize

    # Materialise the list so that a real list (not a ``range`` object) ends
    # up in the returned structure on Python 3.
    all_cpus = list(range(len(core_clusters)))
    cluster_cpus = defaultdict(list)
    for cpu, cluster in enumerate(core_clusters):
        cluster_cpus[cluster].append(cpu)
    cluster_domains = [cluster_cpus[c] for c in sorted(cluster_cpus)]
    core_domains = [[c] for c in all_cpus]

    idle_state_domains = []
    for state_id in range(num_states):
        if state_id >= first_system_state:
            idle_state_domains.append([all_cpus])
        elif state_id >= first_cluster_state:
            idle_state_domains.append(cluster_domains)
        else:
            idle_state_domains.append(core_domains)

    return idle_state_domains
|
|
|
|
|
|
|
|
|
|
|
|
def report_power_stats(trace_file, idle_state_names, core_names, core_clusters,
                       num_idle_states, first_cluster_state=sys.maxsize,
                       first_system_state=sys.maxsize, use_ratios=False,
                       timeline_csv_file=None, cpu_utilisation=None,
                       max_freq_list=None, start_marker_handling='error'):
    """
    Process ``trace_file`` and return the reports of all enabled reporters.

    The returned list holds, in order, the ``ParallelStats`` report and the
    ``PowerStateStats`` report (either may be ``None`` if no states were
    processed). These are followed by the ``report()`` result of the state
    timeline (if ``timeline_csv_file`` is set) and of the utilisation
    timeline (if both ``cpu_utilisation`` and ``max_freq_list`` are set).

    ``start_marker_handling`` controls what happens when the trace has no
    start marker: ``"error"`` raises ``DeviceError``, ``"try"`` logs a
    warning and uses all events, and ``"ignore"`` always uses all events.

    """
    # pylint: disable=too-many-locals,too-many-branches
    # ``sys.maxsize`` replaces the Python 2 only ``sys.maxint`` in the
    # defaults above; both mean "no such state".
    trace = TraceCmdTrace(trace_file,
                          filter_markers=False,
                          names=['cpu_idle', 'cpu_frequency', 'print'])

    wait_for_start_marker = True
    if start_marker_handling == "error" and not trace.has_start_marker:
        raise DeviceError("Start marker was not found in the trace")
    elif start_marker_handling == "try":
        wait_for_start_marker = trace.has_start_marker
        if not wait_for_start_marker:
            logger.warning("Did not see a START marker in the trace, "
                           "state residency and parallelism statistics may be inaccurate.")
    elif start_marker_handling == "ignore":
        wait_for_start_marker = False

    ps_processor = PowerStateProcessor(core_clusters,
                                       num_idle_states=num_idle_states,
                                       first_cluster_state=first_cluster_state,
                                       first_system_state=first_system_state,
                                       wait_for_start_marker=wait_for_start_marker)
    reporters = [
        ParallelStats(core_clusters, use_ratios),
        PowerStateStats(core_names, idle_state_names, use_ratios)
    ]
    if timeline_csv_file:
        reporters.append(PowerStateTimeline(timeline_csv_file,
                                            core_names, idle_state_names))
    if cpu_utilisation:
        if max_freq_list:
            reporters.append(CpuUtilisationTimeline(cpu_utilisation, core_names, max_freq_list))
        else:
            logger.warning('Maximum frequencies not found. Cannot normalise. Skipping CPU Utilisation Timeline')

    # Each stage is a generator, so the trace is processed lazily.
    event_stream = trace.parse()
    transition_stream = stream_cpu_power_transitions(event_stream)
    power_state_stream = ps_processor.process(transition_stream)
    core_state_stream = gather_core_states(power_state_stream)

    for timestamp, states in core_state_stream:
        for reporter in reporters:
            reporter.update(timestamp, states)

    if ps_processor.exceptions:
        logger.warning('There were errors while processing trace:')
        for e in ps_processor.exceptions:
            logger.warning(str(e))

    # Every result is kept, including None, so positions match ``reporters``.
    return [reporter.report() for reporter in reporters]
|
2015-06-03 16:15:44 +01:00
|
|
|
|
|
|
|
|
|
|
|
def main():
    """
    Command line entry point: parse the arguments, process the trace and
    write ``parallel.csv`` and ``cpustate.csv`` into the output directory.

    """
    # pylint: disable=unbalanced-tuple-unpacking
    logging.basicConfig(level=logging.INFO)
    args = parse_arguments()

    reports = report_power_stats(
        trace_file=args.infile,
        idle_state_names=args.idle_state_names,
        core_names=args.core_names,
        core_clusters=args.core_clusters,
        num_idle_states=args.num_idle_states,
        first_cluster_state=args.first_cluster_state,
        first_system_state=args.first_system_state,
        use_ratios=args.ratios,
        timeline_csv_file=args.timeline_file,
        cpu_utilisation=args.cpu_utilisation,
        max_freq_list=args.max_freq_list,
        start_marker_handling=args.start_marker_handling,
    )

    # The first two reports always come from ParallelStats and
    # PowerStateStats, in that order.
    parallel_report = reports.pop(0)
    powerstate_report = reports.pop(0)

    outputs = [
        ('parallel.csv', parallel_report),
        ('cpustate.csv', powerstate_report),
    ]
    for filename, report in outputs:
        # A reporter that never received an update returns None; skip it
        # rather than failing with an AttributeError.
        if report is None:
            logging.getLogger('power').warning(
                'No power states were processed; not writing %s', filename)
            continue
        report.write(os.path.join(args.output_directory, filename))
|
|
|
|
|
|
|
|
|
|
|
|
class SplitListAction(argparse.Action):
    """
    argparse action that stores a comma-separated option value as a list
    of strings with surrounding whitespace removed from each item.

    """

    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        # The option always takes exactly one (comma-separated) value.
        if nargs is not None:
            raise ValueError('nargs not allowed')
        super(SplitListAction, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        items = []
        for piece in values.split(','):
            items.append(piece.strip())
        setattr(namespace, self.dest, items)
|
|
|
|
|
|
|
|
|
|
|
|
def parse_arguments():  # NOQA
    """
    Parse and validate the command line arguments.

    Missing values are derived where possible: core clusters are inferred
    from the core names, the number of idle states and the idle state names
    are derived from each other, and on multi-cluster devices the first
    cluster state is derived from the first system state or the number of
    idle states.

    Raises ``ValueError`` if core names are missing, if neither the number
    of idle states nor their names are given, or if the two do not agree.

    """
    parser = argparse.ArgumentParser(description="""
                                     Produce CPU power activity statistics reports from
                                     power trace.
                                     """)
    parser.add_argument('infile', metavar='TRACEFILE', help='''
                        Path to the trace file to parse. This must be in the format generated
                        by "trace-cmd report" command.
                        ''')
    parser.add_argument('-d', '--output-directory', default='.',
                        help='''
                        Output directory where reports will be placed.
                        ''')
    parser.add_argument('-c', '--core-names', action=SplitListAction,
                        help='''
                        Comma-separated list of core names for the device on which the trace
                        was collected.
                        ''')
    parser.add_argument('-C', '--core-clusters', action=SplitListAction, default=[],
                        help='''
                        Comma-separated list of core cluster IDs for the device on which the
                        trace was collected. If not specified, this will be generated from
                        core names on the assumption that all cores with the same name are on the
                        same cluster.
                        ''')
    parser.add_argument('-i', '--idle-state-names', action=SplitListAction,
                        help='''
                        Comma-separated list of idle state names. The number of names must match
                        --num-idle-states if that was explicitly specified.
                        ''')
    parser.add_argument('-n', '--num-idle-states', type=int,
                        help='''
                        number of idle states on the device
                        ''')
    parser.add_argument('-q', '--first-cluster-state', type=int,
                        help='''
                        ID of the first cluster state. Must be < --num-idle-states.
                        ''')
    parser.add_argument('-s', '--first-system-state', type=int,
                        help='''
                        ID of the first system state. Must be < --num-idle-states, and
                        > --first-cluster-state.
                        ''')
    parser.add_argument('-R', '--ratios', action='store_true',
                        help='''
                        By default proportional values will be reported as percentages, if this
                        flag is enabled, they will be reported as ratios instead.
                        ''')
    parser.add_argument('-t', '--timeline-file', metavar='FILE',
                        help='''
                        A timeline of core power states will be written to the specified file in
                        CSV format.
                        ''')
    parser.add_argument('-u', '--cpu-utilisation', metavar='FILE',
                        help='''
                        A timeline of cpu(s) utilisation will be written to the specified file in
                        CSV format.
                        ''')
    parser.add_argument('-m', '--max-freq-list', action=SplitListAction, default=[],
                        help='''
                        Comma-separated list of core maximum frequencies for the device on which
                        the trace was collected.
                        Only required if --cpu-utilisation is set.
                        This is used to normalise the frequencies to obtain percentage utilisation.
                        ''')
    parser.add_argument('-M', '--start-marker-handling', metavar='HANDLING', default="try",
                        choices=["error", "try", "ignore"],
                        help='''
                        The trace-cmd instrument inserts a marker into the trace to indicate the beginning
                        of workload execution. In some cases, this marker may be missing in the final
                        output (e.g. due to trace buffer overrun). This parameter specifies how a missing
                        start marker will be handled:

                        ignore: The start marker will be ignored. All events in the trace will be used.
                        error: An error will be raised if the start marker is not found in the trace.
                        try: If the start marker is not found, all events in the trace will be used.
                        ''')

    args = parser.parse_args()

    if not args.core_names:
        raise ValueError('core names must be specified using -c or --core-names')
    if not args.core_clusters:
        logger.debug('core clusters not specified, inferring from core names')
        # Cores with the same name share a cluster; clusters are numbered in
        # the order in which their names first appear.
        core_cluster_map = {}
        core_clusters = []
        current_cluster = 0
        for cn in args.core_names:
            if cn not in core_cluster_map:
                core_cluster_map[cn] = current_cluster
                current_cluster += 1
            core_clusters.append(core_cluster_map[cn])
        args.core_clusters = core_clusters
    if not args.num_idle_states and args.idle_state_names:
        args.num_idle_states = len(args.idle_state_names)
    elif args.num_idle_states and not args.idle_state_names:
        args.idle_state_names = ['idle{}'.format(i) for i in range(args.num_idle_states)]
    elif args.num_idle_states and args.idle_state_names:
        if len(args.idle_state_names) != args.num_idle_states:
            raise ValueError('Number of idle state names does not match --num-idle-states')
    else:
        raise ValueError('Either --num-idle-states or --idle-state-names must be specified')

    # Compare against None: 0 is a valid state ID and must not be treated
    # as "not specified".
    if args.first_cluster_state is None and len(set(args.core_clusters)) > 1:
        if args.first_system_state is not None:
            logger.debug('First cluster idle state not specified; state previous to first system state')
            args.first_cluster_state = args.first_system_state - 1
        else:
            logger.debug('First cluster idle state not specified; assuming last available state')
            args.first_cluster_state = args.num_idle_states - 1

    return args
|
|
|
|
|
|
|
|
# Run the command line tool only when this file is executed as a script.
if __name__ == '__main__':
    main()
|