1
0
mirror of https://github.com/ARM-software/workload-automation.git synced 2025-01-19 12:24:32 +00:00

cpustate: ensure known initial state

cpustate result processor generates a view of the cpu subsystem power
state during execution of a workload from cpu_idle and cpu_frequency
ftraces. There exists a period before the first events in those
categories are seen where the state of the cpu subsystem is (partially)
unknown and it is reported as such by the result processor.

Unknown state usually exists for a relatively short period of time and
is generally not a big deal. For certain kinds of workloads, however, it
may constitute a significant portion of the trace.

Changes in this commit attempt to deal with this by a) reading starting
cpu frequencies and writing them into the trace, and b) nudging each
core to bring it out of idle; this happens before the start marker, so
that the system state between the markers should be completely known.
This commit is contained in:
Sergei Trofimov 2016-02-04 15:33:46 +00:00
parent 5e3cc8fcb5
commit 3a99a284c4
2 changed files with 73 additions and 8 deletions

View File

@ -18,9 +18,11 @@ import csv
from collections import OrderedDict from collections import OrderedDict
from wlauto import ResultProcessor, Parameter from wlauto import ResultProcessor, Parameter
from wlauto.core import signal
from wlauto.exceptions import ConfigError from wlauto.exceptions import ConfigError
from wlauto.instrumentation import instrument_is_installed from wlauto.instrumentation import instrument_is_installed
from wlauto.utils.power import report_power_stats from wlauto.utils.power import report_power_stats
from wlauto.utils.misc import unique
class CpuStatesProcessor(ResultProcessor): class CpuStatesProcessor(ResultProcessor):
@ -115,8 +117,10 @@ class CpuStatesProcessor(ResultProcessor):
def initialize(self, context): def initialize(self, context):
# pylint: disable=attribute-defined-outside-init # pylint: disable=attribute-defined-outside-init
device = context.device device = context.device
if not device.has('cpuidle'): for modname in ['cpuidle', 'cpufreq']:
raise ConfigError('Device does not appear to have cpuidle capability; is the right module installed?') if not device.has(modname):
message = 'Device does not appear to have {} capability; is the right module installed?'
raise ConfigError(message.format(modname))
if not device.core_names: if not device.core_names:
message = '{} requires"core_names" and "core_clusters" to be specified for the device.' message = '{} requires"core_names" and "core_clusters" to be specified for the device.'
raise ConfigError(message.format(self.name)) raise ConfigError(message.format(self.name))
@ -126,6 +130,30 @@ class CpuStatesProcessor(ResultProcessor):
self.idle_state_names = [idle_states[i] for i in sorted(idle_states.keys())] self.idle_state_names = [idle_states[i] for i in sorted(idle_states.keys())]
self.num_idle_states = len(self.idle_state_names) self.num_idle_states = len(self.idle_state_names)
self.iteration_reports = OrderedDict() self.iteration_reports = OrderedDict()
# priority -19: just higher than the slow_start of instrumentation
signal.connect(self.set_initial_state, signal.BEFORE_WORKLOAD_EXECUTION, priority=-19)
def set_initial_state(self, context):
    """
    Put the CPU subsystem into a known state and record it in the trace.

    Two things are done, in order:

    1. The current frequency of each cluster is read from the device and
       one ``CPU <n> FREQUENCY: <freq> kHZ`` line per core is written to
       the ftrace ``trace_marker`` file.
    2. A trivial command (``ls``) is run pinned to each core in turn via
       ``busybox taskset``, to force idle state transitions into the trace.

    :param context: execution context; only ``context.device`` is used.
    """
    # TODO: this does not play well with hotplug, but is left as-is since
    #       it is expected to change with the devlib port anyway.
    #
    # NOTE: frequencies are read once per cluster and reported for every
    #       core in that cluster, i.e. per-cluster DVFS is assumed. This
    #       will need revisiting for per-CPU frequency scaling.
    self.logger.debug('Writing initial frequencies into trace...')
    device = context.device
    freq_by_cluster = {
        cluster: device.get_cluster_cur_frequency(cluster)
        for cluster in unique(device.core_clusters)
    }
    for cpu, cluster in enumerate(device.core_clusters):
        device.set_sysfile_value(
            '/sys/kernel/debug/tracing/trace_marker',
            'CPU {} FREQUENCY: {} kHZ'.format(cpu, freq_by_cluster[cluster]),
            verify=False,
        )

    # Run a short-lived command on every core; the affinity mask passed to
    # taskset has only the bit for that core set.
    self.logger.debug('Nudging all cores awake...')
    for cpu in xrange(len(device.core_names)):
        nudge = (device.busybox + ' taskset 0x{:x} {}').format(1 << cpu, 'ls')
        device.execute(nudge)
def process_iteration_result(self, result, context): def process_iteration_result(self, result, context):
trace = context.get_artifact('txttrace') trace = context.get_artifact('txttrace')

View File

@ -17,18 +17,21 @@ from __future__ import division
import os import os
import sys import sys
import csv import csv
import re
import logging import logging
from ctypes import c_int32 from ctypes import c_int32
from collections import defaultdict from collections import defaultdict
import argparse import argparse
from wlauto.utils.trace_cmd import TraceCmdTrace from wlauto.utils.trace_cmd import TraceCmdTrace, TRACE_MARKER_START, TRACE_MARKER_STOP
logger = logging.getLogger('power') logger = logging.getLogger('power')
UNKNOWN_FREQUENCY = -1 UNKNOWN_FREQUENCY = -1
INIT_CPU_FREQ_REGEX = re.compile(r'CPU (?P<cpu>\d+) FREQUENCY: (?P<freq>\d+) kHZ')
class CorePowerTransitionEvent(object): class CorePowerTransitionEvent(object):
@ -66,6 +69,18 @@ class CorePowerDroppedEvents(object):
__repr__ = __str__ __repr__ = __str__
class TraceMarkerEvent(object):
    """
    Event representing a trace marker found in the event stream.

    ``name`` identifies the marker; the stream parser in this module
    creates instances named ``'START'`` and ``'STOP'``. The ``kind``
    class attribute (``'marker'``) is what consumers dispatch on to tell
    this event apart from other event types.
    """

    kind = 'marker'
    __slots__ = ['name']

    def __init__(self, name):
        self.name = name

    def __str__(self):
        return 'MARKER: {}'.format(self.name)

    # Keep repr() readable (e.g. when events are printed inside a list or
    # logged), consistent with the other event classes in this module.
    __repr__ = __str__
class CpuPowerState(object): class CpuPowerState(object):
__slots__ = ['frequency', 'idle_state'] __slots__ = ['frequency', 'idle_state']
@ -136,9 +151,12 @@ class PowerStateProcessor(object):
self.power_state.timestamp = value self.power_state.timestamp = value
def __init__(self, core_clusters, num_idle_states, def __init__(self, core_clusters, num_idle_states,
first_cluster_state=sys.maxint, first_system_state=sys.maxint): first_cluster_state=sys.maxint, first_system_state=sys.maxint,
wait_for_start_marker=False):
self.power_state = SystemPowerState(len(core_clusters)) self.power_state = SystemPowerState(len(core_clusters))
self.requested_states = defaultdict(lambda: -1) # cpu_id -> requeseted state self.requested_states = defaultdict(lambda: -1) # cpu_id -> requeseted state
self.wait_for_start_marker = wait_for_start_marker
self._saw_start_marker = False
idle_state_domains = build_idle_domains(core_clusters, idle_state_domains = build_idle_domains(core_clusters,
num_states=num_idle_states, num_states=num_idle_states,
@ -155,7 +173,9 @@ class PowerStateProcessor(object):
def process(self, event_stream): def process(self, event_stream):
for event in event_stream: for event in event_stream:
yield self.update_power_state(event) next_state = self.update_power_state(event)
if self._saw_start_marker or not self.wait_for_start_marker:
yield next_state
def update_power_state(self, event): def update_power_state(self, event):
""" """
@ -167,6 +187,11 @@ class PowerStateProcessor(object):
self._process_transition(event) self._process_transition(event)
elif event.kind == 'dropped_events': elif event.kind == 'dropped_events':
self._process_dropped_events(event) self._process_dropped_events(event)
elif event.kind == 'marker':
if event.name == 'START':
self._saw_start_marker = True
elif event.name == 'STOP':
self._saw_start_marker = False
else: else:
raise ValueError('Unexpected event type: {}'.format(event.kind)) raise ValueError('Unexpected event type: {}'.format(event.kind))
return self.power_state.copy() return self.power_state.copy()
@ -269,6 +294,17 @@ def stream_cpu_power_transitions(events):
yield CorePowerTransitionEvent(event.timestamp, event.cpu_id, frequency=event.state) yield CorePowerTransitionEvent(event.timestamp, event.cpu_id, frequency=event.state)
elif event.name == 'DROPPED EVENTS DETECTED': elif event.name == 'DROPPED EVENTS DETECTED':
yield CorePowerDroppedEvents(event.cpu_id) yield CorePowerDroppedEvents(event.cpu_id)
elif event.name == 'print':
if TRACE_MARKER_START in event.text:
yield TraceMarkerEvent('START')
elif TRACE_MARKER_STOP in event.text:
yield TraceMarkerEvent('STOP')
else:
match = INIT_CPU_FREQ_REGEX.search(event.text)
if match:
yield CorePowerTransitionEvent(event.timestamp,
int(match.group('cpu')),
frequency=int(match.group('freq')))
def gather_core_states(system_state_stream, freq_dependent_idle_states=None): # NOQA def gather_core_states(system_state_stream, freq_dependent_idle_states=None): # NOQA
@ -541,13 +577,14 @@ def build_idle_domains(core_clusters, # NOQA
def report_power_stats(trace_file, idle_state_names, core_names, core_clusters, def report_power_stats(trace_file, idle_state_names, core_names, core_clusters,
num_idle_states, first_cluster_state=sys.maxint, num_idle_states, first_cluster_state=sys.maxint,
first_system_state=sys.maxint, use_ratios=False, first_system_state=sys.maxint, use_ratios=False,
timeline_csv_file=None, filter_trace=True): timeline_csv_file=None, filter_trace=False):
# pylint: disable=too-many-locals # pylint: disable=too-many-locals
trace = TraceCmdTrace(filter_markers=filter_trace) trace = TraceCmdTrace(filter_markers=filter_trace)
ps_processor = PowerStateProcessor(core_clusters, ps_processor = PowerStateProcessor(core_clusters,
num_idle_states=num_idle_states, num_idle_states=num_idle_states,
first_cluster_state=first_cluster_state, first_cluster_state=first_cluster_state,
first_system_state=first_system_state) first_system_state=first_system_state,
wait_for_start_marker=not filter_trace)
reporters = [ reporters = [
ParallelStats(core_clusters, use_ratios), ParallelStats(core_clusters, use_ratios),
PowerStateStats(core_names, idle_state_names, use_ratios) PowerStateStats(core_names, idle_state_names, use_ratios)
@ -556,7 +593,7 @@ def report_power_stats(trace_file, idle_state_names, core_names, core_clusters,
reporters.append(PowerStateTimeline(timeline_csv_file, reporters.append(PowerStateTimeline(timeline_csv_file,
core_names, idle_state_names)) core_names, idle_state_names))
event_stream = trace.parse(trace_file, names=['cpu_idle', 'cpu_frequency']) event_stream = trace.parse(trace_file, names=['cpu_idle', 'cpu_frequency', 'print'])
transition_stream = stream_cpu_power_transitions(event_stream) transition_stream = stream_cpu_power_transitions(event_stream)
power_state_stream = ps_processor.process(transition_stream) power_state_stream = ps_processor.process(transition_stream)
core_state_stream = gather_core_states(power_state_stream) core_state_stream = gather_core_states(power_state_stream)