From 311c4e419fe22d7b6953e08ba0dbdfb6924a2728 Mon Sep 17 00:00:00 2001 From: Michael McGeagh Date: Tue, 1 Nov 2016 17:36:17 +0000 Subject: [PATCH] FPS. Added gfxinfo methods of obtaining fps stats. Auto detects and uses appropriate method via android version of device Output 90th, 95th and 99th percentile metrics. This happens for both SurfaceFlinger and Gfxinfo methods. --- wlauto/instrumentation/fps/__init__.py | 172 ++++++++++++++++++++----- wlauto/utils/fps.py | 112 ++++++++++++---- 2 files changed, 228 insertions(+), 56 deletions(-) mode change 100644 => 100755 wlauto/utils/fps.py diff --git a/wlauto/instrumentation/fps/__init__.py b/wlauto/instrumentation/fps/__init__.py index 6316eadd..ea89e6af 100755 --- a/wlauto/instrumentation/fps/__init__.py +++ b/wlauto/instrumentation/fps/__init__.py @@ -24,6 +24,8 @@ import shutil import threading import errno import tempfile +import collections +import re from distutils.version import LooseVersion @@ -37,7 +39,7 @@ from wlauto.instrumentation import instrument_is_installed from wlauto.exceptions import (InstrumentError, WorkerThreadError, ConfigError, DeviceNotRespondingError, TimeoutError) from wlauto.utils.types import boolean, numeric -from wlauto.utils.fps import FpsProcessor +from wlauto.utils.fps import FpsProcessor, SurfaceFlingerFrame, GfxInfoFrame, GFXINFO_EXEMPT VSYNC_INTERVAL = 16666667 @@ -49,20 +51,28 @@ class FpsInstrument(Instrument): name = 'fps' description = """ - Measures Frames Per Second (FPS) and associated metrics for a workload's main View. + Measures Frames Per Second (FPS) and associated metrics for a workload. .. note:: This instrument depends on pandas Python library (which is not part of standard WA dependencies), so you will need to install that first, before you can use it. + Android L and below use SurfaceFlinger to calculate the FPS data. + Android M and above use gfxinfo to calculate the FPS data. + + SurfaceFlinger: The view is specified by the workload as ``view`` attribute. This defaults to ``'SurfaceView'`` for game workloads, and ``None`` for non-game workloads (as for them FPS mesurement usually doesn't make sense). Individual workloads may override this. + gfxinfo: + The view is specified by the workload as ``package`` attribute. + This is because gfxinfo already processes for all views in a package. + This instrument adds four metrics to the results: :FPS: Frames Per Second. This is the frame rate of the workload. - :frames: The total number of frames rendered during the execution of + :frame_count: The total number of frames rendered during the execution of the workload. :janks: The number of "janks" that occured during execution of the workload. Janks are sudden shifts in frame rate. They result @@ -125,14 +135,13 @@ class FpsInstrument(Instrument): """), ] - clear_command = 'dumpsys SurfaceFlinger --latency-clear ' - def __init__(self, device, **kwargs): super(FpsInstrument, self).__init__(device, **kwargs) self.collector = None self.outfile = None self.fps_outfile = None self.is_enabled = True + self.fps_method = '' def validate(self): if not pd or LooseVersion(pd.__version__) < LooseVersion('0.13.1'): @@ -147,29 +156,56 @@ class FpsInstrument(Instrument): if hasattr(workload, 'view'): self.fps_outfile = os.path.join(context.output_directory, 'fps.csv') self.outfile = os.path.join(context.output_directory, 'frames.csv') - self.collector = LatencyCollector(self.outfile, self.device, workload.view or '', - self.keep_raw, self.logger, self.dumpsys_period) - self.device.execute(self.clear_command) + # Android M brings a new method of collecting FPS data + if self.device.get_sdk_version() >= 23: + # gfxinfo takes in the package name rather than a single view/activity + # so there is no 'list_command' to run and compare against a list of + # views/activities. Additionally, clearing the stats requires the package + # so we need to clear for every package in the workload. + # Usually there is only one package, but some workloads may run multiple + # packages so each one must be reset before continuing + self.fps_method = 'gfxinfo' + runcmd = 'dumpsys gfxinfo {} framestats' + lstcmd = None + params = workload.package + params = [params] if isinstance(params, basestring) else params + for pkg in params: + self.device.execute('dumpsys gfxinfo {} reset'.format(pkg)) + else: + self.fps_method = 'surfaceflinger' + runcmd = 'dumpsys SurfaceFlinger --latency {}' + lstcmd = 'dumpsys SurfaceFlinger --list' + params = workload.view + self.device.execute('dumpsys SurfaceFlinger --latency-clear ') + + self.collector = LatencyCollector(self.outfile, self.device, params or '', + self.keep_raw, self.logger, self.dumpsys_period, + runcmd, lstcmd, self.fps_method) else: self.logger.debug('Workload does not contain a view; disabling...') self.is_enabled = False def start(self, context): if self.is_enabled: - self.logger.debug('Starting SurfaceFlinger collection...') + self.logger.debug('Starting Frame Statistics collection...') self.collector.start() def stop(self, context): if self.is_enabled and self.collector.is_alive(): - self.logger.debug('Stopping SurfaceFlinger collection...') + self.logger.debug('Stopping Frame Statistics collection...') self.collector.stop() def update_result(self, context): if self.is_enabled: fps, frame_count, janks, not_at_vsync = float('nan'), 0, 0, 0 + p90, p95, p99 = [float('nan')] * 3 data = pd.read_csv(self.outfile) if not data.empty: # pylint: disable=maybe-no-member - fp = FpsProcessor(data) + # gfxinfo method has an additional file generated that contains statistics + stats_file = None + if self.fps_method == 'gfxinfo': + stats_file = os.path.join(os.path.dirname(self.outfile), 'gfxinfo.csv') + fp = FpsProcessor(data, extra_data=stats_file) per_frame_fps, metrics = fp.process(self.collector.refresh_period, self.drop_threshold) fps, frame_count, janks, not_at_vsync = metrics @@ -177,10 +213,15 @@ class FpsInstrument(Instrument): per_frame_fps.to_csv(self.fps_outfile, index=False, header=True) context.add_artifact('fps', path='fps.csv', kind='data') + p90, p95, p99 = fp.percentiles() + context.result.add_metric('FPS', fps) context.result.add_metric('frame_count', frame_count) - context.result.add_metric('janks', janks) - context.result.add_metric('not_at_vsync', not_at_vsync) + context.result.add_metric('janks', janks, lower_is_better=True) + context.result.add_metric('not_at_vsync', not_at_vsync, lower_is_better=True) + context.result.add_metric('frame_time_90percentile', p90, 'ms', lower_is_better=True) + context.result.add_metric('frame_time_95percentile', p95, 'ms', lower_is_better=True) + context.result.add_metric('frame_time_99percentile', p99, 'ms', lower_is_better=True) def slow_update_result(self, context): result = context.result @@ -200,17 +241,15 @@ class FpsInstrument(Instrument): class LatencyCollector(threading.Thread): - # Note: the size of the frames buffer for a particular surface is defined # by NUM_FRAME_RECORDS inside android/services/surfaceflinger/FrameTracker.h. # At the time of writing, this was hard-coded to 128. So at 60 fps # (and there is no reason to go above that, as it matches vsync rate # on pretty much all phones), there is just over 2 seconds' worth of # frames in there. Hence the default sleep time of 2 seconds between dumps. - #command_template = 'while (true); do dumpsys SurfaceFlinger --latency {}; sleep 2; done' - command_template = 'dumpsys SurfaceFlinger --latency {}' - def __init__(self, outfile, device, activities, keep_raw, logger, dumpsys_period): + def __init__(self, outfile, device, activities, keep_raw, logger, dumpsys_period, + run_command, list_command, fps_method): super(LatencyCollector, self).__init__() self.outfile = outfile self.device = device @@ -227,17 +266,47 @@ class LatencyCollector(threading.Thread): if isinstance(activities, basestring): activities = [activities] self.activities = activities + self.command_template = run_command + self.list_command = list_command + self.fps_method = fps_method + # Based on the fps_method, setup the header for the csv, + # and set the process_trace_line function accordingly + if fps_method == 'surfaceflinger': + self.header = SurfaceFlingerFrame._fields + self.process_trace_line = self._process_surfaceflinger_line + else: + self.header = GfxInfoFrame._fields + self.process_trace_line = self._process_gfxinfo_line + self.re_frame = re.compile('[0-9,]+') + self.re_stats = re.compile('.*(percentile|frames|Number).*') + # Create a template summary text block that matches what gfxinfo gives after a reset + # - 133 is the default ms value for percentiles after reset + self.summary = collections.OrderedDict((('Total frames rendered', 0), + ('Janky frames', 0), + ('90th percentile', 133), + ('95th percentile', 133), + ('99th percentile', 133), + ('Number Missed Vsync', 0), + ('Number High input latency', 0), + ('Number Slow UI thread', 0), + ('Number Slow bitmap uploads', 0), + ('Number Slow issue draw commands', 0))) def run(self): try: - self.logger.debug('SurfaceFlinger collection started.') + self.logger.debug('Frame Statistics collection started. Method: ' + self.fps_method) self.stop_signal.clear() fd, temp_file = tempfile.mkstemp() self.logger.debug('temp file: {}'.format(temp_file)) wfh = os.fdopen(fd, 'wb') try: + view_list = self.activities while not self.stop_signal.is_set(): - view_list = self.device.execute('dumpsys SurfaceFlinger --list').split() + # If a list_command is provided, set the view_list to be its output + # Then check for each activity in this list and if there is a match, + # process the output. If no command is provided, then always process. + if self.list_command: + view_list = self.device.execute(self.list_command).split() for activity in self.activities: if activity in view_list: wfh.write(self.device.execute(self.command_template.format(activity))) @@ -250,9 +319,9 @@ class LatencyCollector(threading.Thread): for line in text.split('\n'): line = line.strip() if line: - self._process_trace_line(line) + self.process_trace_line(line) if self.keep_raw: - raw_file = os.path.join(os.path.dirname(self.outfile), 'surfaceflinger.raw') + raw_file = os.path.join(os.path.dirname(self.outfile), self.fps_method + '.raw') shutil.copy(temp_file, raw_file) os.unlink(temp_file) except (DeviceNotRespondingError, TimeoutError): # pylint: disable=W0703 @@ -260,42 +329,81 @@ class LatencyCollector(threading.Thread): except Exception, e: # pylint: disable=W0703 self.logger.warning('Exception on collector thread: {}({})'.format(e.__class__.__name__, e)) self.exc = WorkerThreadError(self.name, sys.exc_info()) - self.logger.debug('SurfaceFlinger collection stopped.') + self.logger.debug('Frame Statistics collection stopped.') with open(self.outfile, 'w') as wfh: writer = csv.writer(wfh) - writer.writerow(['desired_present_time', 'actual_present_time', 'frame_ready_time']) + writer.writerow(self.header) writer.writerows(self.frames) self.logger.debug('Frames data written.') + # gfxinfo outputs its own summary statistics for the run. + # No point calculating those from the raw data, so store in its own file for later use. + if self.fps_method == 'gfxinfo': + stats_file = os.path.join(os.path.dirname(self.outfile), 'gfxinfo.csv') + with open(stats_file, 'w') as wfh: + writer = csv.writer(wfh) + writer.writerows(zip(self.summary.keys(), self.summary.values())) + self.logger.debug('Gfxinfo summary data written.') + def stop(self): self.stop_signal.set() self.join() if self.unresponsive_count: - message = 'SurfaceFlinger was unrepsonsive {} times.'.format(self.unresponsive_count) + message = 'LatencyCollector was unrepsonsive {} times.'.format(self.unresponsive_count) if self.unresponsive_count > 10: self.logger.warning(message) else: self.logger.debug(message) if self.exc: raise self.exc # pylint: disable=E0702 - self.logger.debug('FSP collection complete.') + self.logger.debug('Frame Statistics complete.') - def _process_trace_line(self, line): + def _process_surfaceflinger_line(self, line): parts = line.split() if len(parts) == 3: - desired_present_time, actual_present_time, frame_ready_time = map(int, parts) - if frame_ready_time <= self.last_ready_time: + frame = SurfaceFlingerFrame(*map(int, parts)) + if frame.frame_ready_time <= self.last_ready_time: return # duplicate frame - if (frame_ready_time - desired_present_time) > self.drop_threshold: + if (frame.frame_ready_time - frame.desired_present_time) > self.drop_threshold: self.logger.debug('Dropping bogus frame {}.'.format(line)) return # bogus data - self.last_ready_time = frame_ready_time - self.frames.append((desired_present_time, actual_present_time, frame_ready_time)) + self.last_ready_time = frame.frame_ready_time + self.frames.append(frame) elif len(parts) == 1: self.refresh_period = int(parts[0]) - self.drop_threshold = self.refresh_period * 10 + self.drop_threshold = self.refresh_period * 1000 elif 'SurfaceFlinger appears to be unresponsive, dumping anyways' in line: self.unresponsive_count += 1 else: self.logger.warning('Unexpected SurfaceFlinger dump output: {}'.format(line)) + + def _process_gfxinfo_line(self, line): + if 'No process found for' in line: + self.unresponsive_count += 1 + return + # Process lines related to the frame data + match = self.re_frame.match(line) + if match: + data = match.group(0)[:-1] + data = map(int, data.split(',')) + frame = GfxInfoFrame(*data) + if frame not in self.frames: + if frame.Flags & GFXINFO_EXEMPT: + self.logger.debug('Dropping exempt frame {}.'.format(line)) + else: + self.frames.append(frame) + return + # Process lines related to the summary statistics + match = self.re_stats.match(line) + if match: + data = match.group(0) + title, value = data.split(':', 1) + title = title.strip() + value = value.strip() + if title in self.summary: + if 'ms' in value: + value = value.strip('ms') + if '%' in value: + value = value.split()[0] + self.summary[title] = int(value) diff --git a/wlauto/utils/fps.py b/wlauto/utils/fps.py old mode 100644 new mode 100755 index 0b3c2d97..b25083db --- a/wlauto/utils/fps.py +++ b/wlauto/utils/fps.py @@ -12,6 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import collections + +try: + import pandas as pd +except ImportError: + pd = None + +SurfaceFlingerFrame = collections.namedtuple('SurfaceFlingerFrame', 'desired_present_time actual_present_time frame_ready_time') +GfxInfoFrame = collections.namedtuple('GfxInfoFrame', 'Flags IntendedVsync Vsync OldestInputEvent NewestInputEvent HandleInputStart AnimationStart PerformTraversalsStart DrawStart SyncQueued SyncStart IssueDrawCommandsStart SwapBuffers FrameCompleted') +# https://android.googlesource.com/platform/frameworks/base/+/marshmallow-release/libs/hwui/JankTracker.cpp +# Frames that are exempt from jank metrics. +# First-draw frames, for example, are expected to be slow, +# this is hidden from the user with window animations and other tricks +# Similarly, we don't track direct-drawing via Surface:lockHardwareCanvas() for now +# Android M: WindowLayoutChanged | SurfaceCanvas +GFXINFO_EXEMPT = 1 | 4 class FpsProcessor(object): @@ -22,7 +38,7 @@ class FpsProcessor(object): This processor returns the four frame statistics below: :FPS: Frames Per Second. This is the frame rate of the workload. - :frames: The total number of frames rendered during the execution of + :frame_count: The total number of frames rendered during the execution of the workload. :janks: The number of "janks" that occurred during execution of the workload. Janks are sudden shifts in frame rate. They result @@ -31,13 +47,15 @@ class FpsProcessor(object): vsync cycle. """ - def __init__(self, data, action=None): + def __init__(self, data, action=None, extra_data=None): """ data - a pandas.DataFrame object with frame data (e.g. frames.csv) action - output metrics names with additional action specifier + extra_data - extra data given to use for calculations of metrics """ self.data = data self.action = action + self.extra_data = extra_data def process(self, refresh_period, drop_threshold): # pylint: disable=too-many-locals """ @@ -49,39 +67,85 @@ class FpsProcessor(object): fps = float('nan') frame_count, janks, not_at_vsync = 0, 0, 0 vsync_interval = refresh_period + per_frame_fps = pd.Series() - # fiter out bogus frames. - bogus_frames_filter = self.data.actual_present_time != 0x7fffffffffffffff - actual_present_times = self.data.actual_present_time[bogus_frames_filter] + # SurfaceFlinger Algorithm + if self.data.columns.tolist() == list(SurfaceFlingerFrame._fields): + # fiter out bogus frames. + bogus_frames_filter = self.data.actual_present_time != 0x7fffffffffffffff + actual_present_times = self.data.actual_present_time[bogus_frames_filter] - actual_present_time_deltas = actual_present_times - actual_present_times.shift() - actual_present_time_deltas = actual_present_time_deltas.drop(0) + actual_present_time_deltas = actual_present_times - actual_present_times.shift() + actual_present_time_deltas = actual_present_time_deltas.drop(0) - vsyncs_to_compose = actual_present_time_deltas / vsync_interval - vsyncs_to_compose.apply(lambda x: int(round(x, 0))) + vsyncs_to_compose = actual_present_time_deltas / vsync_interval + vsyncs_to_compose.apply(lambda x: int(round(x, 0))) - # drop values lower than drop_threshold FPS as real in-game frame - # rate is unlikely to drop below that (except on loading screens - # etc, which should not be factored in frame rate calculation). - per_frame_fps = (1.0 / (vsyncs_to_compose * (vsync_interval / 1e9))) - keep_filter = per_frame_fps > drop_threshold - filtered_vsyncs_to_compose = vsyncs_to_compose[keep_filter] - per_frame_fps.name = 'fps' + # drop values lower than drop_threshold FPS as real in-game frame + # rate is unlikely to drop below that (except on loading screens + # etc, which should not be factored in frame rate calculation). + per_frame_fps = (1.0 / (vsyncs_to_compose * (vsync_interval / 1e9))) + keep_filter = per_frame_fps > drop_threshold + filtered_vsyncs_to_compose = vsyncs_to_compose[keep_filter] + per_frame_fps.name = 'fps' - if not filtered_vsyncs_to_compose.empty: - total_vsyncs = filtered_vsyncs_to_compose.sum() - frame_count = filtered_vsyncs_to_compose.size + if not filtered_vsyncs_to_compose.empty: + total_vsyncs = filtered_vsyncs_to_compose.sum() + frame_count = filtered_vsyncs_to_compose.size - if total_vsyncs: - fps = 1e9 * frame_count / (vsync_interval * total_vsyncs) + if total_vsyncs: + fps = 1e9 * frame_count / (vsync_interval * total_vsyncs) - janks = self._calc_janks(filtered_vsyncs_to_compose) - not_at_vsync = self._calc_not_at_vsync(vsyncs_to_compose) + janks = self._calc_janks(filtered_vsyncs_to_compose) + not_at_vsync = self._calc_not_at_vsync(vsyncs_to_compose) + + # GfxInfo Algorithm + elif self.data.columns.tolist() == list(GfxInfoFrame._fields): + frame_time = self.data.FrameCompleted - self.data.IntendedVsync + per_frame_fps = (1e9 / frame_time) + keep_filter = per_frame_fps > drop_threshold + per_frame_fps = per_frame_fps[keep_filter] + per_frame_fps.name = 'fps' + + frame_count = self.data.index.size + janks = frame_time[frame_time >= vsync_interval].count() + not_at_vsync = self.data.IntendedVsync - self.data.Vsync + not_at_vsync = not_at_vsync[not_at_vsync != 0].count() + + duration = self.data.Vsync.iloc[-1] - self.data.Vsync.iloc[0] + fps = (1e9 * frame_count) / float(duration) + + # If gfxinfocsv is provided, get stats from that instead + if self.extra_data: + series = pd.read_csv(self.extra_data, header=None, index_col=0, squeeze=True) + if not series.empty: # pylint: disable=maybe-no-member + frame_count = series['Total frames rendered'] + janks = series['Janky frames'] + not_at_vsync = series['Number Missed Vsync'] metrics = (fps, frame_count, janks, not_at_vsync) - return per_frame_fps, metrics + def percentiles(self): + # SurfaceFlinger Algorithm + if self.data.columns.tolist() == list(SurfaceFlingerFrame._fields): + frame_time = self.data.frame_ready_time.diff() + # GfxInfo Algorithm + elif self.data.columns.tolist() == list(GfxInfoFrame._fields): + frame_time = self.data.FrameCompleted - self.data.IntendedVsync + + data = frame_time.quantile([0.90, 0.95, 0.99]) + # Convert to ms, round to nearest, cast to int + data = data.div(1e6).round().astype('int') + + # If gfxinfocsv is provided, get stats from that instead + if self.extra_data: + series = pd.read_csv(self.extra_data, header=None, index_col=0, squeeze=True) + if not series.empty: # pylint: disable=maybe-no-member + data = series[series.index.str.contains('th percentile')] # pylint: disable=maybe-no-member + + return list(data.get_values()) + @staticmethod def _calc_janks(filtered_vsyncs_to_compose): """