diff --git a/doc/source/developer_information/how_to.rst b/doc/source/developer_information/how_to.rst index 094c9ae2..ff9fee29 100644 --- a/doc/source/developer_information/how_to.rst +++ b/doc/source/developer_information/how_to.rst @@ -7,3 +7,5 @@ How Tos :local: .. include:: developer_information/how_tos/adding_plugins.rst + +.. include:: developer_information/how_tos/processing_output.rst diff --git a/doc/source/developer_information/how_tos/processing_output.rst b/doc/source/developer_information/how_tos/processing_output.rst new file mode 100644 index 00000000..16d31a47 --- /dev/null +++ b/doc/source/developer_information/how_tos/processing_output.rst @@ -0,0 +1,394 @@ +.. _processing_output: + +Processing WA Output +==================== + +This section will illustrate the use of WA's :ref:`output processing API +` by creating a simple ASCII report generator. To make +things concrete, this how-to will be processing the output from running the +following agenda:: + + sections: + - runtime_params: + frequency: min + classifiers: + frequency: min + - runtime_params: + frequency: max + classifiers: + frequency: max + workloads: + - sysbench + - deepbench + +This runs two workloads under two different configurations each -- once with +CPU frequency fixed to max, and once with CPU frequency fixed to min. +Classifiers are used to indicate the configuration in the output. + +First, create the :class:`RunOutput` object, which is the main interface for +interacting with WA outputs. + +.. code-block:: python + + import sys + + from wa import RunOutput + + # Path to the output directory specified in the first argument + ro = RunOutput(sys.argv[1]) + + +Run Info +-------- + +Next, we're going to print out an overall summary of the run. + + +.. code-block:: python + + from __future__ import print_function # for Python 2 compat. + + from wa.utils.misc import format_duration + + print('-'*20) + print('Run ID:', ro.info.uuid) + print('Run status:', ro.status) + print('Run started at:', ro.info.start_time.isoformat()) + print('Run completed at:', ro.info.end_time.isoformat()) + print('Run duration:', format_duration(ro.info.duration)) + print('Ran', len(ro.jobs), 'jobs') + print('-'*20) + print() + +``RunOutput.info`` is an instance of :class:`RunInfo` which encapsulates +Overall-run metadata, such as the duration. + + +Target Info +----------- + +Next, some information about the device the results where collected on. + +.. code-block:: python + + print(' Target Information ') + print(' ------------------- ') + print('hostname:', ro.target_info.hostname) + if ro.target_info.os == 'android': + print('Android ID:', ro.target_info.android_id) + else: + print('host ID:', ro.target_info.hostid) + print('CPUs:', ', '.join(cpu.name for cpu in ro.target_info.cpus)) + print() + + print('OS:', ro.target_info.os) + print('ABI:', ro.target_info.abi) + print('rooted:', ro.target_info.is_rooted) + print('kernel version:', ro.target_info.kernel_version) + print('os version:') + for k, v in ro.target_info.os_version.items(): + print('\t', k+':', v) + print() + print('-'*27) + print() + +``RunOutput.target_info`` is an instance of :class:`TargetInfo` that contains +information collected from the target during the run. + + +Jobs Summary +------------ + +Next, show a summary of executed jobs. + +.. code-block:: python + + from wa.utils.misc import write_table + + print(' Jobs ') + print(' ---- ') + print() + rows = [] + for job in ro.jobs: + rows.append([job.id, job.label, job.iteration, job.status]) + write_table(rows, sys.stdout, align='<<><', + headers=['ID', 'LABEL', 'ITER.', 'STATUS']) + print() + print('-'*27) + print() + +``RunOutput.jobs`` is a list of :class:`JobOutput` objects. These contain +information about that particular job, including its execution status, and +:ref:`metrics` and :ref:`artifact` generated by the job. + + +Compare Metrics +--------------- + +Finally, collect metrics, sort them by the "frequency" classifier. Classifiers +that are present in the metric but not its job have been added by the workload. +For the purposes of this report, they will be used to augment the metric's name. + +.. code-block:: python + + from collections import defaultdict + + print() + print(' Metrics Comparison ') + print(' ------------------ ') + print() + scores = defaultdict(lambda: defaultdict(lambda: defaultdict())) + for job in ro.jobs: + for metric in job.metrics: + workload = job.label + name = metric.name + freq = job.classifiers['frequency'] + for cname, cval in sorted(metric.classifiers.items()): + if cname not in job.classifiers: + # was not propagated from the job, therefore was + # added by the workload + name += '/{}={}'.format(cname, cval) + + scores[workload][name][freq] = metric + + rows = [] + for workload in sorted(scores.keys()): + wldata = scores[workload] + +Once the metrics have been sorted, generate the report showing the delta +between the two configurations (indicated by the "frequency" classifier) and +highlight any unexpected deltas (based on the ``lower_is_better`` attribute of +the metric). (In practice, you will want to run multiple iterations of each +configuration, calculate averages and standard deviations, and only highlight +statically significant deltas.) + +.. code-block:: python + + for name in sorted(wldata.keys()): + min_score = wldata[name]['min'].value + max_score = wldata[name]['max'].value + delta = max_score - min_score + units = wldata[name]['min'].units or '' + lib = wldata[name]['min'].lower_is_better + + warn = '' + if (lib and delta > 0) or (not lib and delta < 0): + warn = '!!!' + + rows.append([workload, name, + '{:.3f}'.format(min_score), '{:.3f}'.format(max_score), + '{:.3f}'.format(delta), units, warn]) + + # separate workloads with a blank row + rows.append(['', '', '', '', '', '', '']) + + + write_table(rows, sys.stdout, align='<<>>><<', + headers=['WORKLOAD', 'METRIC', 'MIN.', 'MAX', 'DELTA', 'UNITS', '']) + print() + print('-'*27) + +This concludes this how-to. For more information, please see :ref:`output +processing API documentation `. + + +Complete Example +---------------- + +Below is the complete example code, and a report it generated for a sample run. + +.. code-block:: python + + from __future__ import print_function # for Python 2 compat. + import sys + from collections import defaultdict + + from wa import RunOutput + from wa.utils.misc import format_duration, write_table + + + + # Path to the output directory specified in the first argument + ro = RunOutput(sys.argv[1]) + + print('-'*27) + print('Run ID:', ro.info.uuid) + print('Run status:', ro.status) + print('Run started at:', ro.info.start_time.isoformat()) + print('Run completed at:', ro.info.end_time.isoformat()) + print('Run duration:', format_duration(ro.info.duration)) + print('Ran', len(ro.jobs), 'jobs') + print('-'*27) + print() + + print(' Target Information ') + print(' ------------------- ') + print('hostname:', ro.target_info.hostname) + if ro.target_info.os == 'android': + print('Android ID:', ro.target_info.android_id) + else: + print('host ID:', ro.target_info.hostid) + print('CPUs:', ', '.join(cpu.name for cpu in ro.target_info.cpus)) + print() + + print('OS:', ro.target_info.os) + print('ABI:', ro.target_info.abi) + print('rooted:', ro.target_info.is_rooted) + print('kernel version:', ro.target_info.kernel_version) + print('OS version:') + for k, v in ro.target_info.os_version.items(): + print('\t', k+':', v) + print() + print('-'*27) + print() + + print(' Jobs ') + print(' ---- ') + print() + rows = [] + for job in ro.jobs: + rows.append([job.id, job.label, job.iteration, job.status]) + write_table(rows, sys.stdout, align='<<><', + headers=['ID', 'LABEL', 'ITER.', 'STATUS']) + print() + print('-'*27) + + print() + print(' Metrics Comparison ') + print(' ------------------ ') + print() + scores = defaultdict(lambda: defaultdict(lambda: defaultdict())) + for job in ro.jobs: + for metric in job.metrics: + workload = job.label + name = metric.name + freq = job.classifiers['frequency'] + for cname, cval in sorted(metric.classifiers.items()): + if cname not in job.classifiers: + # was not propagated from the job, therefore was + # added by the workload + name += '/{}={}'.format(cname, cval) + + scores[workload][name][freq] = metric + + rows = [] + for workload in sorted(scores.keys()): + wldata = scores[workload] + + for name in sorted(wldata.keys()): + min_score = wldata[name]['min'].value + max_score = wldata[name]['max'].value + delta = max_score - min_score + units = wldata[name]['min'].units or '' + lib = wldata[name]['min'].lower_is_better + + warn = '' + if (lib and delta > 0) or (not lib and delta < 0): + warn = '!!!' + + rows.append([workload, name, + '{:.3f}'.format(min_score), '{:.3f}'.format(max_score), + '{:.3f}'.format(delta), units, warn]) + + # separate workloads with a blank row + rows.append(['', '', '', '', '', '', '']) + + + write_table(rows, sys.stdout, align='<<>>><<', + headers=['WORKLOAD', 'METRIC', 'MIN.', 'MAX', 'DELTA', 'UNITS', '']) + print() + print('-'*27) + +Sample output:: + + --------------------------- + Run ID: 78aef931-cd4c-429b-ac9f-61f6893312e6 + Run status: OK + Run started at: 2018-06-27T12:55:23.746941 + Run completed at: 2018-06-27T13:04:51.067309 + Run duration: 9 minutes 27 seconds + Ran 4 jobs + --------------------------- + + Target Information + ------------------- + hostname: localhost + Android ID: b9d1d8b48cfba007 + CPUs: A53, A53, A53, A53, A73, A73, A73, A73 + + OS: android + ABI: arm64 + rooted: True + kernel version: 4.9.75-04208-g2c913991a83d-dirty 114 SMP PREEMPT Wed May 9 10:33:36 BST 2018 + OS version: + all_codenames: O + base_os: + codename: O + incremental: eng.valsch.20170517.180115 + preview_sdk: 0 + release: O + sdk: 25 + security_patch: 2017-04-05 + + --------------------------- + + Jobs + ---- + + ID LABEL ITER. STATUS + -- ----- ----- ------ + s1-wk1 sysbench 1 OK + s1-wk2 deepbench 1 OK + s2-wk1 sysbench 1 OK + s2-wk2 deepbench 1 OK + + --------------------------- + + Metrics Comparison + ------------------ + + WORKLOAD METRIC MIN. MAX DELTA UNITS + -------- ------ ---- --- ----- ----- + deepbench GOPS/a_t=n/b_t=n/k=1024/m=128/n=1 0.699 0.696 -0.003 !!! + deepbench GOPS/a_t=n/b_t=n/k=1024/m=3072/n=1 0.471 0.715 0.244 + deepbench GOPS/a_t=n/b_t=n/k=1024/m=3072/n=1500 23.514 36.432 12.918 + deepbench GOPS/a_t=n/b_t=n/k=1216/m=64/n=1 0.333 0.333 -0.000 !!! + deepbench GOPS/a_t=n/b_t=n/k=128/m=3072/n=1 0.405 1.073 0.668 + deepbench GOPS/a_t=n/b_t=n/k=128/m=3072/n=1500 19.914 34.966 15.052 + deepbench GOPS/a_t=n/b_t=n/k=128/m=4224/n=1 0.232 0.486 0.255 + deepbench GOPS/a_t=n/b_t=n/k=1280/m=128/n=1500 20.721 31.654 10.933 + deepbench GOPS/a_t=n/b_t=n/k=1408/m=128/n=1 0.701 0.702 0.001 + deepbench GOPS/a_t=n/b_t=n/k=1408/m=176/n=1500 19.902 29.116 9.214 + deepbench GOPS/a_t=n/b_t=n/k=176/m=4224/n=1500 26.030 39.550 13.519 + deepbench GOPS/a_t=n/b_t=n/k=2048/m=35/n=700 10.884 23.615 12.731 + deepbench GOPS/a_t=n/b_t=n/k=2048/m=5124/n=700 26.740 37.334 10.593 + deepbench execution_time 318.758 220.629 -98.129 seconds !!! + deepbench time (msec)/a_t=n/b_t=n/k=1024/m=128/n=1 0.375 0.377 0.002 !!! + deepbench time (msec)/a_t=n/b_t=n/k=1024/m=3072/n=1 13.358 8.793 -4.565 + deepbench time (msec)/a_t=n/b_t=n/k=1024/m=3072/n=1500 401.338 259.036 -142.302 + deepbench time (msec)/a_t=n/b_t=n/k=1216/m=64/n=1 0.467 0.467 0.000 !!! + deepbench time (msec)/a_t=n/b_t=n/k=128/m=3072/n=1 1.943 0.733 -1.210 + deepbench time (msec)/a_t=n/b_t=n/k=128/m=3072/n=1500 59.237 33.737 -25.500 + deepbench time (msec)/a_t=n/b_t=n/k=128/m=4224/n=1 4.666 2.224 -2.442 + deepbench time (msec)/a_t=n/b_t=n/k=1280/m=128/n=1500 23.721 15.528 -8.193 + deepbench time (msec)/a_t=n/b_t=n/k=1408/m=128/n=1 0.514 0.513 -0.001 + deepbench time (msec)/a_t=n/b_t=n/k=1408/m=176/n=1500 37.354 25.533 -11.821 + deepbench time (msec)/a_t=n/b_t=n/k=176/m=4224/n=1500 85.679 56.391 -29.288 + deepbench time (msec)/a_t=n/b_t=n/k=2048/m=35/n=700 9.220 4.249 -4.970 + deepbench time (msec)/a_t=n/b_t=n/k=2048/m=5124/n=700 549.413 393.517 -155.896 + + sysbench approx. 95 percentile 3.800 1.450 -2.350 ms + sysbench execution_time 1.790 1.437 -0.353 seconds !!! + sysbench response time avg 1.400 1.120 -0.280 ms + sysbench response time max 40.740 42.760 2.020 ms !!! + sysbench response time min 0.710 0.710 0.000 ms + sysbench thread fairness events avg 1250.000 1250.000 0.000 + sysbench thread fairness events stddev 772.650 213.040 -559.610 + sysbench thread fairness execution time avg 1.753 1.401 -0.352 !!! + sysbench thread fairness execution time stddev 0.000 0.000 0.000 + sysbench total number of events 10000.000 10000.000 0.000 + sysbench total time 1.761 1.409 -0.352 s + + + --------------------------- +