#    Copyright 2016 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re

from wa import Workload, Parameter, Executable, ConfigError, WorkloadError
from wa.utils.exec_control import once
from wa.utils.types import list_of_ints


# Patterns used to parse the benchmark's console output. The named groups
# are the ones looked up by Meabo.update_output().
phase_start_regex = re.compile(r"Starting phase\s+(?P<phase>\d+)")
counter_value_regex = re.compile(
    r"Thread\s+(?P<thread>\d+)\s+(?P<name>\w+)\svalue\s+\=\s+(?P<value>\d+)"
)
duration_regex = re.compile(
    r"Phase\s+(?P<phase>\d+)[\s\w\(\)]+\:\s+(?P<duration>\d+)"
)


class Meabo(Workload):
    # Workload that installs the ``meabo`` executable on the target, runs it
    # with a command line built from the parameters below, and turns its
    # textual output into per-phase metrics.

    name = 'meabo'
    description = '''
    A multi-phased multi-purpose micro-benchmark. The micro-benchmark is
    composed of 10 phases that perform various generic calculations (from
    memory to compute intensive).

    It is a highly configurable tool which can be used for energy efficiency
    studies, ARM big.LITTLE Linux scheduler analysis and DVFS studies. It can
    be used for other benchmarking as well.

    All floating-point calculations are double-precision.

    | Phase 1: Floating-point & integer computations with good data locality
    | Phase 2: Vector multiplication & addition, 1 level of indirection in 1
    |          source vector
    | Phase 3: Vector scalar addition and reductions
    | Phase 4: Vector addition
    | Phase 5: Vector addition, 1 level of indirection in both source vectors
    | Phase 6: Sparse matrix-vector multiplication
    | Phase 7: Linked-list traversal
    | Phase 8: Electrostatic force calculations
    | Phase 9: Palindrome calculations
    | Phase 10: Random memory accesses

    For more details and benchmark source, see:

        https://github.com/ARM-software/meabo

    .. note:: current implementation of automation relies on the executable to
              be either statically linked or for all necessary depencies to be
              installed on the target.

    '''

    parameters = [
        Parameter(
            'array_size',
            kind=int,
            description='''
            Size of arrays used in Phases 1, 2, 3, 4 and 5.
            ''',
            constraint=lambda x: x > 0,
            default=1048576,
        ),
        Parameter(
            'nrow',
            kind=int,
            description='''
            Number of rows for the sparse matrix used in Phase 6.
            ''',
            constraint=lambda x: x > 0,
            default=16384,
        ),
        Parameter(
            'ncol',
            kind=int,
            description='''
            Number of columns for the sparse matrix used in Phase 6.
            ''',
            constraint=lambda x: x > 0,
            default=16384,
        ),
        Parameter(
            'num_iterations',
            kind=int,
            description='''
            Number of iterations that core loop is executed.
            ''',
            constraint=lambda x: x > 0,
            default=1000,
        ),
        Parameter(
            'block_size',
            kind=int,
            description='''
            Block size used in Phase 1.
            ''',
            constraint=lambda x: x > 0,
            default=8,
        ),
        Parameter(
            'num_cpus',
            kind=int,
            description='''
            Number of total CPUs that the application can bind threads to.
            ''',
            constraint=lambda x: x > 0,
            default=6,
        ),
        Parameter(
            'per_phase_cpu_ids',
            kind=list_of_ints,
            description='''
            Sets which cores each phase is run on.
            ''',
            constraint=lambda x: all(v >= -1 for v in x),
            default=[-1] * 10,
        ),
        Parameter(
            'num_hwcntrs',
            kind=int,
            description='''
            Only available when using PAPI. Controls how many hardware counters
            PAPI will get access to.
            ''',
            constraint=lambda x: x >= 0,
            default=7,
        ),
        Parameter(
            'run_phases',
            kind=list_of_ints,
            description='''
            Controls which phases to run.
            ''',
            constraint=lambda x: all(0 < v <= 10 for v in x),
            # Materialise the range so the default is a real list on
            # Python 3 as well as Python 2.
            default=list(range(1, 11)),
        ),
        Parameter(
            'num_threads',
            kind=int,
            description='''
            Controls how many threads the application will be using.
            ''',
            constraint=lambda x: x >= 0,
            default=0,
        ),
        Parameter(
            'bind_to_cpu_set',
            kind=int,
            description='''
            Controls whether threads will be bound to a core set, or each
            individual thread will be bound to a specific core within the core
            set.
            ''',
            constraint=lambda x: 0 <= x <= 1,
            default=1,
        ),
        Parameter(
            'llist_size',
            kind=int,
            description='''
            Size of the linked list available for each thread.
            ''',
            constraint=lambda x: x > 0,
            default=16777216,
        ),
        Parameter(
            'num_particles',
            kind=int,
            description='''
            Number of particles used in Phase 8.
            ''',
            constraint=lambda x: x > 0,
            default=1048576,
        ),
        Parameter(
            'num_palindromes',
            kind=int,
            description='''
            Number of palindromes used in Phase 9.
            ''',
            constraint=lambda x: x > 0,
            default=1024,
        ),
        Parameter(
            'num_randomloc',
            kind=int,
            description='''
            Number of random memory locations accessed in Phase 10.
            ''',
            constraint=lambda x: x > 0,
            default=2097152,
        ),
        Parameter(
            'run_timeout',
            kind=int,
            description="""
            Timeout for execution of the test.
            """,
            constraint=lambda x: x > 0,
            default=60 * 45,
        ),
    ]

    # (command-line flag, parameter name) pairs appended to the command by
    # _build_command().
    options = [
        ('-s', 'array_size'),
        ('-B', 'bind_to_cpu_set'),
        ('-b', 'block_size'),
        ('-l', 'llist_size'),
        ('-c', 'ncol'),
        ('-r', 'nrow'),
        ('-C', 'num_cpus'),
        ('-H', 'num_hwcntrs'),
        ('-i', 'num_iterations'),
        ('-x', 'num_palindromes'),
        ('-p', 'num_particles'),
        ('-R', 'num_randomloc'),
        ('-T', 'num_threads'),
    ]

    def validate(self):
        """Raise ConfigError unless every phase to run has a CPU mapping."""
        if len(self.run_phases) != len(self.per_phase_cpu_ids):
            msg = "Number of phases doesn't match the number of CPU mappings"
            raise ConfigError(msg)

    def initialize(self, context):
        """Install the executable on the target and build the command line."""
        self._install_executable(context)
        self._build_command()

    def setup(self, context):
        """Clear any previously captured output before a run."""
        self.output = None

    def run(self, context):
        """Execute the benchmark on the target and capture its output."""
        self.output = self.target.execute(self.command,
                                          timeout=self.run_timeout)

    def update_output(self, context):
        """
        Save the raw benchmark output as an artifact and extract metrics.

        Two kinds of metrics are added: per-thread counter values, named
        ``phase_<phase>_thread_<thread>_<counter>``, and per-phase durations,
        named ``phase_<phase>_duration`` (reported with units of "ns").

        Raises WorkloadError if a counter line is seen before any
        "Starting phase" line.
        """
        if self.output is None:
            self.logger.warning('Did not collect output')
            return

        outfile = os.path.join(context.output_directory, 'meabo-output.txt')
        # The output is processed as text below (split on '\n'), so it is
        # written in text mode; binary mode would reject str on Python 3.
        with open(outfile, 'w') as wfh:
            wfh.write(self.output)
        context.add_artifact('meabo-output', outfile, kind='raw')

        cur_phase = None  # no "Starting phase" line seen yet
        for line in self.output.split('\n'):
            line = line.strip()

            match = phase_start_regex.search(line)
            if match:
                cur_phase = match.group('phase')

            match = counter_value_regex.search(line)
            if match:
                if cur_phase is None:
                    msg = 'Matched thread performance counters outside of phase!'
                    raise WorkloadError(msg)
                name = 'phase_{}_thread_{}_{}'.format(cur_phase,
                                                      match.group('thread'),
                                                      match.group('name'))
                context.add_metric(name, int(match.group('value')))

            match = duration_regex.search(line)
            if match:
                context.add_metric("phase_{}_duration".format(match.group('phase')),
                                   int(match.group('duration')), units="ns")

    def finalize(self, context):
        """Remove the executable from the target."""
        self._uninstall_executable()

    def _build_command(self):
        """Assemble ``self.command`` from the executable path and parameters."""
        # Bit (phase - 1) of the mask is set for every phase to be run.
        phase_mask = 0
        for phase in self.run_phases:
            phase_mask |= 1 << (phase - 1)

        parts = [self.target_exe, '-P {:d}'.format(phase_mask)]

        # Set the CPU ids for each phase we are running
        for phase, cpu_id in zip(self.run_phases, self.per_phase_cpu_ids):
            parts.append('-{0:1d} {1:d}'.format(phase, cpu_id))

        # We need to append extra arguments to the command based on the
        # parameters passed in from the agenda.
        for option, param_name in self.options:
            param_value = getattr(self, param_name, None)
            if param_value is not None:
                parts.append('{} {}'.format(option, param_value))

        self.command = ' '.join(parts)

    @once
    def _install_executable(self, context):
        """Resolve the host binary for the target ABI and install it."""
        resource = Executable(self, self.target.abi, 'meabo')
        host_exe = context.resolver.get(resource)
        # Stored on the class, not the instance.
        Meabo.target_exe = self.target.install(host_exe)

    @once
    def _uninstall_executable(self):
        """Uninstall the previously installed executable from the target."""
        self.target.uninstall_executable(self.target_exe)