diff --git a/wa/workloads/memcpy/__init__.py b/wa/workloads/memcpy/__init__.py
new file mode 100644
index 00000000..cc8a32f4
--- /dev/null
+++ b/wa/workloads/memcpy/__init__.py
@@ -0,0 +1,81 @@
+#    Copyright 2013-2015 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pylint: disable=E1101,W0201
+
+import os
+import re
+
+from wa import Workload, Parameter, Executable
+
+
+THIS_DIR = os.path.dirname(__file__)
+
+
+RESULT_REGEX = re.compile('Total time: ([\d.]+) s.*Bandwidth: ([\d.]+) MB/s', re.S)
+
+
+class Memcpy(Workload):
+
+    name = 'memcpy'
+    description = """
+    Runs memcpy in a loop.
+
+    This will run memcpy in a loop for a specified number of times on a buffer
+    of a specified size. Additionally, the affinity of the test can be set to
+    one or more specific cores.
+
+    This workload is single-threaded. It genrates no scores or metrics by
+    itself.
+
+    """
+
+    parameters = [
+        Parameter('buffer_size', kind=int, default=1024 * 1024 * 5,
+                  description='''
+                  Specifies the size, in bytes, of the buffer to be copied.
+                  '''),
+        Parameter('iterations', kind=int, default=1000,
+                  description='''
+                  Specfies the number of iterations that will be performed.
+                  '''),
+        Parameter('cpus', kind=list,
+                  description='''
+                  A list of integers specifying ordinals of cores to which the
+                  affinity of the test process should be set. If not specified,
+                  all avaiable cores will be used.
+                  '''),
+    ]
+
+    def initialize(self, context):
+        self.binary_name = 'memcpy'
+        resource = Executable(self, self.target.abi, self.binary_name)
+        host_binary = context.resolver.get(resource)
+        Memcpy.target_exe = self.target.install_if_needed(host_binary)
+
+    def setup(self, context):
+        self.command = '{} -i {} -s {}'.format(self.target_exe, self.iterations, self.buffer_size)
+        for c in (self.cpus or []):
+            self.command += ' -c {}'.format(c)
+        self.result = None
+
+    def run(self, context):
+        self.result = self.target.execute(self.command, timeout=300)
+
+    def extract_results(self, context):
+        if self.result:
+            match = RESULT_REGEX.search(self.result)
+            context.add_metric('time', float(match.group(1)), 'seconds', lower_is_better=True)
+            context.add_metric('bandwidth', float(match.group(2)), 'MB/s')
diff --git a/wa/workloads/memcpy/bin/arm64/memcpy b/wa/workloads/memcpy/bin/arm64/memcpy
new file mode 100755
index 00000000..39982df8
Binary files /dev/null and b/wa/workloads/memcpy/bin/arm64/memcpy differ
diff --git a/wa/workloads/memcpy/bin/armeabi/memcpy b/wa/workloads/memcpy/bin/armeabi/memcpy
new file mode 100755
index 00000000..4af3239a
Binary files /dev/null and b/wa/workloads/memcpy/bin/armeabi/memcpy differ
diff --git a/wa/workloads/memcpy/src/build.sh b/wa/workloads/memcpy/src/build.sh
new file mode 100755
index 00000000..bb1895c5
--- /dev/null
+++ b/wa/workloads/memcpy/src/build.sh
@@ -0,0 +1,16 @@
+#    Copyright 2013-2015 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+${CROSS_COMPILE}gcc -static memcopy.c -o memcpy  # the workload looks up an executable named "memcpy"
diff --git a/wa/workloads/memcpy/src/memcopy.c b/wa/workloads/memcpy/src/memcopy.c
new file mode 100644
index 00000000..19f569d3
--- /dev/null
+++ b/wa/workloads/memcpy/src/memcopy.c
@@ -0,0 +1,114 @@
+/*    Copyright 2013-2015 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <pthread.h>
+#include <time.h>
+
+const int MAX_CPUS = 8;  /* size of the cpus[] array that main() fills from -c options */
+const int DEFAULT_ITERATIONS = 1000;  /* number of memcpy() calls when -i is not given */
+const int DEFAULT_BUFFER_SIZE = 1024 * 1024 * 5;  /* bytes per copy when -s is not given */
+
+/* Restrict the calling thread to the listed CPUs; returns 0 on success, -1 on failure. */
+int set_affinity(size_t cpus_size, int* cpus)
+{
+	size_t i;
+	unsigned long mask = 0;	/* the raw syscall reads the mask in unsigned long units */
+	for(i = 0; i < cpus_size; ++i)
+		if (cpus[i] < 0 || cpus[i] >= (int)(8 * sizeof(mask)))
+			return -1;	/* the shift below would be undefined for this CPU number */
+		else
+			mask |= 1UL << cpus[i];
+	return syscall(__NR_sched_setaffinity, 0, sizeof(mask), &mask);
+}
+
+/* Usage: memcpy [-i iterations] [-s buffer_size] [-c cpu]...
+ * Times the memcpy loop, then prints the total time and the bandwidth. */
+int main(int argc, char** argv)
+{
+	int cpus[MAX_CPUS];
+	int next_cpu = 0;
+	int iterations = DEFAULT_ITERATIONS;
+	int buffer_size = DEFAULT_BUFFER_SIZE;
+	int c, i, ret;
+
+	while ((c = getopt(argc, argv, "i:c:s:")) != -1)
+		switch (c)
+		{
+		case 'c':
+			/* Check before storing, so that all MAX_CPUS slots are usable. */
+			if (next_cpu == MAX_CPUS)
+			{
+				fprintf(stderr, "Max CPUs exceeded.\n");
+				abort();
+			}
+			cpus[next_cpu++] = atoi(optarg);
+			break;
+		case 'i':
+			iterations = atoi(optarg);
+			break;
+		case 's':
+			buffer_size = atoi(optarg);
+			break;
+		default:
+			abort();
+		}
+
+	if (next_cpu != 0 && (ret = set_affinity(next_cpu, cpus)) != 0)
+	{
+		fprintf(stderr, "sched_setaffinity returned %i.\n", ret);
+		abort();
+	}
+
+	char* source = malloc(buffer_size);
+	char* dest = malloc(buffer_size);
+	if (source == NULL || dest == NULL)
+	{
+		fprintf(stderr, "Could not allocate buffers.\n");
+		abort();
+	}
+
+	struct timespec before, after;
+	if (clock_gettime(CLOCK_MONOTONIC, &before))
+	{
+		fprintf(stderr, "Could not get start time.\n");
+		abort();
+	}
+
+	for (i = 0; i < iterations; ++i)
+		memcpy(dest, source, buffer_size);
+
+	if (clock_gettime(CLOCK_MONOTONIC, &after))
+	{
+		fprintf(stderr, "Could not get end time.\n");
+		abort();
+	}
+
+	free(dest);
+	free(source);
+
+	double delta = (double)(after.tv_sec - before.tv_sec)
+		+ (after.tv_nsec - before.tv_nsec) / 1e9;
+	printf("Total time: %f s\n", delta);
+	printf("Bandwidth: %f MB/s\n", buffer_size / delta * iterations / 1e6);
+	return 0;
+}