From 9aaaab7c6e4176e61c59b0a63c6ba906d875dc0e Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Tue, 22 May 2018 10:37:59 +0200
Subject: [PATCH] Don't write beyond destination in
 __mempcpy_avx512_no_vzeroupper (bug 23196)

When compiled as mempcpy, the return value is the end of the destination
buffer, thus it cannot be used to refer to the start of it.
---
 ChangeLog                                               | 9 +++++++++
 string/test-mempcpy.c                                   | 1 +
 sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S | 5 +++--
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/string/test-mempcpy.c b/string/test-mempcpy.c
index c08fba8..d98ecdd 100644
--- a/string/test-mempcpy.c
+++ b/string/test-mempcpy.c
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #define MEMCPY_RESULT(dst, len) (dst) + (len)
+#define MIN_PAGE_SIZE 131072
 #define TEST_MAIN
 #define TEST_NAME "mempcpy"
 #include "test-string.h"
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
index 23c0f7a..effc3ac 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
@@ -336,6 +336,7 @@ L(preloop_large):
 	vmovups	(%rsi), %zmm4
 	vmovups	0x40(%rsi), %zmm5
 
+	mov	%rdi, %r11
 /* Align destination for access with non-temporal stores in the loop.  */
 	mov	%rdi, %r8
 	and	$-0x80, %rdi
@@ -366,8 +367,8 @@ L(gobble_256bytes_nt_loop):
 	cmp	$256, %rdx
 	ja	L(gobble_256bytes_nt_loop)
 	sfence
-	vmovups	%zmm4, (%rax)
-	vmovups	%zmm5, 0x40(%rax)
+	vmovups	%zmm4, (%r11)
+	vmovups	%zmm5, 0x40(%r11)
 	jmp	L(check)
 
 L(preloop_large_bkw):

diff --git a/string/test-memcpy.c b/string/test-memcpy.c
index 45f20a6..3c8066d 100644
--- a/string/test-memcpy.c
+++ b/string/test-memcpy.c
@@ -212,6 +212,50 @@ do_random_tests (void)
     }
 }
 
+static void
+do_test1 (void)
+{
+  size_t size = 0x100000;
+  void *large_buf;
+
+  large_buf = mmap (NULL, size * 2 + page_size, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANON, -1, 0);
+  if (large_buf == MAP_FAILED)
+    {
+      puts ("Failed to allocat large_buf, skipping do_test1");
+      return;
+    }
+
+  if (mprotect (large_buf + size, page_size, PROT_NONE))
+    error (EXIT_FAILURE, errno, "mprotect failed");
+
+  size_t arrary_size = size / sizeof (uint32_t);
+  uint32_t *dest = large_buf;
+  uint32_t *src = large_buf + size + page_size;
+  size_t i;
+
+  for (i = 0; i < arrary_size; i++)
+    src[i] = (uint32_t) i;
+
+  FOR_EACH_IMPL (impl, 0)
+    {
+      memset (dest, -1, size);
+      CALL (impl, (char *) dest, (char *) src, size);
+      for (i = 0; i < arrary_size; i++)
+	if (dest[i] != src[i])
+	  {
+	    error (0, 0,
+		   "Wrong result in function %s dst \"%p\" src \"%p\" offset \"%zd\"",
+		   impl->name, dest, src, i);
+	    ret = 1;
+	    break;
+	  }
+    }
+
+  munmap ((void *) dest, size);
+  munmap ((void *) src, size);
+}
+
 int
 test_main (void)
 {
@@ -253,6 +297,9 @@ test_main (void)
   do_test (0, 0, getpagesize ());
 
   do_random_tests ();
+
+  do_test1 ();
+
   return ret;
 }
 
-- 
2.9.3