From cec71705406f0b2790422f0c1aa0ff3b4b464b1b Mon Sep 17 00:00:00 2001
From: Philip Withnall <withnall@endlessm.com>
Date: Mon, 30 Jul 2018 18:10:25 +0100
Subject: [PATCH] gmarkup: Fix unvalidated UTF-8 read in markup parsing error
 paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When formatting the error messages for markup parsing errors, the parser
was unconditionally reading a UTF-8 character from the input buffer —
but the buffer might end with a partial code sequence, resulting in
reading off the end of the buffer by up to three bytes.

Fix this and add a test case, courtesy of pdknsk.

Signed-off-by: Philip Withnall <withnall@endlessm.com>

---
 glib/gmarkup.c                      | 14 +++++++++++++-
 glib/tests/Makefile.am              |  3 ++-
 glib/tests/markups/fail-50.expected |  1 +
 glib/tests/markups/fail-50.gmarkup  |  1 +
 4 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 glib/tests/markups/fail-50.expected
 create mode 100644 glib/tests/markups/fail-50.gmarkup

diff --git a/glib/gmarkup.c b/glib/gmarkup.c
index a159d7b..a2b1ed0 100644
--- a/glib/gmarkup.c
+++ b/glib/gmarkup.c
@@ -557,11 +557,23 @@ char_str (gunichar c,
   return buf;
 }
 
+/* Format the next UTF-8 character of @utf8 into @buf (at least 8 bytes) for
+ * printing in syntax error output. Invalid or incomplete UTF-8 is handled by
+ * emitting the first byte as a "\xNN" hex escape instead. Returns @buf. */
 static gchar*
 utf8_str (const gchar *utf8,
           gchar       *buf)
 {
-  char_str (g_utf8_get_char (utf8), buf);
+  gunichar c = g_utf8_get_char_validated (utf8, -1);
+  if (c == (gunichar) -1 || c == (gunichar) -2)
+    {
+      gchar *temp = g_strdup_printf ("\\x%02x", (guint)(guchar)*utf8);
+      memset (buf, 0, 8);
+      memcpy (buf, temp, strlen (temp));
+      g_free (temp);
+    }
+  else
+    char_str (c, buf);
   return buf;
 }
 
diff --git a/glib/tests/Makefile.am b/glib/tests/Makefile.am
index a690064..4c8028b 100644
--- a/glib/tests/Makefile.am
+++ b/glib/tests/Makefile.am
@@ -155,7 +155,8 @@ markup_tests = \
 	fail-31 fail-32 fail-33 fail-34 fail-35 \
 	fail-36 fail-37 fail-38 fail-39 fail-40 \
 	fail-41 fail-42 fail-43 fail-44 fail-45 \
-	fail-46 fail-47 fail-48 fail-49 fail-51 \
+	fail-46 fail-47 fail-48 fail-49 fail-50 \
+	fail-51 \
 	valid-1 valid-2 valid-3 valid-4 valid-5 \
 	valid-6 valid-7 valid-8 valid-9 valid-10 \
 	valid-11 valid-12 valid-13 valid-14 valid-15 \
diff --git a/glib/tests/markups/fail-50.expected b/glib/tests/markups/fail-50.expected
new file mode 100644
index 0000000..70d4498
--- /dev/null
+++ b/glib/tests/markups/fail-50.expected
@@ -0,0 +1 @@
+ERROR Error on line 1 char 5: Odd character '\xfc', expected an open quote mark after the equals sign when giving value for attribute 'r' of element ''
diff --git a/glib/tests/markups/fail-50.gmarkup b/glib/tests/markups/fail-50.gmarkup
new file mode 100644
index 0000000..f110f15
--- /dev/null
+++ b/glib/tests/markups/fail-50.gmarkup
@@ -0,0 +1 @@
+<	r=�
\ No newline at end of file
-- 
2.7.4