From cec71705406f0b2790422f0c1aa0ff3b4b464b1b Mon Sep 17 00:00:00 2001
From: Philip Withnall <withnall@endlessm.com>
Date: Mon, 30 Jul 2018 18:10:25 +0100
Subject: [PATCH] gmarkup: Fix unvalidated UTF-8 read in markup parsing error
 paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When formatting the error messages for markup parsing errors, the parser
was unconditionally reading a UTF-8 character from the input buffer —
but the buffer might end with a partial code sequence, resulting in
reading off the end of the buffer by up to three bytes.

Fix this and add a test case, courtesy of pdknsk.

Signed-off-by: Philip Withnall <withnall@endlessm.com>

---
 glib/gmarkup.c                      | 14 +++++++++++++-
 glib/tests/markups/fail-50.expected |  1 +
 glib/tests/markups/fail-50.gmarkup  |  1 +
 3 files changed, 15 insertions(+), 1 deletions(-)
 create mode 100644 glib/tests/markups/fail-50.expected
 create mode 100644 glib/tests/markups/fail-50.gmarkup

diff --git a/glib/gmarkup.c b/glib/gmarkup.c
index a159d7b..a2b1ed0 100644
--- a/glib/gmarkup.c
+++ b/glib/gmarkup.c
@@ -557,11 +557,23 @@ char_str (gunichar c,
   return buf;
 }
 
+/* Format the next UTF-8 character as a gchar* for printing in error output
+ * when we encounter a syntax error. This correctly handles invalid UTF-8,
+ * emitting it as hex escapes. */
 static gchar*
 utf8_str (const gchar *utf8,
           gchar       *buf)
 {
-  char_str (g_utf8_get_char (utf8), buf);
+  gunichar c = g_utf8_get_char_validated (utf8, -1);
+  if (c == (gunichar) -1 || c == (gunichar) -2)
+    {
+      gchar *temp = g_strdup_printf ("\\x%02x", (guint)(guchar)*utf8);
+      memset (buf, 0, 8);
+      memcpy (buf, temp, strlen (temp));
+      g_free (temp);
+    }
+  else
+    char_str (c, buf);
   return buf;
 }
 
diff --git a/glib/tests/markups/fail-50.expected b/glib/tests/markups/fail-50.expected
new file mode 100644
index 0000000..70d4498
--- /dev/null
+++ b/glib/tests/markups/fail-50.expected
@@ -0,0 +1 @@
+ERROR Error on line 1 char 5: Odd character '\xfc', expected an open quote mark after the equals sign when giving value for attribute 'r' of element ''
diff --git a/glib/tests/markups/fail-50.gmarkup b/glib/tests/markups/fail-50.gmarkup
new file mode 100644
index 0000000..f110f15
--- /dev/null
+++ b/glib/tests/markups/fail-50.gmarkup
@@ -0,0 +1 @@
+<	r=�
\ No newline at end of file
-- 
2.7.4