Browse code

Check whether the string we just read in via indirect reference is likely UTF-encoded before attempting to convert it from UTF

Shawn Webb authored on 2014/08/28 03:39:38
Showing 1 changed file
... ...
@@ -229,6 +229,7 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
229 229
     char *res;
230 230
     int likelyutf = 0;
231 231
     uint32_t objid;
232
+    size_t i;
232 233
 
233 234
     /*
234 235
      * Yes, all of this is required to find the start and end of a potentially UTF-* string
... ...
@@ -349,7 +350,15 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
349 349
                     free(begin);
350 350
                     break;
351 351
                 default:
352
-                    res = pdf_convert_utf(begin, sb.st_size);
352
+                    for (i=0; i < sb.st_size; i++) {
353
+                        if (begin[i] >= 0x7f) {
354
+                            likelyutf=1;
355
+                            break;
356
+                        }
357
+                    }
358
+
359
+                    res = likelyutf ? pdf_convert_utf(begin, sb.st_size) : NULL;
360
+
353 361
                     if (!(res))
354 362
                         res = begin;
355 363
                     else