Browse code

bb #6643: pdf dictionary fix

David Raynor authored on 2013/01/25 04:43:58
Showing 1 changed file
... ...
@@ -1273,61 +1273,103 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1273 1273
 {
1274 1274
     /* enough to hold common pdf names, we don't need all the names */
1275 1275
     char pdfname[64];
1276
-    const char *q2, *q3, *q4;
1276
+    const char *q2, *q3;
1277
+    const char *nextobj, *nextopen, *nextclose;
1277 1278
     const char *q = obj->start + pdf->map;
1278
-    const char *dict, *start;
1279
+    const char *dict, *enddict, *start;
1279 1280
     off_t dict_length, full_dict_length;
1280
-    off_t bytesleft = obj_size(pdf, obj, 1);
1281
+    off_t objsize = obj_size(pdf, obj, 1);
1282
+    off_t bytesleft;
1281 1283
     unsigned i, filters=0;
1284
+    unsigned blockopens=0;
1282 1285
     enum objstate objstate = STATE_NONE;
1283 1286
 
1284
-    if (bytesleft < 0)
1287
+    if (objsize < 0)
1285 1288
 	return;
1286 1289
     start = q;
1290
+    bytesleft = objsize;
1291
+
1287 1292
     /* find start of dictionary */
1288 1293
     do {
1289
-	q2 = pdf_nextobject(q, bytesleft);
1290
-	bytesleft -= q2 -q;
1291
-	if (!q2 || bytesleft < 0) {
1294
+	nextobj = pdf_nextobject(q, bytesleft);
1295
+	bytesleft -= nextobj -q;
1296
+	if (!nextobj || bytesleft < 0) {
1292 1297
 	    return;
1293 1298
 	}
1294
-	q3 = memchr(q-1, '<', q2-q+1);
1295
-	q2++;
1299
+	q3 = memchr(q-1, '<', nextobj-q+1);
1300
+	nextobj++;
1296 1301
 	bytesleft--;
1297
-	q = q2;
1302
+	q = nextobj;
1298 1303
     } while (!q3 || q3[1] != '<');
1299 1304
     dict = q3+2;
1300 1305
     q = dict;
1301
-    bytesleft = obj_size(pdf, obj, 1) - (q - start);
1302
-    /* find end of dictionary */
1306
+    blockopens++;
1307
+    bytesleft = objsize - (q - start);
1308
+
1309
+    /* find end of dictionary block */
1303 1310
     do {
1304
-	q2 = pdf_nextobject(q, bytesleft);
1305
-	bytesleft -= q2 -q;
1306
-	if (!q2 || bytesleft < 0) {
1311
+        /* find end of object within bytesleft */
1312
+	nextobj = pdf_nextobject(q, bytesleft);
1313
+	if (!nextobj)
1314
+            return;
1315
+	bytesleft -= nextobj - q;
1316
+	if (bytesleft < 0) {
1307 1317
 	    return;
1308 1318
 	}
1309
-	q3 = memchr(q-1, '>', q2-q+1);
1310
-	q2++;
1311
-	bytesleft--;
1312
-	q = q2;
1313
-    } while (!q3 || q3[1] != '>');
1314
-    q = q3 + 2;
1315
-    q4 = NULL;
1316
-    /* find real end of dictionary (in case of nested one)*/
1317
-    do {
1318
-	q2 = pdf_nextobject(q, bytesleft);
1319
-	bytesleft -= q2 -q;
1320
-	if (!q2 || bytesleft < 0) {
1321
-	    break;
1322
-	}
1323
-	q4 = memchr(q-1, '>', q2-q+1);
1324
-	q2++;
1319
+
1320
+        /* while still looking ... */
1321
+        while ((q+1 < nextobj) && (blockopens > 0)) {
1322
+            /* find next close */
1323
+            nextclose = memchr(q-1, '>', nextobj-q+1);
1324
+            if (nextclose && (nextclose[1] == '>')) {
1325
+                /* check for nested open */
1326
+                while (nextopen = memchr(q-1, '<', nextclose-q+1)) {
1327
+                    if (nextopen[1] == '<') {
1328
+                        /* nested open */
1329
+                        blockopens++;
1330
+                        q = nextopen + 2;
1331
+                    }
1332
+                    else {
1333
+                        /* unmatched < */
1334
+                        q = nextopen + 2;
1335
+                    }
1336
+                }
1337
+                /* close block */
1338
+                blockopens--;
1339
+                q = nextclose + 2;
1340
+            }
1341
+            else {
1342
+                /* unmatched > */
1343
+                if (nextclose)
1344
+                    q = nextclose + 2;
1345
+                else {
1346
+                    break;
1347
+                }
1348
+            }
1349
+        }
1350
+
1351
+        /* prepare for next object check */
1352
+	nextobj++;
1325 1353
 	bytesleft--;
1326
-	q = q2;
1327
-    } while (!q4 || q4[1] != '>');
1328
-    if (!q4) q4 = q3;
1354
+	q = nextobj;
1355
+    } while (blockopens > 0);
1356
+
1357
+    /* End of dictionary found, would have early returned otherwise */
1358
+    enddict = nextclose;
1329 1359
     obj->flags |= 1 << OBJ_DICT;
1330
-    full_dict_length = dict_length = q4 - dict;
1360
+    full_dict_length = dict_length = enddict - dict;
1361
+
1362
+    /* This code prints the dictionary content.
1363
+    {
1364
+        char * dictionary = malloc(dict_length + 1);
1365
+        if (dictionary) {
1366
+            strncpy(dictionary, dict, dict_length);
1367
+            dictionary[dict_length] = '\0';
1368
+            cli_dbgmsg("cli_pdf: dictionary is <<%s>>\n", dictionary);
1369
+            free(dictionary);
1370
+        }
1371
+    }
1372
+    */
1331 1373
 
1332 1374
     /*  process pdf names */
1333 1375
     for (q = dict;dict_length > 0;) {