Browse code

Fix regression in new PDF code

Shawn Webb authored on 2014/07/31 03:20:45
Showing 1 changed file
... ...
@@ -1294,11 +1294,15 @@ enum objstate {
1294 1294
     STATE_ANY /* for actions table below */
1295 1295
 };
1296 1296
 
1297
+#define NAMEFLAG_NONE       0x0
1298
+#define NAMEFLAG_HEURISTIC  0x1
1299
+
1297 1300
 struct pdfname_action {
1298 1301
     const char *pdfname;
1299 1302
     enum pdf_objflags set_objflag;/* OBJ_DICT is noop */
1300 1303
     enum objstate from_state;/* STATE_NONE is noop */
1301 1304
     enum objstate to_state;
1305
+    uint32_t nameflags;
1302 1306
 #if HAVE_JSON
1303 1307
     void (*pdf_stats_cb)(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
1304 1308
 #endif
... ...
@@ -1306,88 +1310,88 @@ struct pdfname_action {
1306 1306
 
1307 1307
 #if HAVE_JSON
1308 1308
 static struct pdfname_action pdfname_actions[] = {
1309
-    {"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, ASCIIHexDecode_cb},
1310
-    {"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, ASCII85Decode_cb},
1311
-    {"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, ASCII85Decode_cb},
1312
-    {"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, ASCIIHexDecode_cb},
1313
-    {"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, EmbeddedFile_cb},
1314
-    {"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, FlateDecode_cb},
1315
-    {"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, FlateDecode_cb},
1316
-    {"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, Image_cb},
1317
-    {"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, LZWDecode_cb},
1318
-    {"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, LZWDecode_cb},
1319
-    {"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, RunLengthDecode_cb},
1320
-    {"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, RunLengthDecode_cb},
1321
-    {"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, CCITTFaxDecode_cb},
1322
-    {"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, CCITTFaxDecode_cb},
1323
-    {"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, JBIG2Decode_cb},
1324
-    {"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, DCTDecode_cb},
1325
-    {"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, DCTDecode_cb},
1326
-    {"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, JPXDecode_cb},
1327
-    {"Crypt",  OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, Crypt_cb},
1328
-    {"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, Standard_cb},
1329
-    {"Sig",    OBJ_SIGNED, STATE_ANY, STATE_NONE, Sig_cb},
1330
-    {"V",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NULL},
1331
-    {"R",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NULL},
1332
-    {"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NULL},
1333
-    {"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NULL},
1334
-    {"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT, JavaScript_cb},
1335
-    {"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NULL},
1336
-    {"S", OBJ_DICT, STATE_NONE, STATE_S, NULL},
1337
-    {"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NULL},
1338
-    {"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, OpenAction_cb},
1339
-    {"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, Launch_cb},
1340
-    {"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, Page_cb},
1341
-    {"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NULL},
1342
-    {"Author", OBJ_DICT, STATE_NONE, STATE_NONE, Author_cb},
1343
-    {"Producer", OBJ_DICT, STATE_NONE, STATE_NONE, Producer_cb},
1344
-    {"CreationDate", OBJ_DICT, STATE_NONE, STATE_NONE, CreationDate_cb},
1345
-    {"ModDate", OBJ_DICT, STATE_NONE, STATE_NONE, ModificationDate_cb},
1346
-    {"Creator", OBJ_DICT, STATE_NONE, STATE_NONE, Creator_cb},
1347
-    {"Title", OBJ_DICT, STATE_NONE, STATE_NONE, Title_cb},
1348
-    {"Keywords", OBJ_DICT, STATE_NONE, STATE_NONE, Keywords_cb},
1349
-    {"Subject", OBJ_DICT, STATE_NONE, STATE_NONE, Subject_cb},
1350
-    {"Pages", OBJ_DICT, STATE_NONE, STATE_NONE, Pages_cb},
1351
-    {"Colors", OBJ_DICT, STATE_NONE, STATE_NONE, Colors_cb},
1352
-    {"RichMedia", OBJ_DICT, STATE_NONE, STATE_NONE, RichMedia_cb},
1353
-    {"AcroForm", OBJ_DICT, STATE_NONE, STATE_NONE, AcroForm_cb},
1354
-    {"XFA", OBJ_DICT, STATE_NONE, STATE_NONE, XFA_cb}
1309
+    {"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb},
1310
+    {"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb},
1311
+    {"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb},
1312
+    {"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb},
1313
+    {"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, EmbeddedFile_cb},
1314
+    {"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb},
1315
+    {"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb},
1316
+    {"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Image_cb},
1317
+    {"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb},
1318
+    {"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb},
1319
+    {"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb},
1320
+    {"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb},
1321
+    {"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb},
1322
+    {"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb},
1323
+    {"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JBIG2Decode_cb},
1324
+    {"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb},
1325
+    {"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb},
1326
+    {"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JPXDecode_cb},
1327
+    {"Crypt",  OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, Crypt_cb},
1328
+    {"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, Standard_cb},
1329
+    {"Sig",    OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, Sig_cb},
1330
+    {"V",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
1331
+    {"R",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
1332
+    {"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NAMEFLAG_HEURISTIC, NULL},
1333
+    {"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NAMEFLAG_HEURISTIC, NULL},
1334
+    {"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT, NAMEFLAG_HEURISTIC, JavaScript_cb},
1335
+    {"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
1336
+    {"S", OBJ_DICT, STATE_NONE, STATE_S, NAMEFLAG_HEURISTIC, NULL},
1337
+    {"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
1338
+    {"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, NAMEFLAG_HEURISTIC, OpenAction_cb},
1339
+    {"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, NAMEFLAG_HEURISTIC, Launch_cb},
1340
+    {"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Page_cb},
1341
+    {"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NAMEFLAG_HEURISTIC, NULL},
1342
+    {"Author", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Author_cb},
1343
+    {"Producer", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Producer_cb},
1344
+    {"CreationDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, CreationDate_cb},
1345
+    {"ModDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, ModificationDate_cb},
1346
+    {"Creator", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Creator_cb},
1347
+    {"Title", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Title_cb},
1348
+    {"Keywords", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Keywords_cb},
1349
+    {"Subject", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Subject_cb},
1350
+    {"Pages", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Pages_cb},
1351
+    {"Colors", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Colors_cb},
1352
+    {"RichMedia", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, RichMedia_cb},
1353
+    {"AcroForm", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, AcroForm_cb},
1354
+    {"XFA", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, XFA_cb}
1355 1355
 };
1356 1356
 #else
1357 1357
 static struct pdfname_action pdfname_actions[] = {
1358
-    {"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER},
1359
-    {"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER},
1360
-    {"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER},
1361
-    {"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER},
1362
-    {"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE},
1363
-    {"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER},
1364
-    {"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER},
1365
-    {"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE},
1366
-    {"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER},
1367
-    {"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER},
1368
-    {"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER},
1369
-    {"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER},
1370
-    {"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER},
1371
-    {"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER},
1372
-    {"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER},
1373
-    {"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER},
1374
-    {"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER},
1375
-    {"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER},
1376
-    {"Crypt",  OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE},
1377
-    {"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER},
1378
-    {"Sig",    OBJ_SIGNED, STATE_ANY, STATE_NONE},
1379
-    {"V",     OBJ_SIGNED, STATE_ANY, STATE_NONE},
1380
-    {"R",     OBJ_SIGNED, STATE_ANY, STATE_NONE},
1381
-    {"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED},
1382
-    {"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER},
1383
-    {"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT},
1384
-    {"Length", OBJ_DICT, STATE_FILTER, STATE_NONE},
1385
-    {"S", OBJ_DICT, STATE_NONE, STATE_S},
1386
-    {"Type", OBJ_DICT, STATE_NONE, STATE_NONE},
1387
-    {"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION},
1388
-    {"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION},
1389
-    {"Page", OBJ_PAGE, STATE_NONE, STATE_NONE},
1390
-    {"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS}
1358
+    {"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1359
+    {"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1360
+    {"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1361
+    {"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1362
+    {"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
1363
+    {"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1364
+    {"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1365
+    {"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
1366
+    {"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1367
+    {"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1368
+    {"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1369
+    {"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1370
+    {"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1371
+    {"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1372
+    {"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1373
+    {"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1374
+    {"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1375
+    {"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1376
+    {"Crypt",  OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC},
1377
+    {"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1378
+    {"Sig",    OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
1379
+    {"V",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
1380
+    {"R",     OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
1381
+    {"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NAMEFLAG_HEURISTIC},
1382
+    {"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NAMEFLAG_HEURISTIC},
1383
+    {"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT, NAMEFLAG_HEURISTIC},
1384
+    {"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC},
1385
+    {"S", OBJ_DICT, STATE_NONE, STATE_S, NAMEFLAG_HEURISTIC},
1386
+    {"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
1387
+    {"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, NAMEFLAG_HEURISTIC},
1388
+    {"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, NAMEFLAG_HEURISTIC},
1389
+    {"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
1390
+    {"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NAMEFLAG_HEURISTIC}
1391 1391
 };
1392 1392
 #endif
1393 1393
 
... ...
@@ -1407,13 +1411,6 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch
1407 1407
         }
1408 1408
     }
1409 1409
 
1410
-    if (escapes) {
1411
-        /* if a commonly used PDF name is escaped that is certainly
1412
-           suspicious. */
1413
-        cli_dbgmsg("cli_pdf: pdfname %s is escaped\n", pdfname);
1414
-        pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME);
1415
-    }
1416
-
1417 1410
     if (!act) {
1418 1411
         /* these are digital signature objects, filter doesn't matter,
1419 1412
          * we don't need them anyway */
... ...
@@ -1425,6 +1422,13 @@ static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const ch
1425 1425
         return;
1426 1426
     }
1427 1427
 
1428
+    if ((act->nameflags & NAMEFLAG_HEURISTIC) && escapes) {
1429
+        /* if a commonly used PDF name is escaped that is certainly
1430
+           suspicious. */
1431
+        cli_dbgmsg("cli_pdf: pdfname %s is escaped\n", pdfname);
1432
+        pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME);
1433
+    }
1434
+
1428 1435
 #if HAVE_JSON
1429 1436
     if ((act->pdf_stats_cb))
1430 1437
         act->pdf_stats_cb(pdf, obj, act);