Browse code

Scan_all: scan more inside PDF

David Raynor authored on 2013/06/21 02:43:46
Showing 1 changed file
... ...
@@ -174,6 +174,7 @@ static int find_stream_bounds(const char *start, off_t bytesleft, off_t byteslef
174 174
     return 0;
175 175
 }
176 176
 
177
+/* Expected returns: 1 if success, 0 if no more objects, -1 if error */
177 178
 static int pdf_findobj(struct pdf_struct *pdf)
178 179
 {
179 180
     const char *start, *q, *q2, *q3, *eof;
... ...
@@ -1095,12 +1096,13 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
1095 1095
 	rc2 = cli_magic_scandesc(fout, pdf->ctx);
1096 1096
 	if (rc2 == CL_VIRUS || rc == CL_SUCCESS)
1097 1097
 	    rc = rc2;
1098
-	if (rc == CL_CLEAN) {
1098
+	if ((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) {
1099 1099
 	    rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, obj - pdf->objs);
1100 1100
 	    if (rc2 == CL_VIRUS)
1101 1101
 		rc = rc2;
1102 1102
 	}
1103
-	if (rc == CL_CLEAN && (obj->flags & (1 << OBJ_CONTENTS))) {
1103
+	if (((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES)))
1104
+		&& (obj->flags & (1 << OBJ_CONTENTS))) {
1104 1105
 	    lseek(fout, 0, SEEK_SET);
1105 1106
 	    cli_dbgmsg("cli_pdf: dumping contents %u %u\n", obj->id>>8, obj->id&0xff);
1106 1107
 	    rc2 = pdf_scan_contents(fout, pdf);
... ...
@@ -2044,7 +2046,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2044 2044
     long xref;
2045 2045
     const char *pdfver, *start, *eofmap, *q, *eof;
2046 2046
     int rc, badobjects = 0;
2047
-    unsigned i;
2047
+    unsigned i, alerts = 0;
2048 2048
 
2049 2049
     cli_dbgmsg("in cli_pdf(%s)\n", dir);
2050 2050
     memset(&pdf, 0, sizeof(pdf));
... ...
@@ -2138,10 +2140,16 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2138 2138
 	return CL_EMAP;
2139 2139
     }
2140 2140
     rc = run_pdf_hooks(&pdf, PDF_PHASE_PRE, -1, -1);
2141
-    if (rc) {
2141
+    if ((rc == CL_VIRUS) && SCAN_ALL) {
2142
+        cli_dbgmsg("cli_pdf: (pre hooks) returned %d\n", rc);
2143
+        alerts++;
2144
+        rc = CL_CLEAN;
2145
+    }
2146
+    else if (rc) {
2142 2147
 	cli_dbgmsg("cli_pdf: (pre hooks) returning %d\n", rc);
2143 2148
 	return rc == CL_BREAK ? CL_CLEAN : rc;
2144 2149
     }
2150
+
2145 2151
     /* parse PDF and find obj offsets */
2146 2152
     while ((rc = pdf_findobj(&pdf)) > 0) {
2147 2153
 	struct pdf_obj *obj = &pdf.objs[pdf.nobjs-1];
... ...
@@ -2171,20 +2179,41 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2171 2171
 	 * This doesn't trigger for PDFs that are encrypted but don't need
2172 2172
 	 * a password to decrypt */
2173 2173
 	cli_append_virus(ctx, "Heuristics.Encrypted.PDF");
2174
-	rc = CL_VIRUS;
2174
+	alerts++;
2175
+        if (!SCAN_ALL)
2176
+            rc = CL_VIRUS;
2175 2177
     }
2176 2178
 
2177
-    if (!rc)
2179
+    if (!rc) {
2178 2180
 	rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
2181
+        cli_dbgmsg("cli_pdf: (parsed hooks) returned %d\n", rc);
2182
+        if (rc == CL_VIRUS) {
2183
+            alerts++;
2184
+            if (SCAN_ALL) {
2185
+                rc = CL_CLEAN;
2186
+            }
2187
+        }
2188
+    }
2189
+
2179 2190
     /* extract PDF objs */
2180 2191
     for (i=0;!rc && i<pdf.nobjs;i++) {
2181
-	struct pdf_obj *obj = &pdf.objs[i];
2182
-	rc = pdf_extract_obj(&pdf, obj);
2183
-	if (rc == CL_EFORMAT) {
2184
-            /* Don't halt on one bad object */
2185
-            cli_dbgmsg("cli_pdf: bad format object, skipping to next\n");
2186
-            badobjects++;
2187
-            rc = CL_CLEAN;
2192
+        struct pdf_obj *obj = &pdf.objs[i];
2193
+        rc = pdf_extract_obj(&pdf, obj);
2194
+        switch (rc) {
2195
+            case CL_EFORMAT:
2196
+                /* Don't halt on one bad object */
2197
+                cli_dbgmsg("cli_pdf: bad format object, skipping to next\n");
2198
+                badobjects++;
2199
+                rc = CL_CLEAN;
2200
+                break;
2201
+            case CL_VIRUS:
2202
+                alerts++;
2203
+                if (SCAN_ALL) {
2204
+                    rc = CL_CLEAN;
2205
+                }
2206
+                break;
2207
+            default:
2208
+                break;
2188 2209
         }
2189 2210
     }
2190 2211
 
... ...
@@ -2195,13 +2224,19 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2195 2195
    if (pdf.flags && !rc) {
2196 2196
 	cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags);
2197 2197
 	rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1);
2198
-	if (!rc && (ctx->options & CL_SCAN_ALGORITHMIC)) {
2199
-	    if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
2200
-		/* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
2201
-		cli_append_virus(ctx, "Heuristics.PDF.ObfuscatedNameObject");
2202
-		rc = cli_found_possibly_unwanted(ctx);
2203
-	    }
2204
-	}
2198
+        if (rc == CL_VIRUS) {
2199
+            alerts++;
2200
+            if (SCAN_ALL) {
2201
+                rc = CL_CLEAN;
2202
+            }
2203
+        }
2204
+        if (!rc && (ctx->options & CL_SCAN_ALGORITHMIC)) {
2205
+            if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
2206
+                /* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
2207
+                cli_append_virus(ctx, "Heuristics.PDF.ObfuscatedNameObject");
2208
+                rc = cli_found_possibly_unwanted(ctx);
2209
+            }
2210
+        }
2205 2211
 #if 0
2206 2212
 	/* TODO: find both trailers, and /Encrypt settings */
2207 2213
 	if (pdf.flags & (1 << LINEARIZED_PDF))
... ...
@@ -2217,7 +2252,10 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2217 2217
 #endif
2218 2218
     }
2219 2219
 
2220
-    if (!rc && badobjects) {
2220
+    if (alerts) {
2221
+        rc = CL_VIRUS;
2222
+    }
2223
+    else if (!rc && badobjects) {
2221 2224
         rc = CL_EFORMAT;
2222 2225
     }
2223 2226