Browse code

Fixes to a handful of bugs identified during regression testing of PDF and UnRAR changes.

Fix for minor memory leak in fmap_dump_to_file().
Fix to PDF object stream logic, accounting for a realloc() issue when the only pdf object stream fails to parse, and for when pdf objects in a stream appear to extend further than the size of the stream.
Fix for memory leak cleaning up PDF object stream buffer in error condition.
Fix to bug in pdf_decodestream wherein objects were found in an object stream, but the object stream could later be free'd if max scansize was exceeded, resulting in a NULL dereference.
General cleanup of pdf_decodestream/pdf_decodestream_internal exit code logic.

Micah Snyder authored on 2018/10/02 08:46:23
Showing 4 changed files
... ...
@@ -750,7 +750,6 @@ cl_error_t fmap_dump_to_file(fmap_t* map, const char* filepath, const char* tmpd
750 750
 
751 751
     char* filebase = NULL;
752 752
     char* prefix = NULL;
753
-    uint32_t prefix_allocated = 0;
754 753
 
755 754
     char* tmpname = NULL;
756 755
     int tmpfd = -1;
... ...
@@ -782,11 +781,13 @@ cl_error_t fmap_dump_to_file(fmap_t* map, const char* filepath, const char* tmpd
782 782
                 return CL_EMEM;
783 783
             }
784 784
             snprintf(prefix, prefix_len, "%s.%zu-%zu", filebase, start_offset, end_offset);
785
-            prefix_allocated = 1;
785
+
786
+            free(filebase);
787
+            filebase = NULL;
786 788
         } else {
787 789
             /* Else if we're dumping the whole thing, use the filebase as the prefix */
788 790
             prefix = filebase;
789
-            prefix_allocated = 0;
791
+            filebase = NULL;
790 792
         }
791 793
     }
792 794
 
... ...
@@ -794,18 +795,16 @@ cl_error_t fmap_dump_to_file(fmap_t* map, const char* filepath, const char* tmpd
794 794
     ret = cli_gentempfd_with_prefix(tmpdir, prefix, &tmpname, &tmpfd);
795 795
     if (ret != CL_SUCCESS) {
796 796
         cli_dbgmsg("fmap_dump_to_file: failed to generate temporary file.\n");
797
-        if ((NULL != prefix) && (prefix_allocated)) {
797
+        if (NULL != prefix) {
798 798
             free(prefix);
799 799
             prefix = NULL;
800
-            prefix_allocated = 0;
801 800
         }
802 801
         return ret;
803 802
     }
804 803
 
805
-    if ((NULL != prefix) && (prefix_allocated)) {
804
+    if (NULL != prefix) {
806 805
         free(prefix);
807 806
         prefix = NULL;
808
-        prefix_allocated = 0;
809 807
     }
810 808
 
811 809
     do {
... ...
@@ -397,8 +397,20 @@ int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm,
397 397
             status = CL_EPARSE;
398 398
             goto done;
399 399
         }
400
+        else if (next_objoff <= objoff) {
401
+            /* Failed to find obj offset for next obj */
402
+            cli_dbgmsg("pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's less than or equal to the current one!\n");
403
+            status = CL_EPARSE;
404
+            goto done;
405
+        }
406
+        else if (objstm->first + next_objoff > objstm->streambuf_len) {
407
+            /* Failed to find obj offset for next obj */
408
+            cli_dbgmsg("pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's further out than the size of the stream!\n");
409
+            status = CL_EPARSE;
410
+            goto done;
411
+        }
400 412
 
401
-        obj->size = objstm->first + next_objoff - obj->start;
413
+        obj->size = next_objoff - objoff;
402 414
     } 
403 415
     else 
404 416
     {
... ...
@@ -1364,15 +1376,20 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
1364 1364
 {
1365 1365
     cli_ctx *ctx = pdf->ctx;
1366 1366
     char fullname[NAME_MAX + 1];
1367
-    int fout;
1368
-    ptrdiff_t sum = 0;
1367
+    int fout = -1;
1368
+    size_t sum = 0;
1369 1369
     cl_error_t rc = CL_SUCCESS;
1370 1370
     int dump = 1;
1371 1371
 
1372 1372
     cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id>>8, obj->id&0xff);
1373 1373
 
1374
-    if (obj->objstm)
1374
+    if (obj->objstm) {
1375 1375
         cli_dbgmsg("pdf_extract_obj: extracting obj found in objstm.\n");
1376
+        if (obj->objstm->streambuf == NULL) {
1377
+            cli_warnmsg("pdf_extract_obj: object in object stream has null stream buffer!\n");
1378
+            return CL_EFORMAT;
1379
+        }
1380
+    }
1376 1381
 
1377 1382
     /* TODO: call bytecode hook here, allow override dumpability */
1378 1383
     if ((!(obj->flags & (1 << OBJ_STREAM)) || (obj->flags & (1 << OBJ_HASFILTERS))) && !(obj->flags & DUMP_MASK)) {
... ...
@@ -1584,17 +1601,25 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
1584 1584
                 }
1585 1585
 
1586 1586
                 sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, (uint32_t)length, xref, fout, &rc, objstm);
1587
-                if (sum < 0) {
1588
-                    /*
1589
-                    * If we were expecting an objstm and there was a failure...
1590
-                    *   discard the memory for last object stream.
1591
-                    */
1592
-                    if (NULL != objstm)
1593
-                    {
1587
+                if ((CL_SUCCESS != rc) && (CL_VIRUS != rc)) {
1588
+                    cli_dbgmsg("Error decoding stream! Error code: %d\n", rc);
1589
+
1590
+                    /* It's ok if we couldn't decode the stream,
1591
+                     *   make a best effort to keep parsing. */
1592
+                    if (CL_EPARSE == rc)
1593
+                        rc = CL_SUCCESS;
1594
+
1595
+                    if (NULL != objstm) {
1596
+                        /*
1597
+                         * If we were expecting an objstm and there was a failure...
1598
+                         *   discard the memory for last object stream.
1599
+                         */
1594 1600
                         if (NULL != pdf->objstms) {
1595 1601
                             if (NULL != pdf->objstms[pdf->nobjstms - 1]) {
1596
-                                pdf->objstms[pdf->nobjstms - 1]->streambuf = NULL;
1597
-
1602
+                                if (NULL != pdf->objstms[pdf->nobjstms - 1]->streambuf) {
1603
+                                    free(pdf->objstms[pdf->nobjstms - 1]->streambuf);
1604
+                                    pdf->objstms[pdf->nobjstms - 1]->streambuf = NULL;
1605
+                                }
1598 1606
                                 free(pdf->objstms[pdf->nobjstms - 1]);
1599 1607
                                 pdf->objstms[pdf->nobjstms - 1] = NULL;
1600 1608
                             }
... ...
@@ -1602,11 +1627,16 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
1602 1602
                             /* Pop the objstm off the end of the pdf->objstms array. */
1603 1603
                             if (pdf->nobjstms > 0) {
1604 1604
                                 pdf->nobjstms--;
1605
-                                pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms);
1606
-
1607
-                                if (!pdf->objstms) {
1608
-                                    cli_warnmsg("pdf_extract_obj: out of memory when shrinking down objstm array\n");
1609
-                                    return CL_EMEM;
1605
+                                if (0 == pdf->nobjstms) {
1606
+                                    free(pdf->objstms);
1607
+                                    pdf->objstms = NULL;
1608
+                                } else {
1609
+                                    pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms);
1610
+
1611
+                                    if (!pdf->objstms) {
1612
+                                        cli_warnmsg("pdf_extract_obj: out of memory when shrinking down objstm array\n");
1613
+                                        return CL_EMEM;
1614
+                                    }
1610 1615
                                 }
1611 1616
                             } else {
1612 1617
                                 /* hm.. this shouldn't happen */
... ...
@@ -1619,7 +1649,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
1619 1619
                 if (dparams)
1620 1620
                     pdf_free_dict(dparams);
1621 1621
 
1622
-                if (sum < 0 || ((rc == CL_VIRUS) && !SCAN_ALLMATCHES)) {
1622
+                if ((rc == CL_VIRUS) && !SCAN_ALLMATCHES) {
1623 1623
                     sum = 0; /* prevents post-filter scan */
1624 1624
                     break;
1625 1625
                 }
... ...
@@ -76,7 +76,7 @@ struct pdf_token {
76 76
     uint8_t *content;  /* content stream */
77 77
 };
78 78
 
79
-static ptrdiff_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm);
79
+static size_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm);
80 80
 static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl);
81 81
 
82 82
 static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
... ...
@@ -101,34 +101,36 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
101 101
  * @param fout      File descriptor to write to to be scanned.
102 102
  * @param[out] rc   Return code ()
103 103
  * @param objstm    (optional) Object stream context structure.
104
- * @return ptrdiff_t   The number of bytes written to 'fout' to be scanned. -1 if failed out.
104
+ * @return size_t   The number of bytes written to 'fout' to be scanned.
105 105
  */
106
-ptrdiff_t pdf_decodestream(
106
+size_t pdf_decodestream(
107 107
     struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
108 108
     const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status,
109 109
     struct objstm_struct *objstm)
110 110
 {
111 111
     struct pdf_token *token = NULL;
112
-    ptrdiff_t bytes_scanned = -1;
113
-    cl_error_t retval = CL_SUCCESS;
112
+    size_t bytes_scanned = 0;
113
+    cli_ctx *ctx = pdf->ctx;
114 114
 
115 115
     if (!status) {
116 116
         /* invalid args, and no way to pass back the status code */
117
-        return -1;
117
+        return 0;
118 118
     }
119 119
 
120 120
     if (!pdf || !obj) {
121 121
         /* Invalid args */
122
-        retval = CL_EARG;
122
+        *status = CL_EARG;
123 123
         goto done;
124 124
     }
125 125
 
126 126
     if (!stream || !streamlen || fout < 0) {
127 127
         cli_dbgmsg("pdf_decodestream: no filters or stream on obj %u %u\n", obj->id>>8, obj->id&0xff);
128
-        retval = CL_ENULLARG;
128
+        *status = CL_ENULLARG;
129 129
         goto done;
130 130
     }
131 131
 
132
+    *status = CL_SUCCESS;
133
+
132 134
 #if 0
133 135
     if (params)
134 136
         pdf_print_dict(params, 0);
... ...
@@ -136,7 +138,7 @@ ptrdiff_t pdf_decodestream(
136 136
 
137 137
     token = cli_malloc(sizeof(struct pdf_token));
138 138
     if (!token) {
139
-        retval = CL_EMEM;
139
+        *status = CL_EMEM;
140 140
         goto done;
141 141
     }
142 142
 
... ...
@@ -149,7 +151,7 @@ ptrdiff_t pdf_decodestream(
149 149
     token->content = cli_malloc(streamlen);
150 150
     if (!token->content) {
151 151
         free(token);
152
-        retval = CL_EMEM;
152
+        *status = CL_EMEM;
153 153
         goto done;
154 154
     }
155 155
     memcpy(token->content, stream, streamlen);
... ...
@@ -157,38 +159,35 @@ ptrdiff_t pdf_decodestream(
157 157
 
158 158
     cli_dbgmsg("pdf_decodestream: detected %lu applied filters\n", (long unsigned)(obj->numfilters));
159 159
 
160
-    bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, &retval, objstm);
161
-    /* 
162
-     * Pass back the return value, though we really only care
163
-     * if it is CV_VIRUS or CL_SUCCESS.
164
-     */
165
-    if (retval == CL_VIRUS)
166
-        retval = CL_VIRUS;
167
-    else
168
-        retval = CL_SUCCESS;
160
+    bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, status, objstm);
161
+
162
+    if ((CL_VIRUS == *status) && !SCAN_ALLMATCHES) {
163
+        goto done;
164
+    }
169 165
 
170
-    if (!token->success) {
166
+    if (0 == token->success) {
171 167
         /*
172
-         * If it was successful, the internal() function calls cli_writen()
173
-         * However, in this case... no non-forced filter are decoded, 
174
-         *   so return the raw stream.
168
+         * Either:
169
+         *  a) it failed to decode any filters, or
170
+         *  b) there were no filters.
171
+         *
172
+         * Write out the raw stream to be scanned.
173
+         *
174
+         * Nota bene: If it did decode any filters, the internal() function would
175
+         *            have written out the decoded stream to be scanned.
175 176
          */
176 177
         if (!cli_checklimits("pdf", pdf->ctx, streamlen, 0, 0)) {
177 178
             cli_dbgmsg("pdf_decodestream: no non-forced filters decoded, returning raw stream\n");
178 179
 
179 180
             if (cli_writen(fout, stream, streamlen) != streamlen) {
180
-                cli_errmsg("pdf_decodestream: failed to write output file\n");
181
-                retval = CL_EWRITE;
182
-                bytes_scanned = -1;
183
-                goto done;
181
+                cli_errmsg("pdf_decodestream: failed to write raw stream to output file\n");
182
+            } else {
183
+                bytes_scanned = streamlen;
184 184
             }
185
-            bytes_scanned = streamlen;
186 185
         }
187 186
     }
188 187
 
189 188
 done:
190
-    *status = retval;
191
-
192 189
     /*
193 190
      * Free up the token, and token content, if any.
194 191
      */
... ...
@@ -220,28 +219,30 @@ done:
220 220
  * @param objstm        (optional) Object stream context structure.
221 221
  * @return ptrdiff_t    The number of bytes we wrote to 'fout'. -1 if failed out.
222 222
  */
223
-static ptrdiff_t pdf_decodestream_internal(
223
+static size_t pdf_decodestream_internal(
224 224
     struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
225 225
     struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm)
226 226
 {
227 227
     cl_error_t vir = CL_CLEAN;
228 228
     cl_error_t retval = CL_SUCCESS;
229
-    ptrdiff_t bytes_scanned = -1;
229
+    size_t bytes_scanned = 0;
230 230
     cli_ctx *ctx = pdf->ctx;
231 231
     const char *filter = NULL;
232 232
     int i;
233 233
 
234 234
     if (!status) {
235 235
         /* invalid args, and no way to pass back the status code */
236
-        return -1;
236
+        return 0;
237 237
     }
238 238
 
239 239
     if (!pdf || !obj || !token) {
240 240
         /* Invalid args */
241
-        retval = CL_EARG;
241
+        *status = CL_EARG;
242 242
         goto done;
243 243
     }
244 244
     
245
+    *status = CL_SUCCESS;
246
+    
245 247
     /*
246 248
      * if pdf is decryptable, scan for CRYPT filter
247 249
      * if none, force a DECRYPT filter application
... ...
@@ -253,6 +254,7 @@ static ptrdiff_t pdf_decodestream_internal(
253 253
             cli_dbgmsg("pdf_decodestream_internal: decoding => non-filter CRYPT\n");
254 254
             retval = filter_decrypt(pdf, obj, params, token, 1);
255 255
             if (retval != CL_SUCCESS) {
256
+                *status = CL_EPARSE;
256 257
                 goto done;
257 258
             }
258 259
         }
... ...
@@ -323,12 +325,15 @@ static ptrdiff_t pdf_decodestream_internal(
323 323
 
324 324
                 switch (retval) {
325 325
                 case CL_VIRUS:
326
+                    *status = CL_VIRUS;
326 327
                     reason = "detection";
327 328
                     break;
328 329
                 case CL_BREAK:
330
+                    *status = CL_SUCCESS;
329 331
                     reason = "decoding break";
330 332
                     break;
331 333
                 default:
334
+                    *status = CL_EPARSE;
332 335
                     reason = "decoding error";
333 336
                     break;
334 337
                 }
... ...
@@ -341,31 +346,35 @@ static ptrdiff_t pdf_decodestream_internal(
341 341
 
342 342
         /* Dump the stream content to a text file if keeptmp is enabled. */
343 343
         if (pdf->ctx->engine->keeptmp) {
344
-            retval = pdf_decode_dump(pdf, obj, token, i+1);
345
-            if (retval != CL_SUCCESS) {
346
-                goto done;
344
+            if (CL_SUCCESS != pdf_decode_dump(pdf, obj, token, i+1)) {
345
+                cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to temp file\n");
347 346
             }
348 347
         }
349 348
     }
350 349
 
351 350
     if (token->success > 0) {
352 351
         /*
353
-         * Looks like we successfully decoded the stream, so lets write it out.
354
-         *   In the failure case, the caller will deal with the raw stream.
352
+         * Looks like we successfully decoded some or all of the stream filters,
353
+         * so lets write it out to a file descriptor we scan.
354
+         *
355
+         * In the event that we didn't decode any filters (or maybe there
356
+         * weren't any filters), the calling function will do the same with
357
+         * the raw stream.
355 358
          */
356
-        if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
359
+        if (CL_SUCCESS == cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
357 360
             if (cli_writen(fout, token->content, token->length) != token->length) {
358
-                cli_errmsg("pdf_decodestream_internal: failed to write output file\n");
359
-                retval = CL_EWRITE;
360
-                bytes_scanned = -1;
361
-                goto done;
361
+                cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to output file\n");
362
+            } else {
363
+                bytes_scanned = token->length;
362 364
             }
363
-            bytes_scanned = token->length;
364 365
         }
365 366
     }
366 367
 
367
-    if (NULL != objstm)
368
+    if ((NULL != objstm) &&
369
+        ((CL_SUCCESS == *status) || ((CL_VIRUS == *status) && SCAN_ALLMATCHES)))
368 370
     {
371
+        int objs_found = pdf->nobjs;
372
+
369 373
         /*
370 374
          * The caller indicated that the decoded data is an object stream.
371 375
          * Perform experimental object stream parsing to extract objects from the stream.
... ...
@@ -377,7 +386,9 @@ static ptrdiff_t pdf_decodestream_internal(
377 377
         token->content = NULL;
378 378
         token->length = 0;
379 379
 
380
-        int objs_found = pdf->nobjs;
380
+        /* Don't store the result. It's ok if some or all objects failed to parse.
381
+           It would be far worse to add objects from a stream to the list, and then free
382
+           the stream buffer due to an "error". */
381 383
         if (CL_SUCCESS != pdf_find_and_parse_objs_in_objstm(pdf, objstm))
382 384
         {
383 385
             cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm failed!\n");
... ...
@@ -392,14 +403,9 @@ static ptrdiff_t pdf_decodestream_internal(
392 392
 
393 393
 done:
394 394
 
395
-    *status = retval;
396
-
397 395
     if (vir == CL_VIRUS)
398 396
         *status = CL_VIRUS;
399 397
 
400
-    if (*status == CL_BREAK)
401
-        *status = CL_SUCCESS;
402
-
403 398
     return bytes_scanned;
404 399
 }
405 400
 
... ...
@@ -51,9 +51,9 @@
51 51
  * @param fout      File descriptor to write to a temp file.
52 52
  * @param[out] rc   Return code ()
53 53
  * @param objstm    Object stream context structure.
54
- * @return ptrdiff_t 
54
+ * @return size_t   The number of bytes written to fout to be scanned.
55 55
  */
56
-ptrdiff_t pdf_decodestream(
56
+size_t pdf_decodestream(
57 57
     struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
58 58
     const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status,
59 59
     struct objstm_struct *objstm);