Browse code

pdfdecode: implement rldecode filter handler

Kevin Lin authored on 2016/03/30 01:52:08
Showing 1 changed file
... ...
@@ -69,7 +69,7 @@ struct pdf_token {
69 69
 };
70 70
 
71 71
 static  int filter_ascii85decode(struct pdf_token *token);
72
-//static  int filter_rldecode(struct pdf_token *token)
72
+static  int filter_rldecode(struct pdf_token *token);
73 73
 static  int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
74 74
 static  int filter_asciihexdecode(struct pdf_token *token);
75 75
 
... ...
@@ -125,6 +125,11 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
125 125
             rc = filter_ascii85decode(token);
126 126
             break;
127 127
 
128
+        case OBJ_FILTER_RL:
129
+            cli_dbgmsg("cli_pdf: decoding [%d] => RLDECODE\n", obj->filterlist[i]);
130
+            rc = filter_rldecode(token);
131
+            break;
132
+
128 133
         case OBJ_FILTER_FLATE:
129 134
             cli_dbgmsg("cli_pdf: decoding [%d] => FLATEDECODE\n", obj->filterlist[i]);
130 135
             rc = filter_flatedecode(pdf, obj, token);
... ...
@@ -135,7 +140,6 @@ static int pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj
135 135
             rc = filter_asciihexdecode(token);
136 136
             break;
137 137
 
138
-        case OBJ_FILTER_RL:
139 138
         case OBJ_FILTER_JPX:
140 139
         case OBJ_FILTER_DCT: //OBJ_FILTER_JBIG2
141 140
         case OBJ_FILTER_LZW:
... ...
@@ -256,11 +260,92 @@ static int filter_ascii85decode(struct pdf_token *token)
256 256
     return rc;
257 257
 }
258 258
 
259
-/*
259
+/* imported from razorback */
260 260
 static int filter_rldecode(struct pdf_token *token)
261 261
 {
262
+    uint8_t *decoded, *temp;
263
+    uint32_t declen = 0, capacity = 0;
264
+
265
+    uint8_t *content = (uint8_t *)token->content;
266
+    uint32_t length = token->length;
267
+    uint32_t offset = 0;
268
+    int rc = CL_SUCCESS;
269
+
270
+    if (!(decoded = cli_calloc(BUFSIZ, sizeof(uint8_t)))) {
271
+        cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
272
+        return CL_EMEM;
273
+    }
274
+    capacity = BUFSIZ;
275
+
276
+    while (offset < length) {
277
+        uint8_t srclen = content[offset++];
278
+        if (srclen < 128) {
279
+            /* direct copy of (srclen + 1) bytes */
280
+            if (offset + srclen + 1 > length) {
281
+                cli_dbgmsg("cli_pdf: required source length (%lu) exceeds remaining length (%lu)\n",
282
+                           (long unsigned)(offset+srclen+1), (long unsigned)(length-offset));
283
+                rc = CL_EFORMAT;
284
+                break;
285
+            }
286
+            if (declen + srclen + 1 > capacity) {
287
+                /* TODO - limit check */
288
+                if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) {
289
+                    cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n");
290
+                    rc = CL_EMEM;
291
+                    break;
292
+                }
293
+                decoded = temp;
294
+                capacity += BUFSIZ;
295
+            }
296
+
297
+            memcpy(decoded+declen, content+offset, srclen+1);
298
+            offset += srclen + 1;
299
+            declen += srclen + 1;
300
+        } else if (srclen > 128) {
301
+            /* copy the next byte (257 - srclen) times */
302
+            if (offset + 1 > length) {
303
+                cli_dbgmsg("cli_pdf: required source length (%lu) exceeds remaining length (%lu)\n",
304
+                           (long unsigned)(offset+srclen+1), (long unsigned)(length-offset));
305
+                rc = CL_EFORMAT;
306
+                break;
307
+            }
308
+            if (declen + (257 - srclen) + 1 > capacity) {
309
+                /* TODO - limit check */
310
+                if (!(temp = cli_realloc(decoded, capacity + BUFSIZ))) {
311
+                    cli_errmsg("cli_pdf: cannot reallocate memory for decoded output\n");
312
+                    rc = CL_EMEM;
313
+                    break;
314
+                }
315
+                decoded = temp;
316
+                capacity += BUFSIZ;
317
+            }
318
+
319
+            memset(decoded+declen, content[offset], 257-srclen);
320
+            offset++;
321
+            declen += 257 - srclen;
322
+        } else { /* srclen == 128 */
323
+            /* end of data */
324
+            cli_dbgmsg("cli_pdf: end-of-stream marker @ offset %lu (%lu bytes remaining)\n",
325
+                       (unsigned long)offset, (long unsigned)(token->length-offset));
326
+            break;
327
+        }
328
+    }
329
+
330
+    if (rc == CL_SUCCESS) {
331
+        free(token->content);
332
+
333
+        cli_dbgmsg("cli_pdf: inflated %lu bytes from %lu total bytes\n",
334
+                   (unsigned long)declen, (unsigned long)(token->length));
335
+
336
+        token->content = decoded;
337
+        token->length = declen;
338
+    } else {
339
+        cli_errmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
340
+                   (unsigned long)offset, (unsigned long)(token->length));
341
+        free(decoded);
342
+    }
343
+    return rc;
262 344
 }
263
-*/
264 345
 
265 346
 static uint8_t *decode_nextlinestart(uint8_t *content, uint32_t length)
266 347
 {