... | ... |
@@ -69,7 +69,6 @@ static int ascii85decode(const char *buf, off_t len, unsigned char *output); |
69 | 69 |
static const char *pdf_nextlinestart(const char *ptr, size_t len); |
70 | 70 |
static const char *pdf_nextobject(const char *ptr, size_t len); |
71 | 71 |
|
72 |
-#if 1 |
|
73 | 72 |
static int xrefCheck(const char *xref, const char *eof) |
74 | 73 |
{ |
75 | 74 |
const char *q; |
... | ... |
@@ -2145,603 +2144,6 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
2145 | 2145 |
return rc == CL_BREAK ? CL_CLEAN : rc; |
2146 | 2146 |
} |
2147 | 2147 |
|
2148 |
-#else |
|
2149 |
-static int try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, cli_ctx *ctx); |
|
2150 |
-static int flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx); |
|
2151 |
-int |
|
2152 |
-cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) |
|
2153 |
-{ |
|
2154 |
- off_t size; /* total number of bytes in the file */ |
|
2155 |
- off_t bytesleft, trailerlength; |
|
2156 |
- char *buf; /* start of memory mapped area */ |
|
2157 |
- const char *p, *q, *trailerstart; |
|
2158 |
- const char *xrefstart; /* cross reference table */ |
|
2159 |
- /*size_t xreflength;*/ |
|
2160 |
- int printed_predictor_message, printed_embedded_font_message, rc; |
|
2161 |
- unsigned int files; |
|
2162 |
- fmap_t *map = *ctx->fmap; |
|
2163 |
- int opt_failed = 0; |
|
2164 |
- |
|
2165 |
- cli_dbgmsg("in cli_pdf(%s)\n", dir); |
|
2166 |
- size = map->len - offset; |
|
2167 |
- |
|
2168 |
- if(size <= 7) /* doesn't even include the file header */ |
|
2169 |
- return CL_CLEAN; |
|
2170 |
- |
|
2171 |
- p = buf = fmap_need_off_once(map, 0, size); /* FIXME: really port to fmap */ |
|
2172 |
- if(!buf) { |
|
2173 |
- cli_errmsg("cli_pdf: mmap() failed\n"); |
|
2174 |
- return CL_EMAP; |
|
2175 |
- } |
|
2176 |
- |
|
2177 |
- cli_dbgmsg("cli_pdf: scanning %lu bytes\n", (unsigned long)size); |
|
2178 |
- |
|
2179 |
- /* Lines are terminated by \r, \n or both */ |
|
2180 |
- |
|
2181 |
- /* File Header */ |
|
2182 |
- bytesleft = size - 5; |
|
2183 |
- for(q = p; bytesleft; bytesleft--, q++) { |
|
2184 |
- if(!strncasecmp(q, "%PDF-", 5)) { |
|
2185 |
- bytesleft = size - (off_t) (q - p); |
|
2186 |
- p = q; |
|
2187 |
- break; |
|
2188 |
- } |
|
2189 |
- } |
|
2190 |
- |
|
2191 |
- if(!bytesleft) { |
|
2192 |
- cli_dbgmsg("cli_pdf: file header not found\n"); |
|
2193 |
- return CL_CLEAN; |
|
2194 |
- } |
|
2195 |
- |
|
2196 |
- /* Find the file trailer */ |
|
2197 |
- for(q = &p[bytesleft - 5]; q > p; --q) |
|
2198 |
- if(strncasecmp(q, "%%EOF", 5) == 0) |
|
2199 |
- break; |
|
2200 |
- |
|
2201 |
- if(q <= p) { |
|
2202 |
- cli_dbgmsg("cli_pdf: trailer not found\n"); |
|
2203 |
- return CL_CLEAN; |
|
2204 |
- } |
|
2205 |
- |
|
2206 |
- for(trailerstart = &q[-7]; trailerstart > p; --trailerstart) |
|
2207 |
- if(memcmp(trailerstart, "trailer", 7) == 0) |
|
2208 |
- break; |
|
2209 |
- |
|
2210 |
- /* |
|
2211 |
- * q points to the end of the trailer section |
|
2212 |
- */ |
|
2213 |
- trailerlength = (long)(q - trailerstart); |
|
2214 |
- if(cli_memstr(trailerstart, trailerlength, "Encrypt", 7)) { |
|
2215 |
- /* |
|
2216 |
- * This tends to mean that the file is, in effect, read-only |
|
2217 |
- * http://www.cs.cmu.edu/~dst/Adobe/Gallery/anon21jul01-pdf-encryption.txt |
|
2218 |
- * http://www.adobe.com/devnet/pdf/ |
|
2219 |
- */ |
|
2220 |
- cli_dbgmsg("cli_pdf: Encrypted PDF files not yet supported\n"); |
|
2221 |
- return CL_CLEAN; |
|
2222 |
- } |
|
2223 |
- |
|
2224 |
- /* |
|
2225 |
- * not true, since edits may put data after the trailer |
|
2226 |
- bytesleft -= trailerlength; |
|
2227 |
- */ |
|
2228 |
- |
|
2229 |
- /* |
|
2230 |
- * FIXME: Handle more than one xref section in the xref table |
|
2231 |
- */ |
|
2232 |
- for(xrefstart = trailerstart; xrefstart > p; --xrefstart) |
|
2233 |
- if(memcmp(xrefstart, "xref", 4) == 0) |
|
2234 |
- /* |
|
2235 |
- * Make sure it's the start of the line, not a startxref |
|
2236 |
- * token |
|
2237 |
- */ |
|
2238 |
- if((xrefstart[-1] == '\n') || (xrefstart[-1] == '\r')) |
|
2239 |
- break; |
|
2240 |
- |
|
2241 |
- if(xrefstart == p) { |
|
2242 |
- cli_dbgmsg("cli_pdf: xref not found\n"); |
|
2243 |
- return CL_CLEAN; |
|
2244 |
- } |
|
2245 |
- |
|
2246 |
- printed_predictor_message = printed_embedded_font_message = 0; |
|
2247 |
- |
|
2248 |
- /* |
|
2249 |
- * not true, since edits may put data after the trailer |
|
2250 |
- xreflength = (size_t)(trailerstart - xrefstart); |
|
2251 |
- bytesleft -= xreflength; |
|
2252 |
- */ |
|
2253 |
- |
|
2254 |
- files = 0; |
|
2255 |
- |
|
2256 |
- rc = CL_CLEAN; |
|
2257 |
- |
|
2258 |
- /* |
|
2259 |
- * The body section consists of a sequence of indirect objects |
|
2260 |
- */ |
|
2261 |
- while((p < xrefstart) && (cli_checklimits("cli_pdf", ctx, 0, 0, 0)==CL_CLEAN) && |
|
2262 |
- ((q = pdf_nextobject(p, bytesleft)) != NULL)) { |
|
2263 |
- int is_ascii85decode, is_flatedecode, fout, len, has_cr; |
|
2264 |
- /*int object_number, generation_number;*/ |
|
2265 |
- const char *objstart, *objend, *streamstart, *streamend; |
|
2266 |
- unsigned long length, objlen, real_streamlen, calculated_streamlen; |
|
2267 |
- int is_embedded_font, predictor; |
|
2268 |
- char fullname[NAME_MAX + 1]; |
|
2269 |
- |
|
2270 |
- rc = CL_CLEAN; |
|
2271 |
- if(q == xrefstart) |
|
2272 |
- break; |
|
2273 |
- if(memcmp(q, "xref", 4) == 0) |
|
2274 |
- break; |
|
2275 |
- |
|
2276 |
- /*object_number = atoi(q);*/ |
|
2277 |
- bytesleft -= (off_t)(q - p); |
|
2278 |
- p = q; |
|
2279 |
- |
|
2280 |
- if(memcmp(q, "endobj", 6) == 0) |
|
2281 |
- continue; |
|
2282 |
- if(!isdigit(*q)) { |
|
2283 |
- cli_dbgmsg("cli_pdf: Object number missing\n"); |
|
2284 |
- break; |
|
2285 |
- } |
|
2286 |
- q = pdf_nextobject(p, bytesleft); |
|
2287 |
- if((q == NULL) || !isdigit(*q)) { |
|
2288 |
- cli_dbgmsg("cli_pdf: Generation number missing\n"); |
|
2289 |
- break; |
|
2290 |
- } |
|
2291 |
- /*generation_number = atoi(q);*/ |
|
2292 |
- bytesleft -= (off_t)(q - p); |
|
2293 |
- p = q; |
|
2294 |
- |
|
2295 |
- q = pdf_nextobject(p, bytesleft); |
|
2296 |
- if((q == NULL) || (memcmp(q, "obj", 3) != 0)) { |
|
2297 |
- cli_dbgmsg("cli_pdf: Indirect object missing \"obj\"\n"); |
|
2298 |
- break; |
|
2299 |
- } |
|
2300 |
- |
|
2301 |
- bytesleft -= (off_t)((q - p) + 3); |
|
2302 |
- objstart = p = &q[3]; |
|
2303 |
- objend = cli_memstr(p, bytesleft, "endobj", 6); |
|
2304 |
- if(objend == NULL) { |
|
2305 |
- cli_dbgmsg("cli_pdf: No matching endobj\n"); |
|
2306 |
- break; |
|
2307 |
- } |
|
2308 |
- bytesleft -= (off_t)((objend - p) + 6); |
|
2309 |
- p = &objend[6]; |
|
2310 |
- objlen = (unsigned long)(objend - objstart); |
|
2311 |
- |
|
2312 |
- /* Is this object a stream? */ |
|
2313 |
- streamstart = cli_memstr(objstart, objlen, "stream", 6); |
|
2314 |
- if(streamstart == NULL) |
|
2315 |
- continue; |
|
2316 |
- |
|
2317 |
- is_embedded_font = length = is_ascii85decode = |
|
2318 |
- is_flatedecode = 0; |
|
2319 |
- predictor = 1; |
|
2320 |
- |
|
2321 |
- /* |
|
2322 |
- * TODO: handle F and FFilter? |
|
2323 |
- */ |
|
2324 |
- q = objstart; |
|
2325 |
- while(q < streamstart) { |
|
2326 |
- if(*q == '/') { /* name object */ |
|
2327 |
- /*cli_dbgmsg("Name object %8.8s\n", q+1, q+1);*/ |
|
2328 |
- if(strncmp(++q, "Length ", 7) == 0) { |
|
2329 |
- q += 7; |
|
2330 |
- length = atoi(q); |
|
2331 |
- while(isdigit(*q)) |
|
2332 |
- q++; |
|
2333 |
- /* |
|
2334 |
- * Note: incremental updates are not |
|
2335 |
- * supported |
|
2336 |
- */ |
|
2337 |
- if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) { |
|
2338 |
- const char *r, *nq; |
|
2339 |
- char b[14]; |
|
2340 |
- |
|
2341 |
- q += 4; |
|
2342 |
- cli_dbgmsg("cli_pdf: Length is in indirect obj %lu\n", |
|
2343 |
- length); |
|
2344 |
- snprintf(b, sizeof(b), |
|
2345 |
- "%lu 0 obj", length); |
|
2346 |
- length = (unsigned long)strlen(b); |
|
2347 |
- /* optimization: assume objects |
|
2348 |
- * are sequential */ |
|
2349 |
- if(!opt_failed) { |
|
2350 |
- nq = q; |
|
2351 |
- len = buf + size - q; |
|
2352 |
- } else { |
|
2353 |
- nq = buf; |
|
2354 |
- len = q - buf; |
|
2355 |
- } |
|
2356 |
- do { |
|
2357 |
- r = cli_memstr(nq, len, b, length); |
|
2358 |
- if (r > nq) { |
|
2359 |
- const char x = *(r-1); |
|
2360 |
- if (x == '\n' || x=='\r') { |
|
2361 |
- --r; |
|
2362 |
- break; |
|
2363 |
- } |
|
2364 |
- } |
|
2365 |
- if (r) { |
|
2366 |
- len -= r + length - nq; |
|
2367 |
- nq = r + length; |
|
2368 |
- } else if (!opt_failed) { |
|
2369 |
- /* we failed optimized match, |
|
2370 |
- * try matching from the beginning |
|
2371 |
- */ |
|
2372 |
- len = q - buf; |
|
2373 |
- r = nq = buf; |
|
2374 |
- /* prevent |
|
2375 |
- * infloop */ |
|
2376 |
- opt_failed = 1; |
|
2377 |
- } |
|
2378 |
- } while (r); |
|
2379 |
- if(r) { |
|
2380 |
- r += length - 1; |
|
2381 |
- r = pdf_nextobject(r, bytesleft - (r - q)); |
|
2382 |
- if(r) { |
|
2383 |
- length = atoi(r); |
|
2384 |
- while(isdigit(*r)) |
|
2385 |
- r++; |
|
2386 |
- cli_dbgmsg("cli_pdf: length in '%s' %lu\n", |
|
2387 |
- &b[1], |
|
2388 |
- length); |
|
2389 |
- } |
|
2390 |
- } else |
|
2391 |
- cli_dbgmsg("cli_pdf: Couldn't find '%s'\n", |
|
2392 |
- &b[1]); |
|
2393 |
- } |
|
2394 |
- q--; |
|
2395 |
- } else if(strncmp(q, "Length2 ", 8) == 0) |
|
2396 |
- is_embedded_font = 1; |
|
2397 |
- else if(strncmp(q, "Predictor ", 10) == 0) { |
|
2398 |
- q += 10; |
|
2399 |
- predictor = atoi(q); |
|
2400 |
- while(isdigit(*q)) |
|
2401 |
- q++; |
|
2402 |
- q--; |
|
2403 |
- } else if(strncmp(q, "FlateDecode", 11) == 0) { |
|
2404 |
- is_flatedecode = 1; |
|
2405 |
- q += 11; |
|
2406 |
- } else if(strncmp(q, "ASCII85Decode", 13) == 0) { |
|
2407 |
- is_ascii85decode = 1; |
|
2408 |
- q += 13; |
|
2409 |
- } |
|
2410 |
- } |
|
2411 |
- q = pdf_nextobject(q, (size_t)(streamstart - q)); |
|
2412 |
- if(q == NULL) |
|
2413 |
- break; |
|
2414 |
- } |
|
2415 |
- |
|
2416 |
- if(is_embedded_font) { |
|
2417 |
- /* |
|
2418 |
- * Need some documentation, the only I can find a |
|
2419 |
- * reference to is not free, if some kind soul wishes |
|
2420 |
- * to donate a copy, please contact me! |
|
2421 |
- * (http://safari.adobepress.com/0321304748) |
|
2422 |
- */ |
|
2423 |
- if(!printed_embedded_font_message) { |
|
2424 |
- cli_dbgmsg("cli_pdf: Embedded fonts not yet supported\n"); |
|
2425 |
- printed_embedded_font_message = 1; |
|
2426 |
- } |
|
2427 |
- continue; |
|
2428 |
- } |
|
2429 |
- if(predictor > 1) { |
|
2430 |
- /* |
|
2431 |
- * Needs some thought |
|
2432 |
- */ |
|
2433 |
- if(!printed_predictor_message) { |
|
2434 |
- cli_dbgmsg("cli_pdf: Predictor %d not honoured for embedded image\n", |
|
2435 |
- predictor); |
|
2436 |
- printed_predictor_message = 1; |
|
2437 |
- } |
|
2438 |
- continue; |
|
2439 |
- } |
|
2440 |
- |
|
2441 |
- /* objend points to the end of the object (start of "endobj") */ |
|
2442 |
- streamstart += 6; /* go past the word "stream" */ |
|
2443 |
- len = (int)(objend - streamstart); |
|
2444 |
- q = pdf_nextlinestart(streamstart, len); |
|
2445 |
- if(q == NULL) |
|
2446 |
- break; |
|
2447 |
- len -= (int)(q - streamstart); |
|
2448 |
- streamstart = q; |
|
2449 |
- streamend = cli_memstr(streamstart, len, "endstream\n", 10); |
|
2450 |
- if(streamend == NULL) { |
|
2451 |
- streamend = cli_memstr(streamstart, len, "endstream\r", 10); |
|
2452 |
- if(streamend == NULL) { |
|
2453 |
- cli_dbgmsg("cli_pdf: No endstream\n"); |
|
2454 |
- break; |
|
2455 |
- } |
|
2456 |
- has_cr = 1; |
|
2457 |
- } else |
|
2458 |
- has_cr = 0; |
|
2459 |
- snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", dir, files); |
|
2460 |
- fout = open(fullname, O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600); |
|
2461 |
- if(fout < 0) { |
|
2462 |
- char err[128]; |
|
2463 |
- cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); |
|
2464 |
- rc = CL_ETMPFILE; |
|
2465 |
- break; |
|
2466 |
- } |
|
2467 |
- |
|
2468 |
- /* |
|
2469 |
- * Calculate the length ourself, the Length parameter is often |
|
2470 |
- * wrong |
|
2471 |
- */ |
|
2472 |
- if((*--streamend != '\n') && (*streamend != '\r')) |
|
2473 |
- streamend++; |
|
2474 |
- else if(has_cr && (*--streamend != '\r')) |
|
2475 |
- streamend++; |
|
2476 |
- |
|
2477 |
- if(streamend <= streamstart) { |
|
2478 |
- close(fout); |
|
2479 |
- cli_dbgmsg("cli_pdf: Empty stream\n"); |
|
2480 |
- if (cli_unlink(fullname)) { |
|
2481 |
- rc = CL_EUNLINK; |
|
2482 |
- break; |
|
2483 |
- } |
|
2484 |
- continue; |
|
2485 |
- } |
|
2486 |
- calculated_streamlen = (int)(streamend - streamstart); |
|
2487 |
- real_streamlen = length; |
|
2488 |
- |
|
2489 |
- cli_dbgmsg("cli_pdf: length %lu, calculated_streamlen %lu isFlate %d isASCII85 %d\n", |
|
2490 |
- length, calculated_streamlen, |
|
2491 |
- is_flatedecode, is_ascii85decode); |
|
2492 |
- |
|
2493 |
- if(calculated_streamlen != real_streamlen) { |
|
2494 |
- cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n"); |
|
2495 |
- if(real_streamlen > calculated_streamlen) |
|
2496 |
- real_streamlen = calculated_streamlen; |
|
2497 |
- } |
|
2498 |
-#if 0 |
|
2499 |
- /* FIXME: this isn't right... */ |
|
2500 |
- if(length) |
|
2501 |
- /*streamlen = (is_flatedecode) ? length : MIN(length, streamlen);*/ |
|
2502 |
- streamlen = MIN(length, streamlen); |
|
2503 |
-#endif |
|
2504 |
- |
|
2505 |
- if(is_ascii85decode) { |
|
2506 |
- unsigned char *tmpbuf; |
|
2507 |
- int ret = cli_checklimits("cli_pdf", ctx, calculated_streamlen * 5, calculated_streamlen, real_streamlen); |
|
2508 |
- |
|
2509 |
- if(ret != CL_CLEAN) { |
|
2510 |
- close(fout); |
|
2511 |
- if (cli_unlink(fullname)) { |
|
2512 |
- rc = CL_EUNLINK; |
|
2513 |
- break; |
|
2514 |
- } |
|
2515 |
- continue; |
|
2516 |
- } |
|
2517 |
- |
|
2518 |
- tmpbuf = cli_malloc(calculated_streamlen * 5); |
|
2519 |
- |
|
2520 |
- if(tmpbuf == NULL) { |
|
2521 |
- close(fout); |
|
2522 |
- if (cli_unlink(fullname)) { |
|
2523 |
- rc = CL_EUNLINK; |
|
2524 |
- break; |
|
2525 |
- } |
|
2526 |
- continue; |
|
2527 |
- } |
|
2528 |
- |
|
2529 |
- ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf); |
|
2530 |
- |
|
2531 |
- if(ret == -1) { |
|
2532 |
- free(tmpbuf); |
|
2533 |
- close(fout); |
|
2534 |
- if (cli_unlink(fullname)) { |
|
2535 |
- rc = CL_EUNLINK; |
|
2536 |
- break; |
|
2537 |
- } |
|
2538 |
- continue; |
|
2539 |
- } |
|
2540 |
- if(ret) { |
|
2541 |
- unsigned char *t; |
|
2542 |
- unsigned size; |
|
2543 |
- |
|
2544 |
- real_streamlen = ret; |
|
2545 |
- /* free unused trailing bytes */ |
|
2546 |
- size = real_streamlen > calculated_streamlen ? real_streamlen : calculated_streamlen; |
|
2547 |
- t = (unsigned char *)cli_realloc(tmpbuf,size); |
|
2548 |
- if(t == NULL) { |
|
2549 |
- free(tmpbuf); |
|
2550 |
- close(fout); |
|
2551 |
- if (cli_unlink(fullname)) { |
|
2552 |
- rc = CL_EUNLINK; |
|
2553 |
- break; |
|
2554 |
- } |
|
2555 |
- continue; |
|
2556 |
- } |
|
2557 |
- tmpbuf = t; |
|
2558 |
- /* |
|
2559 |
- * Note that it will probably be both |
|
2560 |
- * ascii85encoded and flateencoded |
|
2561 |
- */ |
|
2562 |
- |
|
2563 |
- if(is_flatedecode) |
|
2564 |
- rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx); |
|
2565 |
- else |
|
2566 |
- rc = (unsigned long)cli_writen(fout, (const char *)streamstart, real_streamlen)==real_streamlen ? CL_CLEAN : CL_EWRITE; |
|
2567 |
- } |
|
2568 |
- free(tmpbuf); |
|
2569 |
- } else if(is_flatedecode) { |
|
2570 |
- rc = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx); |
|
2571 |
- |
|
2572 |
- } else { |
|
2573 |
- cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n", |
|
2574 |
- (unsigned long)real_streamlen); |
|
2575 |
- if((rc = cli_checklimits("cli_pdf", ctx, real_streamlen, 0, 0))==CL_CLEAN) |
|
2576 |
- rc = (unsigned long)cli_writen(fout, (const char *)streamstart, real_streamlen) == real_streamlen ? CL_CLEAN : CL_EWRITE; |
|
2577 |
- } |
|
2578 |
- |
|
2579 |
- if (rc == CL_CLEAN) { |
|
2580 |
- cli_dbgmsg("cli_pdf: extracted file %u to %s\n", files, fullname); |
|
2581 |
- files++; |
|
2582 |
- |
|
2583 |
- lseek(fout, 0, SEEK_SET); |
|
2584 |
- rc = cli_magic_scandesc(fout, ctx); |
|
2585 |
- } |
|
2586 |
- close(fout); |
|
2587 |
- if(!ctx->engine->keeptmp) |
|
2588 |
- if (cli_unlink(fullname)) rc = CL_EUNLINK; |
|
2589 |
- if(rc != CL_CLEAN) break; |
|
2590 |
- } |
|
2591 |
- |
|
2592 |
- |
|
2593 |
- cli_dbgmsg("cli_pdf: returning %d\n", rc); |
|
2594 |
- return rc; |
|
2595 |
-} |
|
2596 |
- |
|
2597 |
-/* |
|
2598 |
- * flate inflation |
|
2599 |
- */ |
|
2600 |
-static int |
|
2601 |
-try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, cli_ctx *ctx) |
|
2602 |
-{ |
|
2603 |
- int ret = cli_checklimits("cli_pdf", ctx, real_len, 0, 0); |
|
2604 |
- |
|
2605 |
- if (ret==CL_CLEAN && flatedecode(buf, real_len, fout, ctx) == CL_SUCCESS) |
|
2606 |
- return CL_CLEAN; |
|
2607 |
- |
|
2608 |
- if(real_len == calculated_len) { |
|
2609 |
- /* |
|
2610 |
- * Nothing more we can do to inflate |
|
2611 |
- */ |
|
2612 |
- cli_dbgmsg("cli_pdf: Bad compression in flate stream\n"); |
|
2613 |
- return CL_CLEAN; |
|
2614 |
- } |
|
2615 |
- |
|
2616 |
- if(cli_checklimits("cli_pdf", ctx, calculated_len, 0, 0)!=CL_CLEAN) |
|
2617 |
- return CL_CLEAN; |
|
2618 |
- |
|
2619 |
- ret = flatedecode(buf, calculated_len, fout, ctx); |
|
2620 |
- if(ret == CL_CLEAN) |
|
2621 |
- return CL_CLEAN; |
|
2622 |
- |
|
2623 |
- /* i.e. the PDF file is broken :-( */ |
|
2624 |
- cli_dbgmsg("cli_pdf: Bad compressed block length in flate stream\n"); |
|
2625 |
- |
|
2626 |
- return ret; |
|
2627 |
-} |
|
2628 |
- |
|
2629 |
-static int |
|
2630 |
-flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx) |
|
2631 |
-{ |
|
2632 |
- int zstat, ret; |
|
2633 |
- off_t nbytes; |
|
2634 |
- z_stream stream; |
|
2635 |
- unsigned char output[BUFSIZ]; |
|
2636 |
-#ifdef SAVE_TMP |
|
2637 |
- char tmpfilename[16]; |
|
2638 |
- int tmpfd; |
|
2639 |
-#endif |
|
2640 |
- |
|
2641 |
- cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len); |
|
2642 |
- |
|
2643 |
- if(len == 0) { |
|
2644 |
- cli_dbgmsg("cli_pdf: flatedecode len == 0\n"); |
|
2645 |
- return CL_CLEAN; |
|
2646 |
- } |
|
2647 |
- |
|
2648 |
-#ifdef SAVE_TMP |
|
2649 |
- /* |
|
2650 |
- * Copy the embedded area for debugging, so that if it falls over |
|
2651 |
- * we have a copy of the offending data. This is debugging code |
|
2652 |
- * that you shouldn't of course install in a live environment. I am |
|
2653 |
- * not interested in hearing about security issues with this section |
|
2654 |
- * of the parser. |
|
2655 |
- */ |
|
2656 |
- strcpy(tmpfilename, "/tmp/pdfXXXXXX"); |
|
2657 |
- tmpfd = mkstemp(tmpfilename); |
|
2658 |
- if(tmpfd < 0) { |
|
2659 |
- perror(tmpfilename); |
|
2660 |
- cli_errmsg("cli_pdf: Can't make debugging file\n"); |
|
2661 |
- } else { |
|
2662 |
- FILE *tmpfp = fdopen(tmpfd, "w"); |
|
2663 |
- |
|
2664 |
- if(tmpfp) { |
|
2665 |
- fwrite(buf, sizeof(char), len, tmpfp); |
|
2666 |
- fclose(tmpfp); |
|
2667 |
- cli_dbgmsg("cli_pdf: flatedecode: debugging file is %s\n", |
|
2668 |
- tmpfilename); |
|
2669 |
- } else |
|
2670 |
- cli_errmsg("cli_pdf: can't fdopen debugging file\n"); |
|
2671 |
- } |
|
2672 |
-#endif |
|
2673 |
- stream.zalloc = (alloc_func)Z_NULL; |
|
2674 |
- stream.zfree = (free_func)Z_NULL; |
|
2675 |
- stream.opaque = (void *)NULL; |
|
2676 |
- stream.next_in = (Bytef *)buf; |
|
2677 |
- stream.avail_in = len; |
|
2678 |
- stream.next_out = output; |
|
2679 |
- stream.avail_out = sizeof(output); |
|
2680 |
- |
|
2681 |
- zstat = inflateInit(&stream); |
|
2682 |
- if(zstat != Z_OK) { |
|
2683 |
- cli_warnmsg("cli_pdf: inflateInit failed\n"); |
|
2684 |
- return CL_EMEM; |
|
2685 |
- } |
|
2686 |
- |
|
2687 |
- nbytes = 0; |
|
2688 |
- |
|
2689 |
- while(stream.avail_in) { |
|
2690 |
- zstat = inflate(&stream, Z_NO_FLUSH); /* zlib */ |
|
2691 |
- switch(zstat) { |
|
2692 |
- case Z_OK: |
|
2693 |
- if(stream.avail_out == 0) { |
|
2694 |
- int written; |
|
2695 |
- if ((written=cli_writen(fout, output, sizeof(output)))!=sizeof(output)) { |
|
2696 |
- cli_errmsg("cli_pdf: failed to write output file\n"); |
|
2697 |
- inflateEnd(&stream); |
|
2698 |
- return CL_EWRITE; |
|
2699 |
- } |
|
2700 |
- nbytes += written; |
|
2701 |
- |
|
2702 |
- if((ret=cli_checklimits("cli_pdf", ctx, nbytes, 0, 0))!=CL_CLEAN) { |
|
2703 |
- inflateEnd(&stream); |
|
2704 |
- return ret; |
|
2705 |
- } |
|
2706 |
- stream.next_out = output; |
|
2707 |
- stream.avail_out = sizeof(output); |
|
2708 |
- } |
|
2709 |
- continue; |
|
2710 |
- case Z_STREAM_END: |
|
2711 |
- break; |
|
2712 |
- default: |
|
2713 |
- if(stream.msg) |
|
2714 |
- cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n", |
|
2715 |
- (unsigned long)nbytes, |
|
2716 |
- stream.msg); |
|
2717 |
- else |
|
2718 |
- cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF attachment\n", |
|
2719 |
- (unsigned long)nbytes, zstat); |
|
2720 |
- inflateEnd(&stream); |
|
2721 |
- return CL_CLEAN; |
|
2722 |
- } |
|
2723 |
- break; |
|
2724 |
- } |
|
2725 |
- |
|
2726 |
- if(stream.avail_out != sizeof(output)) { |
|
2727 |
- if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0) { |
|
2728 |
- cli_errmsg("cli_pdf: failed to write output file\n"); |
|
2729 |
- inflateEnd(&stream); |
|
2730 |
- return CL_EWRITE; |
|
2731 |
- } |
|
2732 |
- } |
|
2733 |
- |
|
2734 |
-#ifdef SAVE_TMP |
|
2735 |
- if (cli_unlink(tmpfilename)) { |
|
2736 |
- inflateEnd(&stream); |
|
2737 |
- return CL_EUNLINK; |
|
2738 |
- } |
|
2739 |
-#endif |
|
2740 |
- inflateEnd(&stream); |
|
2741 |
- return CL_CLEAN; |
|
2742 |
-} |
|
2743 |
-#endif |
|
2744 |
- |
|
2745 | 2148 |
static int asciihexdecode(const char *buf, off_t len, char *output) |
2746 | 2149 |
{ |
2747 | 2150 |
unsigned i,j; |