Browse code

First draft of ascii85 decoder

git-svn: trunk@1566

Nigel Horne authored on 2005/05/22 07:07:14
Showing 2 changed files
... ...
@@ -1,3 +1,7 @@
1
+Sat May 21 23:06:03 BST 2005 (njh)
2
+---------------------------------
3
+  * libclamav/pdf.c:	Added decoder for ascii85 encoded files
4
+
1 5
 Sat May 21 02:17:08 CEST 2005 (tk)
2 6
 ----------------------------------
3 7
   * Remove the use of tmpfile(). Patch by Mark Pizzolato
... ...
@@ -15,7 +15,7 @@
15 15
  *  along with this program; if not, write to the Free Software
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  */
18
-static	char	const	rcsid[] = "$Id: pdf.c,v 1.13 2005/05/21 19:48:29 nigelhorne Exp $";
18
+static	char	const	rcsid[] = "$Id: pdf.c,v 1.14 2005/05/21 22:04:22 nigelhorne Exp $";
19 19
 
20 20
 #if HAVE_CONFIG_H
21 21
 #include "clamav-config.h"
... ...
@@ -54,6 +54,9 @@ static	char	const	rcsid[] = "$Id: pdf.c,v 1.13 2005/05/21 19:48:29 nigelhorne Ex
54 54
 #define	MIN(a, b)	(((a) < (b)) ? (a) : (b))
55 55
 #endif
56 56
 
57
+static	int	flatedecode(const unsigned char *buf, size_t len, int fout);
58
+static	int	ascii85decode(const char *buf, size_t len, unsigned char *output);
59
+
57 60
 int
58 61
 cli_pdf(const char *dir, int desc)
59 62
 {
... ...
@@ -87,7 +90,7 @@ cli_pdf(const char *dir, int desc)
87 87
 	cli_dbgmsg("cli_pdf: scanning %lu bytes\n", bytesleft);
88 88
 
89 89
 	while((q = cli_pmemstr(p, bytesleft, "obj", 3)) != NULL) {
90
-		int length, ascii85decode, flatedecode, fout;
90
+		int length, is_ascii85decode, is_flatedecode, fout;
91 91
 		const char *s, *t, *u, *obj;
92 92
 		size_t objlen;
93 93
 		char fullname[NAME_MAX + 1];
... ...
@@ -110,7 +113,7 @@ cli_pdf(const char *dir, int desc)
110 110
 				continue;
111 111
 		}
112 112
 
113
-		length = ascii85decode = flatedecode = 0;
113
+		length = is_ascii85decode = is_flatedecode = 0;
114 114
 		for(s = obj; s < t; s++)
115 115
 			if(*s == '/') {
116 116
 				if(strncmp(++s, "Length ", 7) == 0) {
... ...
@@ -120,20 +123,15 @@ cli_pdf(const char *dir, int desc)
120 120
 						s++;
121 121
 				} else if((strncmp(s, "FlateDecode ", 12) == 0) ||
122 122
 					  (strncmp(s, "FlateDecode\n", 12) == 0)) {
123
-					flatedecode = 1;
123
+					is_flatedecode = 1;
124
+					s += 12;
125
+				} else if((strncmp(s, "ASCII85Decode ", 12) == 0) ||
126
+					  (strncmp(s, "ASCII85Decode\n", 12) == 0)) {
127
+					is_ascii85decode = 1;
124 128
 					s += 12;
125
-				} else if(strncmp(s, "ASCII85Decode ", 12) == 0) {
126
-					ascii85decode++;
127
-					break;
128 129
 				}
129
-			
130 130
 			}
131 131
 
132
-		if(ascii85decode) {
133
-			cli_warnmsg("ASCII85Decode not yet supported\n");
134
-			continue;
135
-		}
136
-
137 132
 		t += 7;
138 133
 		u = cli_pmemstr(t, objlen - 7, "endstream\n", 10);
139 134
 		if(u == NULL) {
... ...
@@ -163,59 +161,35 @@ cli_pdf(const char *dir, int desc)
163 163
 			break;
164 164
 		}
165 165
 
166
-		if(flatedecode) {
167
-			z_stream stream;
168
-			size_t len = (size_t)(u - t);
169
-			unsigned char output[BUFSIZ];
170
-
171
-			cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", len);
166
+		if(is_ascii85decode) {
167
+			int len = (int)(u - t);
168
+			unsigned char *tmpbuf = cli_malloc(len * 2);
172 169
 
173
-			while(strchr("\r\n", *t)) {
174
-				len--;
175
-				t++;
176
-			}
170
+			len = ascii85decode(t, (size_t)len, tmpbuf);
177 171
 
178
-			stream.zalloc = (alloc_func)Z_NULL;
179
-			stream.zfree = (free_func)Z_NULL;
180
-			stream.opaque = (void *)NULL;
181
-			stream.next_in = (unsigned char *)t;
182
-			stream.avail_in = len;
183
-
184
-			if(inflateInit(&stream) != Z_OK) {
185
-				cli_warnmsg("cli_pdf: inflateInit failed");
186
-				close(fout);
172
+			if(len == -1) {
173
+				free(tmpbuf);
174
+				rc = CL_EFORMAT;
187 175
 				continue;
188 176
 			}
189
-			stream.next_out = output;
190
-			stream.avail_out = sizeof(output);
191
-			for(;;) {
192
-				int zstat;
193
-
194
-				if(stream.avail_out == 0) {
195
-					cli_dbgmsg("write BUFSIZ\n");
196
-					write(fout, output, BUFSIZ);
197
-					stream.next_out = output;
198
-					stream.avail_out = BUFSIZ;
199
-				}
200
-				zstat = inflate(&stream, Z_NO_FLUSH);
201
-				switch(zstat) {
202
-					case Z_OK:
203
-						continue;
204
-					case Z_STREAM_END:
205
-						break;
206
-					default:
207
-						cli_warnmsg("Error %d inflating PDF attachment\n", zstat);
208
-						rc = CL_EZIP;
209
-						break;
210
-				}
211
-				break;
177
+			/* free unused trailing bytes */
178
+			tmpbuf = cli_realloc(tmpbuf, len);
179
+			/*
180
+			 * Note that it will probably be both ascii85encoded
181
+			 * and flateencoded
182
+			 */
183
+			if(is_flatedecode) {
184
+				const int zstat = flatedecode((unsigned char *)tmpbuf, (size_t)len, fout);
185
+
186
+				if(zstat != Z_OK)
187
+					rc = zstat;
212 188
 			}
189
+			free(tmpbuf);
190
+		} else if(is_flatedecode) {
191
+			const int zstat = flatedecode((unsigned char *)t, (size_t)(u - t), fout);
213 192
 
214
-			if(stream.avail_out != sizeof(output)) {
215
-				cli_dbgmsg("flush %lu\n", sizeof(output) - stream.avail_out);
216
-				write(fout, output, sizeof(output) - stream.avail_out);
217
-			}
218
-			inflateEnd(&stream);
193
+			if(zstat != Z_OK)
194
+				rc = zstat;
219 195
 		} else
220 196
 			write(fout, t, (size_t)(u - t));
221 197
 
... ...
@@ -229,3 +203,126 @@ cli_pdf(const char *dir, int desc)
229 229
 	return rc;
230 230
 #endif
231 231
 }
232
+
233
+/* flate inflation - returns zlib status, e.g. Z_OK */
234
+static int
235
+flatedecode(const unsigned char *buf, size_t len, int fout)
236
+{
237
+	int zstat;
238
+	z_stream stream;
239
+	unsigned char output[BUFSIZ];
240
+
241
+	cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", len);
242
+
243
+	while(strchr("\r\n", *buf)) {
244
+		len--;
245
+		buf++;
246
+	}
247
+
248
+	stream.zalloc = (alloc_func)Z_NULL;
249
+	stream.zfree = (free_func)Z_NULL;
250
+	stream.opaque = (void *)NULL;
251
+	stream.next_in = (unsigned char *)buf;
252
+	stream.avail_in = len;
253
+
254
+	zstat = inflateInit(&stream);
255
+	if(zstat != Z_OK) {
256
+		cli_warnmsg("cli_pdf: inflateInit failed");
257
+		return zstat;
258
+	}
259
+	stream.next_out = output;
260
+	stream.avail_out = sizeof(output);
261
+	for(;;) {
262
+		if(stream.avail_out == 0) {
263
+			cli_dbgmsg("write BUFSIZ\n");
264
+			write(fout, output, BUFSIZ);
265
+			stream.next_out = output;
266
+			stream.avail_out = BUFSIZ;
267
+		}
268
+		zstat = inflate(&stream, Z_NO_FLUSH);
269
+		switch(zstat) {
270
+			case Z_OK:
271
+				continue;
272
+			case Z_STREAM_END:
273
+				break;
274
+			default:
275
+				cli_warnmsg("Error %d inflating PDF attachment\n", zstat);
276
+				inflateEnd(&stream);
277
+				return zstat;
278
+		}
279
+		break;
280
+	}
281
+
282
+	if(stream.avail_out != sizeof(output)) {
283
+		cli_dbgmsg("flush %lu\n", sizeof(output) - stream.avail_out);
284
+		write(fout, output, sizeof(output) - stream.avail_out);
285
+	}
286
+	return inflateEnd(&stream);
287
+}
288
+
289
/*
 * http://cvs.gnome.org/viewcvs/sketch/Filter/ascii85filter.c?rev=1.2
 */
/*
 * ascii85 inflation, returns number of bytes in output, -1 for error
 *
 * buf:		ascii85 encoded data, it need not be NUL terminated
 * len:		number of bytes in buf
 * output:	receives the decoded bytes. No size is passed in, so the
 *		caller must supply a big enough buffer: a 'z' in the input
 *		expands to 4 output bytes, so the worst case is 4 * len bytes
 *
 * Decoding stops at the "~>" end of data marker, or when the input runs
 * out. White space in the input is ignored.
 */
static int
ascii85decode(const char *buf, size_t len, unsigned char *output)
{
	/*
	 * Read the input as unsigned char so that bytes >= 0x80 can neither
	 * be mistaken for EOF nor be passed to isspace() as negative values
	 */
	const unsigned char *ptr = (const unsigned char *)buf;
	uint32_t sum = 0;
	int quintet = 0;
	int ret = 0;

	cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);

	for(;;) {
		int byte;

		if(len == 0)
			byte = EOF;
		else {
			byte = *ptr++;
			len--;
			/*
			 * "~>" marks the end of the data; only look at the
			 * following byte if there is one
			 */
			if((byte == '~') && len && (*ptr == '>'))
				byte = EOF;
		}

		if(byte >= '!' && byte <= 'u') {
			sum = sum * 85 + (uint32_t)(byte - '!');
			if(++quintet == 5) {
				/* 5 input characters give 4 output bytes */
				*output++ = (unsigned char)(sum >> 24);
				*output++ = (unsigned char)((sum >> 16) & 0xFF);
				*output++ = (unsigned char)((sum >> 8) & 0xFF);
				*output++ = (unsigned char)(sum & 0xFF);
				ret += 4;
				quintet = 0;
				sum = 0;
			}
		} else if(byte == 'z') {
			/* 'z' is shorthand for 4 zero bytes */
			if(quintet) {
				cli_warnmsg("ascii85decode: z in wrong position\n");
				return -1;
			}
			*output++ = '\0';
			*output++ = '\0';
			*output++ = '\0';
			*output++ = '\0';
			ret += 4;
		} else if(byte == EOF) {
			if(quintet) {
				int i;

				if(quintet == 1) {
					cli_warnmsg("ascii85Decode: only 1 byte in last quintet\n");
					return -1;
				}
				/* pad the partial group out to 5 digits */
				for(i = quintet; i < 5; i++)
					sum *= 85;
				sum += (0xFFFFFF >> ((quintet - 2) * 8));
				/*
				 * n characters in the last group encode n - 1
				 * bytes, so count only what is stored
				 */
				ret += quintet - 1;
				for(i = 0; i < quintet - 1; i++)
					*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
			}
			break;
		} else if(!isspace(byte)) {
			cli_warnmsg("ascii85Decode: invalid character 0x%x, len %lu\n", (unsigned int)byte, (unsigned long)len);
			return -1;
		}
	}
	return ret;
}