Browse code

Handle RFC2047

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@884 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/09/17 03:03:25
Showing 2 changed files
... ...
@@ -1,3 +1,7 @@
1
+Thu Sep 16 19:02:38 BST 2004 (njh)
2
+----------------------------------
3
+  * libclamav/mbox.c:	Handle RFC2047 encoded headers
4
+
1 5
 Thu Sep 16 16:57:11 BST 2004 (njh)
2 6
 ----------------------------------
3 7
   * libclamav/message.c:	Handle double colons e.g.:
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.127  2004/09/16 18:00:43  nigelhorne
21
+ * Handle RFC2047
22
+ *
20 23
  * Revision 1.126  2004/09/16 14:23:57  nigelhorne
21 24
  * Handle quotes around mime type
22 25
  *
... ...
@@ -366,7 +369,7 @@
366 366
  * Compilable under SCO; removed duplicate code with message.c
367 367
  *
368 368
  */
369
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.126 2004/09/16 14:23:57 nigelhorne Exp $";
369
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.127 2004/09/16 18:00:43 nigelhorne Exp $";
370 370
 
371 371
 #if HAVE_CONFIG_H
372 372
 #include "clamav-config.h"
... ...
@@ -480,6 +483,7 @@ static	size_t	strip(char *buf, int len);
480 480
 static	bool	continuationMarker(const char *line);
481 481
 static	int	parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
482 482
 static	void	saveTextPart(message *m, const char *dir);
483
+static	char	*rfc2047(const char *in);
483 484
 
484 485
 static	void	checkURLs(message *m, const char *dir);
485 486
 #ifdef	WITH_CURL
... ...
@@ -871,7 +875,7 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821)
871 871
 	char *strptr;
872 872
 #endif
873 873
 	const char *separater;
874
-	char copy[LINE_LENGTH+1], tokenseparater[2];
874
+	char *copy, tokenseparater[2];
875 875
 
876 876
 	cli_dbgmsg("parseEmailHeader '%s'\n", line);
877 877
 
... ...
@@ -890,7 +894,9 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821)
890 890
 
891 891
 	assert(strlen(line) <= LINE_LENGTH);	/* RFC 821 */
892 892
 
893
-	strcpy(copy, line);
893
+	copy = rfc2047(line);
894
+	if(copy == NULL)
895
+		return -1;
894 896
 
895 897
 	tokenseparater[0] = *separater;
896 898
 	tokenseparater[1] = '\0';
... ...
@@ -910,6 +916,7 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821)
910 910
 			 */
911 911
 			ret = parseMimeHeader(m, cmd, rfc821, arg);
912 912
 	}
913
+	free(copy);
913 914
 	return ret;
914 915
 }
915 916
 
... ...
@@ -2255,6 +2262,105 @@ saveTextPart(message *m, const char *dir)
2255 2255
 	}
2256 2256
 }
2257 2257
 
2258
+
2259
+/*
2260
+ * Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
2261
+ * free, or NULL on error
2262
+ */
2263
+static char *
2264
+rfc2047(const char *in)
2265
+{
2266
+	char *out, *pout;
2267
+	size_t len;
2268
+
2269
+	if(strstr(in, "=?") == NULL)
2270
+		return strdup(in);
2271
+
2272
+	cli_dbgmsg("rfc2047 '%s'\n", in);
2273
+	out = cli_malloc(strlen(in) + 1);
2274
+
2275
+	if(out == NULL)
2276
+		return NULL;
2277
+
2278
+	pout = out;
2279
+
2280
+	/* For each RFC2047 string */
2281
+	while(*in) {
2282
+		char encoding, *enctext, *ptr;
2283
+		message *m;
2284
+		blob *b;
2285
+
2286
+		/* Find next RFC2047 string */
2287
+		while(*in) {
2288
+			if((*in == '=') && (in[1] == '?')) {
2289
+				in += 2;
2290
+				break;
2291
+			}
2292
+			*pout++ = *in++;
2293
+		}
2294
+		/* Skip over charset, find encoding */
2295
+		while((*in != '?') && *in)
2296
+			in++;
2297
+		if(*in == '\0')
2298
+			break;
2299
+		encoding = *++in;
2300
+		encoding = tolower(encoding);
2301
+
2302
+		if((encoding != 'q') && (encoding != 'b')) {
2303
+			cli_warnmsg("Unsupported RFC2047 encoding type - report to bugs@clamav.net\n");
2304
+			break;
2305
+		}
2306
+		/* Skip to encoded text */
2307
+		if(*++in != '?')
2308
+			break;
2309
+		if(*++in == '\0')
2310
+			break;
2311
+
2312
+		enctext = strdup(in);
2313
+		in = strstr(in, "?=");
2314
+		if(in == NULL) {
2315
+			free(enctext);
2316
+			break;
2317
+		}
2318
+		in += 2;
2319
+		ptr = strstr(enctext, "?=");
2320
+		assert(ptr != NULL);
2321
+		*ptr = '\0';
2322
+		/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
2323
+
2324
+		m = messageCreate();
2325
+		if(m == NULL) {
2326
+			free(enctext);
2327
+			break;
2328
+		}
2329
+		messageAddStr(m, enctext);
2330
+		free(enctext);
2331
+		switch(tolower(encoding)) {
2332
+			case 'q':
2333
+				messageSetEncoding(m, "quoted-printable");
2334
+				break;
2335
+			case 'b':
2336
+				messageSetEncoding(m, "base64");
2337
+				break;
2338
+		}
2339
+		b = messageToBlob(m);
2340
+		len = blobGetDataSize(b);
2341
+		cli_dbgmsg("Decoded as '%*.*s'\n", len, len, blobGetData(b));
2342
+		memcpy(pout, blobGetData(b), len);
2343
+		blobDestroy(b);
2344
+		messageDestroy(m);
2345
+		if(pout[len - 1] == '\n')
2346
+			pout += len - 1;
2347
+		else
2348
+			pout += len;
2349
+
2350
+	}
2351
+	*pout = '\0';
2352
+
2353
+	cli_dbgmsg("rfc2047 returns '%s'\n", out);
2354
+	return out;
2355
+}
2356
+
2258 2357
 #ifdef	FOLLOWURLS
2259 2358
 static void
2260 2359
 checkURLs(message *m, const char *dir)