Browse code

Performance work

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@1117 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/11/23 00:19:43
Showing 4 changed files
... ...
@@ -1,3 +1,7 @@
1
+Mon Nov 22 15:20:07 GMT 2004 (njh)
2
+----------------------------------
3
+  * libclamav:	General performance enhancements
4
+
1 5
 Sat Nov 20 23:04:59 GMT 2004 (njh)
2 6
 ----------------------------------
3 7
   * clamav-milter:	Better parsing of clamd's reply
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.181  2004/11/22 15:18:51  nigelhorne
21
+ * Performance work
22
+ *
20 23
  * Revision 1.180  2004/11/19 11:32:16  nigelhorne
21 24
  * Scan email footers (portions after the last MIME boundary
22 25
  *
... ...
@@ -528,7 +531,7 @@
528 528
  * Compilable under SCO; removed duplicate code with message.c
529 529
  *
530 530
  */
531
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.180 2004/11/19 11:32:16 nigelhorne Exp $";
531
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.181 2004/11/22 15:18:51 nigelhorne Exp $";
532 532
 
533 533
 #if HAVE_CONFIG_H
534 534
 #include "clamav-config.h"
... ...
@@ -1043,14 +1046,14 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1043 1043
 				cli_dbgmsg("End of header information\n");
1044 1044
 				inHeader = FALSE;
1045 1045
 			} else {
1046
-				char *ptr, *p;
1047
-				bool inquotes = FALSE;
1048
-				bool arequotes = FALSE;
1046
+				char *ptr;
1049 1047
 				const char *qptr;
1050 1048
 				int quotes;
1051
-#ifdef CL_THREAD_SAFE
1052
-				char *strptr;
1053
-#endif
1049
+
1050
+				if(buffer == NULL) {
1051
+					contMarker = FALSE;
1052
+					continue;
1053
+				}
1054 1054
 
1055 1055
 				if(fullline == NULL) {
1056 1056
 					char cmd[LINE_LENGTH + 1];
... ...
@@ -1066,18 +1069,28 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1066 1066
 					/*
1067 1067
 					 * Is this a header we're interested in?
1068 1068
 					 */
1069
-					if(cli_strtokbuf(buffer, 0, ":", cmd) == NULL)
1069
+					if((strchr(buffer, ':') == NULL) ||
1070
+					   (cli_strtokbuf(buffer, 0, ":", cmd) == NULL)) {
1071
+						if(strncmp(buffer, "From ", 5) == 0)
1072
+							anyHeadersFound = TRUE;
1070 1073
 						continue;
1074
+					}
1071 1075
 
1072
-					anyHeadersFound = TRUE;
1073 1076
 					commandNumber = tableFind(rfc821, cmd);
1074 1077
 
1075 1078
 					switch(commandNumber) {
1076 1079
 						case CONTENT_TRANSFER_ENCODING:
1077 1080
 						case CONTENT_DISPOSITION:
1078 1081
 						case CONTENT_TYPE:
1082
+							anyHeadersFound = TRUE;
1079 1083
 							break;
1080 1084
 						default:
1085
+							if(strcasecmp(cmd, "From") == 0)
1086
+								anyHeadersFound = TRUE;
1087
+							else if(strcasecmp(cmd, "Received") == 0)
1088
+								anyHeadersFound = TRUE;
1089
+							else if(strcasecmp(cmd, "De") == 0)
1090
+								anyHeadersFound = TRUE;
1081 1091
 							continue;
1082 1092
 					}
1083 1093
 					fullline = strdup(buffer);
... ...
@@ -1086,9 +1099,6 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1086 1086
 					fulllinelength += strlen(buffer);
1087 1087
 					fullline = cli_realloc(fullline, fulllinelength);
1088 1088
 					strcat(fullline, buffer);
1089
-				} else {
1090
-					contMarker = FALSE;
1091
-					continue;
1092 1089
 				}
1093 1090
 
1094 1091
 				contMarker = continuationMarker(buffer);
... ...
@@ -1100,8 +1110,9 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1100 1100
 					const char *next = lineGetData(t->t_next->t_line);
1101 1101
 
1102 1102
 					/*
1103
-					 * Section B.2 of RFC822 says TAB or SPACE means
1104
-					 * a continuation of the previous entry.
1103
+					 * Section B.2 of RFC822 says TAB or
1104
+					 * SPACE means a continuation of the
1105
+					 * previous entry.
1105 1106
 					 *
1106 1107
 					 * Add all the arguments on the line
1107 1108
 					 */
... ...
@@ -1126,49 +1137,8 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
1126 1126
 				if(parseEmailHeader(ret, fullline, rfc821) < 0)
1127 1127
 					continue;
1128 1128
 
1129
-				/*
1130
-				 * Ensure that the colon in headers such as
1131
-				 * this doesn't get mistaken for a token
1132
-				 * separator
1133
-				 *	boundary="=.J:gysAG)N(3_zv"
1134
-				 */
1135
-				for(ptr = fullline; *ptr; ptr++)
1136
-					if(*ptr == '\"')
1137
-						inquotes = !inquotes;
1138
-					else if(inquotes) {
1139
-						*ptr |= '\200';
1140
-						arequotes = TRUE;
1141
-					}
1142
-
1143
-				p = cli_strtok(fullline, 1, ":");
1144
-
1145 1129
 				free(fullline);
1146 1130
 				fullline = NULL;
1147
-
1148
-				if(p == NULL)
1149
-					continue;
1150
-#ifdef	CL_THREAD_SAFE
1151
-				for(ptr = strtok_r(p, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
1152
-					if(strchr(ptr, '=')) {
1153
-						if(arequotes) {
1154
-							char *p2;
1155
-							for(p2 = ptr; *p2; p2++)
1156
-								*p2 &= '\177';
1157
-						}
1158
-						messageAddArguments(ret, ptr);
1159
-					}
1160
-#else
1161
-				for(ptr = strtok(p, ";"); ptr; ptr = strtok(NULL, ":"))
1162
-					if(strchr(ptr, '=')) {
1163
-						if(arequotes) {
1164
-							char *p2;
1165
-							for(p2 = ptr; *p2; p2++)
1166
-								*p2 &= '\177';
1167
-						}
1168
-						messageAddArguments(ret, ptr);
1169
-					}
1170
-#endif
1171
-				free(p);
1172 1131
 			}
1173 1132
 		} else
1174 1133
 			/*cli_dbgmsg("Add line to body '%s'\n", buffer);*/
... ...
@@ -1381,10 +1351,10 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
1381 1381
 					if(boundaryStart(lineGetData(t_line->t_line), boundary))
1382 1382
 						break;
1383 1383
 					/*
1384
-					 * Found a uuencoded file before the first multipart
1385
-					 * TODO: check yEnc and binhex here
1384
+					 * Found a uuencoded/binhex file before the first multipart
1385
+					 * TODO: check yEnc
1386 1386
 					 */
1387
-					if(uuencodeBegin(mainMessage) == t_line)
1387
+					if(uuencodeBegin(mainMessage) == t_line) {
1388 1388
 						if(messageGetEncoding(mainMessage) == NOENCODING) {
1389 1389
 							messageSetEncoding(mainMessage, "x-uuencode");
1390 1390
 							fb = messageToFileblob(mainMessage, dir);
... ...
@@ -1392,6 +1362,15 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
1392 1392
 							if(fb)
1393 1393
 								fileblobDestroy(fb);
1394 1394
 						}
1395
+					} else if(binhexBegin(mainMessage) == t_line) {
1396
+						if(messageGetEncoding(mainMessage) == NOENCODING) {
1397
+							messageSetEncoding(mainMessage, "x-binhex");
1398
+							fb = messageToFileblob(mainMessage, dir);
1399
+
1400
+							if(fb)
1401
+								fileblobDestroy(fb);
1402
+						}
1403
+					}
1395 1404
 				}
1396 1405
 			while((t_line = t_line->t_next) != NULL);
1397 1406
 
... ...
@@ -1923,7 +1902,7 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
1923 1923
 							continue;
1924 1924
 						}
1925 1925
 						messageAddStrAtTop(aMessage,
1926
-							"Received: by clamd");
1926
+							"Received: by clamd (message/rfc822)");
1927 1927
 #ifdef	SAVE_TO_DISC
1928 1928
 						/*
1929 1929
 						 * Save this embedded message
... ...
@@ -2058,10 +2037,6 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
2058 2058
 				rc = 0;
2059 2059
 			}
2060 2060
 
2061
-			for(i = 0; i < multiparts; i++)
2062
-				if(messages[i])
2063
-					messageDestroy(messages[i]);
2064
-
2065 2061
 			if(mainMessage && (mainMessage != messageIn))
2066 2062
 				messageDestroy(mainMessage);
2067 2063
 
... ...
@@ -2069,7 +2044,7 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
2069 2069
 				if((fb = fileblobCreate()) != NULL) {
2070 2070
 					cli_dbgmsg("Save non mime part\n");
2071 2071
 					fileblobSetFilename(fb, dir, "textpart");
2072
-					fileblobAddData(fb, "Received: by clamd\n", 19);
2072
+					fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);
2073 2073
 
2074 2074
 					fb = textToFileblob(aText, fb);
2075 2075
 
... ...
@@ -2078,6 +2053,10 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
2078 2078
 				textDestroy(aText);
2079 2079
 			}
2080 2080
 
2081
+			for(i = 0; i < multiparts; i++)
2082
+				if(messages[i])
2083
+					messageDestroy(messages[i]);
2084
+
2081 2085
 			if(messages)
2082 2086
 				free(messages);
2083 2087
 
... ...
@@ -2169,7 +2148,16 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
2169 2169
 	}
2170 2170
 
2171 2171
 	if(aText && (textIn == NULL)) {
2172
-		cli_dbgmsg("Non mime part not saved - report to bugs@clamav.net\n");
2172
+		cli_dbgmsg("Non mime part not scanned - if you believe this file contains a virus report to bugs@clamav.net\n");
2173
+		/*if((fb = fileblobCreate()) != NULL) {
2174
+			cli_dbgmsg("Save non mime part\n");
2175
+			fileblobSetFilename(fb, dir, "textpart");
2176
+			fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);
2177
+
2178
+			fb = textToFileblob(aText, fb);
2179
+
2180
+			fileblobDestroy(fb);
2181
+		}*/
2173 2182
 		textDestroy(aText);
2174 2183
 		aText = NULL;
2175 2184
 	}
... ...
@@ -2208,10 +2196,10 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
2208 2208
 					cli_dbgmsg("Found uuencoded message %s\n", cptr);
2209 2209
 				fileblobDestroy(fb);
2210 2210
 			}
2211
+			rc = 1;
2211 2212
 		} else if((encodingLine(mainMessage) != NULL) &&
2212 2213
 			  ((t_line = bounceBegin(mainMessage)) != NULL)) {
2213
-			const text *t;
2214
-			static const char type[] = "Content-Type:";
2214
+			const text *t, *start;
2215 2215
 			/*
2216 2216
 			 * Attempt to save the original (unbounced)
2217 2217
 			 * message - clamscan will find that in the
... ...
@@ -2231,22 +2219,42 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
2231 2231
 			 * must remain otherwise non bounce messages
2232 2232
 			 * won't be scanned
2233 2233
 			 */
2234
-			for(t = t_line; t; t = t->t_next) {
2234
+			for(t = start = t_line; t; t = t->t_next) {
2235
+				char cmd[LINE_LENGTH + 1];
2235 2236
 				const char *txt = lineGetData(t->t_line);
2236 2237
 
2237
-				if(txt == NULL) {
2238
-					t = NULL;
2239
-					break;
2238
+				if(txt == NULL)
2239
+					continue;
2240
+				if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)
2241
+					continue;
2242
+
2243
+				switch(tableFind(rfc821Table, cmd)) {
2244
+					case CONTENT_TRANSFER_ENCODING:
2245
+						if((strstr(txt, "7bit") == NULL) &&
2246
+						   (strstr(txt, "8bit") == NULL))
2247
+							break;
2248
+						continue;
2249
+					case CONTENT_DISPOSITION:
2250
+						break;
2251
+					case CONTENT_TYPE:
2252
+						if(strstr(txt, "text/plain") != NULL)
2253
+							t = NULL;
2254
+						break;
2255
+					default:
2256
+						if(strcasecmp(cmd, "From") == 0)
2257
+							start = t_line;
2258
+						else if(strcasecmp(cmd, "Received") == 0)
2259
+							start = t_line;
2260
+						continue;
2240 2261
 				}
2241
-				if(txt &&
2242
-				  (strncasecmp(txt, type, sizeof(type) - 1)))
2243
-					break;
2262
+				break;
2244 2263
 			}
2245 2264
 			if(t && ((fb = fileblobCreate()) != NULL)) {
2246 2265
 				cli_dbgmsg("Found a bounce message\n");
2247 2266
 				fileblobSetFilename(fb, dir, "bounce");
2248
-				fb = textToFileblob(t_line, fb);
2267
+				fb = textToFileblob(start, fb);
2249 2268
 				fileblobDestroy(fb);
2269
+				rc = 1;
2250 2270
 			} else
2251 2271
 				cli_dbgmsg("Not found a bounce message\n");
2252 2272
 		} else {
... ...
@@ -2273,7 +2281,7 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
2273 2273
 				if((fb = fileblobCreate()) != NULL) {
2274 2274
 					cli_dbgmsg("Found a bounce message with no header\n");
2275 2275
 					fileblobSetFilename(fb, dir, "bounce");
2276
-					fileblobAddData(fb, "Received: by clamd\n", 19);
2276
+					fileblobAddData(fb, "Received: by clamd (bounce)\n", 28);
2277 2277
 
2278 2278
 					fb = textToFileblob(t_line, fb);
2279 2279
 
... ...
@@ -3394,17 +3402,9 @@ getURL(struct arg *arg)
3394 3394
 	 * Perhaps Curl_resolv() isn't thread safe?
3395 3395
 	 */
3396 3396
 	/*
3397
-	 * Curl 7.12.1 has a memory leak here :-(
3398
-	 *	==10634==    at 0x1B904A90: malloc (vg_replace_malloc.c:131)
3399
-	 *	==10634==    by 0x1BCA2AEF: strdup (in /lib/tls/libc-2.3.3.so)
3400
-	 *	==10634==    by 0x1BCE7CDD: gaih_inet (in /lib/tls/libc-2.3.3.so)
3401
-	 *	==10634==    by 0x1BCE96D0: getaddrinfo (in /lib/tls/libc-2.3.3.so)
3402
-	 *	==10634==    by 0x1B9CCF23: Curl_getaddrinfo (in /usr/lib/libcurl.so.3.0.0)
3403
-	 *	==10634==    by 0x1B9AC7C9: Curl_resolv (in /usr/lib/libcurl.so.3.0.0)
3404
-	 *	==10634==    by 0x1B9BC1BD: Curl_connect (in /usr/lib/libcurl.so.3.0.0)
3405
-	 *	==10634==    by 0x1B9C734C: (within /usr/lib/libcurl.so.3.0.0)
3406
-	 *	==10634==    by 0x1B9C7573: Curl_perform (in /usr/lib/libcurl.so.3.0.0)
3407
-	 *	==10634==    by 0x1B9C7CC1: curl_easy_perform (in /usr/lib/libcurl.so.3.0.0)
3397
+	 * On some C libraries (notably with FC3) you get a memory leak
3398
+	 * here in getaddrinfo(), see
3399
+	 *	https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=139559
3408 3400
 	 */
3409 3401
 
3410 3402
 	if(curl_easy_perform(curl) != CURLE_OK) {
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: message.c,v $
20
+ * Revision 1.119  2004/11/22 15:18:51  nigelhorne
21
+ * Performance work
22
+ *
20 23
  * Revision 1.118  2004/11/18 18:09:08  nigelhorne
21 24
  * First draft of binhex.c
22 25
  *
... ...
@@ -351,7 +354,7 @@
351 351
  * uuencodebegin() no longer static
352 352
  *
353 353
  */
354
-static	char	const	rcsid[] = "$Id: message.c,v 1.118 2004/11/18 18:09:08 nigelhorne Exp $";
354
+static	char	const	rcsid[] = "$Id: message.c,v 1.119 2004/11/22 15:18:51 nigelhorne Exp $";
355 355
 
356 356
 #if HAVE_CONFIG_H
357 357
 #include "clamav-config.h"
... ...
@@ -402,7 +405,6 @@ static	char	const	rcsid[] = "$Id: message.c,v 1.118 2004/11/18 18:09:08 nigelhor
402 402
 typedef enum { FALSE = 0, TRUE = 1 } bool;
403 403
 
404 404
 static	void	messageIsEncoding(message *m);
405
-static	const	text	*binhexBegin(const message *m);
406 405
 static	unsigned char	*decodeLine(message *m, encoding_type enctype, const char *line, unsigned char *buf, size_t buflen);
407 406
 static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
408 407
 static	void	sanitiseBase64(char *s);
... ...
@@ -1228,6 +1230,13 @@ messageIsEncoding(message *m)
1228 1228
 	   (strstr(line, "7bit") == NULL))
1229 1229
 		m->encoding = m->body_last;
1230 1230
 	else if((m->bounce == NULL) &&
1231
+			/*
1232
+			 * Don't match received since emails have a lot of
1233
+			 * these headersa and one could get a match for each
1234
+			 * header which would generate umpteen false bounce
1235
+			 * matches
1236
+			 */
1237
+		(strncasecmp(line, "Received: ", 10) == NULL) &&
1231 1238
 		(cli_filetype(line, strlen(line)) == CL_TYPE_MAIL))
1232 1239
 			m->bounce = m->body_last;
1233 1240
 	else if((m->uuencode == NULL) &&
... ...
@@ -2020,7 +2029,7 @@ binhexBegin(const message *m)
2020 2020
 	return NULL;
2021 2021
 }
2022 2022
 #else
2023
-static const text *
2023
+const text *
2024 2024
 binhexBegin(const message *m)
2025 2025
 {
2026 2026
 	return m->binhex;
... ...
@@ -16,6 +16,9 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: message.h,v $
19
+ * Revision 1.22  2004/11/22 15:18:51  nigelhorne
20
+ * Performance work
21
+ *
19 22
  * Revision 1.21  2004/10/16 17:24:15  nigelhorne
20 23
  * Handle colons in quotes in headers
21 24
  *
... ...
@@ -121,6 +124,7 @@ void	messageClean(message *m);
121 121
 fileblob	*messageToFileblob(message *m, const char *dir);
122 122
 blob	*messageToBlob(message *m);
123 123
 text	*messageToText(message *m);
124
+const	text	*binhexBegin(const message *m);
124 125
 const	text	*uuencodeBegin(const message *m);
125 126
 const	text	*yEncBegin(const message *m);
126 127
 const	text	*bounceBegin(const message *m);