Browse code

Avoid scanning the same file twice

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@287 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/02/15 17:49:05
Showing 4 changed files
... ...
@@ -1,3 +1,13 @@
1
+Sun Feb 15 08:50:21 GMT 2004 (njh)
2
+----------------------------------
3
+  * libclamav:	added blobClose and blobcmp to reduce the likelihood of scanning the
4
+			same file twice and to reduce the runtime memory usage
5
+	support video within multiparts
6
+	better warning of unsupported types that may need to be added later
7
+	removed some unneeded recursiveness that resulted in some needless
8
+		scanning twice of the same attachment on messages without
9
+		any text content
10
+
1 11
 Sun Feb 15 02:03:47 CET 2004 (tk)
2 12
 ---------------------------------
3 13
   * freshclam: use O_BINARY when saving database under Cygwin
... ...
@@ -16,11 +16,14 @@
16 16
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 17
  *
18 18
  * $Log: blob.c,v $
19
+ * Revision 1.7  2004/02/15 08:45:53  nigelhorne
20
+ * Avoid scanning the same file twice
21
+ *
19 22
  * Revision 1.6  2004/02/10 19:23:54  nigelhorne
20 23
  * Change LOG to Log
21 24
  *
22 25
  */
23
-static	char	const	rcsid[] = "$Id: blob.c,v 1.6 2004/02/10 19:23:54 nigelhorne Exp $";
26
+static	char	const	rcsid[] = "$Id: blob.c,v 1.7 2004/02/15 08:45:53 nigelhorne Exp $";
24 27
 
25 28
 #if HAVE_CONFIG_H
26 29
 #include "clamav-config.h"
... ...
@@ -139,6 +142,14 @@ blobAddData(blob *b, const unsigned char *data, size_t len)
139 139
 	if(len == 0)
140 140
 		return;
141 141
 
142
+	if(b->isClosed) {
143
+		/*
144
+		 * Should be cli_dbgmsg, but I want to see them for now,
145
+		 * and cli_dbgmsg doesn't support debug levels
146
+		 */
147
+		cli_warnmsg("Reopening closed blob\n");
148
+		b->isClosed = 0;
149
+	}
142 150
 	if(b->data == NULL) {
143 151
 		assert(b->len == 0);
144 152
 		b->size = len * 4;
... ...
@@ -171,3 +182,37 @@ blobGetDataSize(const blob *b)
171 171
 
172 172
 	return(b->len);
173 173
 }
174
+
175
+void
176
+blobClose(blob *b)
177
+{
178
+	b->isClosed = 1;
179
+
180
+	if(b->size != b->len) {
181
+		b->size = b->len;
182
+		b->data = cli_realloc(b->data, b->size);
183
+	}
184
+}
185
+
186
+/*
187
+ * Returns 0 if the blobs are the same
188
+ */
189
+int
190
+blobcmp(const blob *b1, const blob *b2)
191
+{
192
+	unsigned long s1, s2;
193
+
194
+	assert(b1 != NULL);
195
+	assert(b2 != NULL);
196
+
197
+	if(b1 == b2)
198
+		return 0;
199
+
200
+	s1 = blobGetDataSize(b1);
201
+	s2 = blobGetDataSize(b2);
202
+
203
+	if(s1 != s2)
204
+		return 1;
205
+
206
+	return memcmp(blobGetData(b1), blobGetData(b2), s1);
207
+}
... ...
@@ -23,6 +23,7 @@ typedef struct blob {
23 23
 	unsigned	char	*data;	/* the stuff itself */
24 24
 	unsigned	long	len;	/* number of bytes of data so far */
25 25
 	unsigned	long	size;	/* number of bytes allocated to data so far */
26
+	int	isClosed;
26 27
 #ifdef	CL_DEBUG
27 28
 	object_type	magic;	/* verify that this is a blob */
28 29
 #endif
... ...
@@ -36,3 +37,5 @@ const	char	*blobGetFilename(const blob *b);
36 36
 void	blobAddData(blob *b, const unsigned char *data, size_t len);
37 37
 unsigned char *blobGetData(const blob *b);
38 38
 unsigned	long	blobGetDataSize(const blob *b);
39
+void	blobClose(blob *b);
40
+int	blobcmp(const blob *b1, const blob *b2);
... ...
@@ -17,6 +17,9 @@
17 17
  *
18 18
  * Change History:
19 19
  * $Log: mbox.c,v $
20
+ * Revision 1.44  2004/02/15 08:45:54  nigelhorne
21
+ * Avoid scanning the same file twice
22
+ *
20 23
  * Revision 1.43  2004/02/14 19:04:05  nigelhorne
21 24
  * Handle spaces in boundaries
22 25
  *
... ...
@@ -120,7 +123,7 @@
120 120
  * Compilable under SCO; removed duplicate code with message.c
121 121
  *
122 122
  */
123
-static	char	const	rcsid[] = "$Id: mbox.c,v 1.43 2004/02/14 19:04:05 nigelhorne Exp $";
123
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.44 2004/02/15 08:45:54 nigelhorne Exp $";
124 124
 
125 125
 #if HAVE_CONFIG_H
126 126
 #include "clamav-config.h"
... ...
@@ -548,7 +551,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
548 548
 
549 549
 	/* Anything left to be parsed? */
550 550
 	if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
551
-		int numberOfAttachments = 0;
551
+		int numberOfAttachments = 0, numberOfNewAttachments;
552 552
 		mime_type mimeType;
553 553
 		const char *mimeSubtype;
554 554
 		const text *t_line;
... ...
@@ -988,6 +991,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
988 988
 						continue;
989 989
 					case AUDIO:
990 990
 					case IMAGE:
991
+					case VIDEO:
991 992
 						/*
992 993
 						 * TODO: it may be nice to
993 994
 						 * have an option to throw
... ...
@@ -997,7 +1001,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
997 997
 						addAttachment = TRUE;
998 998
 						break;
999 999
 					default:
1000
-						cli_dbgmsg("Only text and application attachments are supported, type = %d\n",
1000
+						cli_warnmsg("Only text and application attachments are supported, type = %d\n",
1001 1001
 							messageGetMimeType(aMessage));
1002 1002
 						continue;
1003 1003
 					}
... ...
@@ -1016,8 +1020,8 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1016 1016
 
1017 1017
 						if(aBlob) {
1018 1018
 							assert(blobGetFilename(aBlob) != NULL);
1019
-							/*if(blobGetDataSize(aBlob) > 0)*/
1020
-								blobList[numberOfAttachments++] = aBlob;
1019
+							blobClose(aBlob);
1020
+							blobList[numberOfAttachments++] = aBlob;
1021 1021
 						}
1022 1022
 					}
1023 1023
 				}
... ...
@@ -1027,18 +1031,44 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1027 1027
 					rc = parseEmailBody(NULL, NULL, 0, aText, dir, rfc821Table, subtypeTable);
1028 1028
 					break;
1029 1029
 				}
1030
+
1030 1031
 				/*
1031 1032
 				 * Store any existing attachments at the end of
1032 1033
 				 * the list we've just built up
1033 1034
 				 */
1035
+				numberOfNewAttachments = 0;
1034 1036
 				for(i = 0; i < nBlobs; i++) {
1037
+					int j;
1035 1038
 #ifdef	CL_DEBUG
1036 1039
 					assert(blobs[i]->magic == BLOB);
1037 1040
 #endif
1038
-					blobList[numberOfAttachments++] = blobs[i];
1041
+					for(j = 0; j < numberOfAttachments; j++)
1042
+						if(blobcmp(blobs[i], blobList[j]) == 0)
1043
+							break;
1044
+					if(j >= numberOfAttachments) {
1045
+						assert(numberOfAttachments < MAX_ATTACHMENTS);
1046
+						cli_dbgmsg("Attaching %s to list of blobs\n",
1047
+							blobGetFilename(blobs[i]));
1048
+						blobClose(blobs[i]);
1049
+						blobList[numberOfAttachments++] = blobs[i];
1050
+						numberOfNewAttachments++;
1051
+					} else {
1052
+						cli_warnmsg("Don't scan the same file twice as '%s' and '%s'\n",
1053
+							blobGetFilename(blobs[i]),
1054
+							blobGetFilename(blobList[j]));
1055
+						blobDestroy(blobs[i]);
1056
+					}
1039 1057
 				}
1040 1058
 
1041 1059
 				/*
1060
+				 * If we've found nothing new save what we have
1061
+				 * and quit - that's this part all done.
1062
+				 */
1063
+				if(numberOfNewAttachments == 0) {
1064
+					rc = parseEmailBody(NULL, blobList, numberOfAttachments, NULL, dir, rfc821Table, subtypeTable);
1065
+					break;
1066
+				}
1067
+				/*
1042 1068
 				 * If there's only one part of the MULTIPART
1043 1069
 				 * we already have the body to decode so
1044 1070
 				 * there's no more work to do.
... ...
@@ -1199,6 +1229,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con
1199 1199
 					for(i = 0; i < nBlobs; i++)
1200 1200
 						if(blobs[i] == NULL)
1201 1201
 							break;
1202
+					blobClose(aBlob);
1202 1203
 					blobs[i] = aBlob;
1203 1204
 					if(i == nBlobs) {
1204 1205
 						nBlobs++;