git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@287 77e5149b-7576-45b1-b177-96237e5ba77b
Nigel Horne authored on 2004/02/15 17:49:05... | ... |
@@ -1,3 +1,13 @@ |
1 |
+Sun Feb 15 08:50:21 GMT 2004 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav: added blobClose and blobcmp to reduce likelihood of scanning the |
|
4 |
+ same file twice and to reduce the runtime memory usage |
|
5 |
+ support video within multiparts |
|
6 |
+ better warning of unsupported types that may need to be added later |
|
7 |
+ removed some unneeded recursion that resulted in some needless |
|
8 |
+ scanning twice of the same attachment on messages without |
|
9 |
+ any text content |
|
10 |
+ |
|
1 | 11 |
Sun Feb 15 02:03:47 CET 2004 (tk) |
2 | 12 |
--------------------------------- |
3 | 13 |
* freshclam: use O_BINARY when saving database under Cygwin |
... | ... |
@@ -16,11 +16,14 @@ |
16 | 16 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
17 | 17 |
* |
18 | 18 |
* $Log: blob.c,v $ |
19 |
+ * Revision 1.7 2004/02/15 08:45:53 nigelhorne |
|
20 |
+ * Avoid scanning the same file twice |
|
21 |
+ * |
|
19 | 22 |
* Revision 1.6 2004/02/10 19:23:54 nigelhorne |
20 | 23 |
* Change LOG to Log |
21 | 24 |
* |
22 | 25 |
*/ |
23 |
-static char const rcsid[] = "$Id: blob.c,v 1.6 2004/02/10 19:23:54 nigelhorne Exp $"; |
|
26 |
+static char const rcsid[] = "$Id: blob.c,v 1.7 2004/02/15 08:45:53 nigelhorne Exp $"; |
|
24 | 27 |
|
25 | 28 |
#if HAVE_CONFIG_H |
26 | 29 |
#include "clamav-config.h" |
... | ... |
@@ -139,6 +142,14 @@ blobAddData(blob *b, const unsigned char *data, size_t len) |
139 | 139 |
if(len == 0) |
140 | 140 |
return; |
141 | 141 |
|
142 |
+ if(b->isClosed) { |
|
143 |
+ /* |
|
144 |
+ * Should be cli_dbgmsg, but I want to see them for now, |
|
145 |
+ * and cli_dbgmsg doesn't support debug levels |
|
146 |
+ */ |
|
147 |
+ cli_warnmsg("Reopening closed blob\n"); |
|
148 |
+ b->isClosed = 0; |
|
149 |
+ } |
|
142 | 150 |
if(b->data == NULL) { |
143 | 151 |
assert(b->len == 0); |
144 | 152 |
b->size = len * 4; |
... | ... |
@@ -171,3 +182,37 @@ blobGetDataSize(const blob *b) |
171 | 171 |
|
172 | 172 |
return(b->len); |
173 | 173 |
} |
174 |
+ |
|
175 |
+void |
|
176 |
+blobClose(blob *b) |
|
177 |
+{ |
|
178 |
+ b->isClosed = 1; |
|
179 |
+ |
|
180 |
+ if(b->size != b->len) { |
|
181 |
+ b->size = b->len; |
|
182 |
+ b->data = cli_realloc(b->data, b->size); |
|
183 |
+ } |
|
184 |
+} |
|
185 |
+ |
|
186 |
+/* |
|
187 |
+ * Returns 0 if the blobs are the same |
|
188 |
+ */ |
|
189 |
+int |
|
190 |
+blobcmp(const blob *b1, const blob *b2) |
|
191 |
+{ |
|
192 |
+ unsigned long s1, s2; |
|
193 |
+ |
|
194 |
+ assert(b1 != NULL); |
|
195 |
+ assert(b2 != NULL); |
|
196 |
+ |
|
197 |
+ if(b1 == b2) |
|
198 |
+ return 0; |
|
199 |
+ |
|
200 |
+ s1 = blobGetDataSize(b1); |
|
201 |
+ s2 = blobGetDataSize(b2); |
|
202 |
+ |
|
203 |
+ if(s1 != s2) |
|
204 |
+ return 1; |
|
205 |
+ |
|
206 |
+ return memcmp(blobGetData(b1), blobGetData(b2), s1); |
|
207 |
+} |
... | ... |
@@ -23,6 +23,7 @@ typedef struct blob { |
23 | 23 |
unsigned char *data; /* the stuff itself */ |
24 | 24 |
unsigned long len; /* number of bytes of data so far */ |
25 | 25 |
unsigned long size; /* number of bytes allocated to data so far */ |
26 |
+ int isClosed; |
|
26 | 27 |
#ifdef CL_DEBUG |
27 | 28 |
object_type magic; /* verify that this is a blob */ |
28 | 29 |
#endif |
... | ... |
@@ -36,3 +37,5 @@ const char *blobGetFilename(const blob *b); |
36 | 36 |
void blobAddData(blob *b, const unsigned char *data, size_t len); |
37 | 37 |
unsigned char *blobGetData(const blob *b); |
38 | 38 |
unsigned long blobGetDataSize(const blob *b); |
39 |
+void blobClose(blob *b); |
|
40 |
+int blobcmp(const blob *b1, const blob *b2); |
... | ... |
@@ -17,6 +17,9 @@ |
17 | 17 |
* |
18 | 18 |
* Change History: |
19 | 19 |
* $Log: mbox.c,v $ |
20 |
+ * Revision 1.44 2004/02/15 08:45:54 nigelhorne |
|
21 |
+ * Avoid scanning the same file twice |
|
22 |
+ * |
|
20 | 23 |
* Revision 1.43 2004/02/14 19:04:05 nigelhorne |
21 | 24 |
* Handle spaces in boundaries |
22 | 25 |
* |
... | ... |
@@ -120,7 +123,7 @@ |
120 | 120 |
* Compilable under SCO; removed duplicate code with message.c |
121 | 121 |
* |
122 | 122 |
*/ |
123 |
-static char const rcsid[] = "$Id: mbox.c,v 1.43 2004/02/14 19:04:05 nigelhorne Exp $"; |
|
123 |
+static char const rcsid[] = "$Id: mbox.c,v 1.44 2004/02/15 08:45:54 nigelhorne Exp $"; |
|
124 | 124 |
|
125 | 125 |
#if HAVE_CONFIG_H |
126 | 126 |
#include "clamav-config.h" |
... | ... |
@@ -548,7 +551,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
548 | 548 |
|
549 | 549 |
/* Anything left to be parsed? */ |
550 | 550 |
if(mainMessage && (messageGetBody(mainMessage) != NULL)) { |
551 |
- int numberOfAttachments = 0; |
|
551 |
+ int numberOfAttachments = 0, numberOfNewAttachments; |
|
552 | 552 |
mime_type mimeType; |
553 | 553 |
const char *mimeSubtype; |
554 | 554 |
const text *t_line; |
... | ... |
@@ -988,6 +991,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
988 | 988 |
continue; |
989 | 989 |
case AUDIO: |
990 | 990 |
case IMAGE: |
991 |
+ case VIDEO: |
|
991 | 992 |
/* |
992 | 993 |
* TODO: it may be nice to |
993 | 994 |
* have an option to throw |
... | ... |
@@ -997,7 +1001,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
997 | 997 |
addAttachment = TRUE; |
998 | 998 |
break; |
999 | 999 |
default: |
1000 |
- cli_dbgmsg("Only text and application attachments are supported, type = %d\n", |
|
1000 |
+ cli_warnmsg("Only text and application attachments are supported, type = %d\n", |
|
1001 | 1001 |
messageGetMimeType(aMessage)); |
1002 | 1002 |
continue; |
1003 | 1003 |
} |
... | ... |
@@ -1016,8 +1020,8 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
1016 | 1016 |
|
1017 | 1017 |
if(aBlob) { |
1018 | 1018 |
assert(blobGetFilename(aBlob) != NULL); |
1019 |
- /*if(blobGetDataSize(aBlob) > 0)*/ |
|
1020 |
- blobList[numberOfAttachments++] = aBlob; |
|
1019 |
+ blobClose(aBlob); |
|
1020 |
+ blobList[numberOfAttachments++] = aBlob; |
|
1021 | 1021 |
} |
1022 | 1022 |
} |
1023 | 1023 |
} |
... | ... |
@@ -1027,18 +1031,44 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
1027 | 1027 |
rc = parseEmailBody(NULL, NULL, 0, aText, dir, rfc821Table, subtypeTable); |
1028 | 1028 |
break; |
1029 | 1029 |
} |
1030 |
+ |
|
1030 | 1031 |
/* |
1031 | 1032 |
* Store any existing attachments at the end of |
1032 | 1033 |
* the list we've just built up |
1033 | 1034 |
*/ |
1035 |
+ numberOfNewAttachments = 0; |
|
1034 | 1036 |
for(i = 0; i < nBlobs; i++) { |
1037 |
+ int j; |
|
1035 | 1038 |
#ifdef CL_DEBUG |
1036 | 1039 |
assert(blobs[i]->magic == BLOB); |
1037 | 1040 |
#endif |
1038 |
- blobList[numberOfAttachments++] = blobs[i]; |
|
1041 |
+ for(j = 0; j < numberOfAttachments; j++) |
|
1042 |
+ if(blobcmp(blobs[i], blobList[j]) == 0) |
|
1043 |
+ break; |
|
1044 |
+ if(j >= numberOfAttachments) { |
|
1045 |
+ assert(numberOfAttachments < MAX_ATTACHMENTS); |
|
1046 |
+ cli_dbgmsg("Attaching %s to list of blobs\n", |
|
1047 |
+ blobGetFilename(blobs[i])); |
|
1048 |
+ blobClose(blobs[i]); |
|
1049 |
+ blobList[numberOfAttachments++] = blobs[i]; |
|
1050 |
+ numberOfNewAttachments++; |
|
1051 |
+ } else { |
|
1052 |
+ cli_warnmsg("Don't scan the same file twice as '%s' and '%s'\n", |
|
1053 |
+ blobGetFilename(blobs[i]), |
|
1054 |
+ blobGetFilename(blobList[j])); |
|
1055 |
+ blobDestroy(blobs[i]); |
|
1056 |
+ } |
|
1039 | 1057 |
} |
1040 | 1058 |
|
1041 | 1059 |
/* |
1060 |
+ * If we've found nothing new save what we have |
|
1061 |
+ * and quit - that's this part all done. |
|
1062 |
+ */ |
|
1063 |
+ if(numberOfNewAttachments == 0) { |
|
1064 |
+ rc = parseEmailBody(NULL, blobList, numberOfAttachments, NULL, dir, rfc821Table, subtypeTable); |
|
1065 |
+ break; |
|
1066 |
+ } |
|
1067 |
+ /* |
|
1042 | 1068 |
* If there's only one part of the MULTIPART |
1043 | 1069 |
* we already have the body to decode so |
1044 | 1070 |
* there's no more work to do. |
... | ... |
@@ -1199,6 +1229,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con |
1199 | 1199 |
for(i = 0; i < nBlobs; i++) |
1200 | 1200 |
if(blobs[i] == NULL) |
1201 | 1201 |
break; |
1202 |
+ blobClose(aBlob); |
|
1202 | 1203 |
blobs[i] = aBlob; |
1203 | 1204 |
if(i == nBlobs) { |
1204 | 1205 |
nBlobs++; |