Browse code

use cli_strdup/free instead of blobs (bb #828)

git-svn: trunk@4203

Török Edvin authored on 2008/09/24 05:52:33
Showing 6 changed files
... ...
@@ -48,11 +48,6 @@
48 48
 #include "others.h"
49 49
 #include "htmlnorm.h"
50 50
 
51
-typedef enum {
52
-        INVALIDCLASS, BLOBCLASS
53
-} object_type;
54
-#include "blob.h"
55
-
56 51
 #include "entconv.h"
57 52
 #include "jsparse/js-norm.h"
58 53
 
... ...
@@ -101,6 +96,11 @@ typedef struct file_buff_tag {
101 101
 	int length;
102 102
 } file_buff_t;
103 103
 
104
+struct tag_contents {
105
+	unsigned char contents[MAX_TAG_CONTENTS_LENGTH + 1];
106
+	size_t pos;
107
+};
108
+
104 109
 static const int base64_chars[256] = {
105 110
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
106 111
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
... ...
@@ -359,7 +359,7 @@ static void html_tag_arg_add(tag_arguments_t *tags,
359 359
 		goto abort;
360 360
 	}
361 361
 	if(tags->scanContents) {
362
-		tags->contents= (blob **) cli_realloc2(tags->contents,
362
+		tags->contents= (unsigned char **) cli_realloc2(tags->contents,
363 363
 				tags->count*sizeof(*tags->contents));
364 364
 		if(!tags->contents) {
365 365
 			goto abort;
... ...
@@ -394,7 +394,7 @@ abort:
394 394
 		}
395 395
 		if(tags->contents) {
396 396
 			if(tags->contents[i])
397
-				blobDestroy(tags->contents[i]);
397
+				free(tags->contents[i]);
398 398
 		}
399 399
 	}
400 400
 	if (tags->tag) {
... ...
@@ -443,7 +443,7 @@ void html_tag_arg_free(tag_arguments_t *tags)
443 443
 		}
444 444
 		if(tags->contents)
445 445
 			if (tags->contents[i])
446
-				blobDestroy(tags->contents[i]);
446
+				free(tags->contents[i]);
447 447
 	}
448 448
 	if (tags->tag) {
449 449
 		free(tags->tag);
... ...
@@ -459,36 +459,30 @@ void html_tag_arg_free(tag_arguments_t *tags)
459 459
 }
460 460
 
461 461
 /**
462
- * this is used for img, and iframe tags. If they are inside an <a href> tag, then set the contents of the image|iframe to the real URL.
463
- */
464
-static inline void html_tag_set_inahref(tag_arguments_t *tags,int idx,int in_ahref)
465
-{
466
-	tags->contents[idx-1]=blobCreate();
467
-	blobAddData(tags->contents[idx-1],tags->value[in_ahref-1],strlen(tags->value[in_ahref-1]));
468
-	blobAddData(tags->contents[idx-1], "",1);
469
-	blobClose(tags->contents[idx-1]);
470
-}
471
-
472
-/**
473 462
  * the displayed text for an <a href> tag
474 463
  */
475
-static inline void html_tag_contents_append(tag_arguments_t *tags,int idx,const unsigned char* begin,const unsigned char *end)
464
+static inline void html_tag_contents_append(struct tag_contents *cont, const unsigned char* begin,const unsigned char *end)
476 465
 {
477
-	if(end && (begin<end)) {
478
-		const size_t blob_len = blobGetDataSize(tags->contents[idx-1]);
479
-		const size_t blob_sizeleft = blob_len <= MAX_TAG_CONTENTS_LENGTH ? (MAX_TAG_CONTENTS_LENGTH - blob_len) : 0;
480
-		const size_t str_len = end - begin;
481
-		if(blob_sizeleft)
482
-			blobAddData(tags->contents[idx-1],begin, blob_sizeleft < str_len ? blob_sizeleft : str_len );
466
+	size_t i;
467
+	if(!begin || !end)
468
+		return;
469
+	for(i = cont->pos; i < MAX_TAG_CONTENTS_LENGTH && (begin < end);i++) {
470
+		cont->contents[i] = *begin++;
483 471
 	}
472
+	cont->pos = i;
484 473
 }
485 474
 
486 475
 
487
-static inline void html_tag_contents_done(tag_arguments_t *tags,int idx)
476
+static inline void html_tag_contents_done(tag_arguments_t *tags,int idx, struct tag_contents *cont)
488 477
 {
489
-	/* append NUL byte */
490
-	blobAddData(tags->contents[idx-1], "", 1);
491
-	blobClose(tags->contents[idx-1]);
478
+	unsigned char *p;
479
+	cont->contents[cont->pos++] = '\0';
480
+	p = cli_malloc(cont->pos);
481
+	if(!p)
482
+		return;
483
+	memcpy(p, cont->contents, cont->pos);
484
+	tags->contents[idx-1] = p;
485
+	cont->pos = 0;
492 486
 }
493 487
 
494 488
 struct screnc_state {
... ...
@@ -628,8 +622,10 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
628 628
 	/* dconf for phishing engine sets scanContents, so no need for a flag here */
629 629
 	struct parser_state *js_state = NULL;
630 630
 	const unsigned char *js_begin = NULL, *js_end = NULL;
631
+	struct tag_contents contents;
631 632
 
632 633
 	tag_args.scanContents=0;/* do we need to store the contents of <a></a>?*/
634
+	contents.pos = 0;
633 635
 	if (!m_area) {
634 636
 		if (fd < 0) {
635 637
 			cli_dbgmsg("Invalid HTML fd\n");
... ...
@@ -755,7 +751,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
755 755
 					}
756 756
 					if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin) {
757 757
 						/*append this text portion to the contents of <a>*/
758
-						html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr);
758
+						html_tag_contents_append(&contents,href_contents_begin,ptr);
759 759
 						href_contents_begin=NULL;/*We just encountered another tag inside <a>, so skip it*/
760 760
 					}
761 761
 					ptr++;
... ...
@@ -1052,7 +1048,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1052 1052
 					}
1053 1053
 					if (hrefs && hrefs->scanContents && in_ahref) {
1054 1054
 						if(strcmp(tag,"/a") == 0) {
1055
-							html_tag_contents_done(hrefs,in_ahref);
1055
+							html_tag_contents_done(hrefs,in_ahref, &contents);
1056 1056
 							in_ahref=0;/* we are no longer inside an <a href>
1057 1057
 							nesting <a> tags not supported, and shouldn't be supported*/
1058 1058
 						}
... ...
@@ -1110,34 +1106,32 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1110 1110
 								if (in_ahref)
1111 1111
 									/*we encountered nested <a> tags, pretend previous closed*/
1112 1112
 									if (href_contents_begin) {
1113
-										html_tag_contents_append(hrefs,in_ahref,
1114
-											href_contents_begin,ptrend);
1113
+										html_tag_contents_append(&contents, href_contents_begin, ptrend);
1115 1114
 										/*add pending contents between tags*/
1116
-										html_tag_contents_done(hrefs,in_ahref);
1115
+										html_tag_contents_done(hrefs, in_ahref, &contents);
1117 1116
 										in_ahref=0;
1118 1117
 										}
1119 1118
 								if (arg_value_title) {
1120 1119
 									/* title is a 'displayed link'*/
1121 1120
 									html_tag_arg_add(hrefs,"href_title",arg_value_title);
1122
-									hrefs->contents[hrefs->count-1]=blobCreate();
1123
-									html_tag_contents_append(hrefs,hrefs->count,arg_value,
1121
+									html_tag_contents_append(&contents,arg_value,
1124 1122
 										arg_value+strlen(arg_value));
1125
-									html_tag_contents_done(hrefs,hrefs->count);
1123
+									html_tag_contents_done(hrefs, hrefs->count, &contents);
1126 1124
 								}
1127 1125
 								if (in_form_action) {
1128 1126
 									/* form action is the real URL, and href is the 'displayed' */
1129 1127
 									html_tag_arg_add(hrefs,"form",arg_value);
1130
-									hrefs->contents[hrefs->count-1] =  blobCreate();
1131
-									html_tag_contents_append(hrefs, hrefs->count, in_form_action,
1128
+									contents.pos = 0;
1129
+									html_tag_contents_append(&contents, in_form_action,
1132 1130
 											in_form_action + strlen(in_form_action));
1133
-									html_tag_contents_done(hrefs,hrefs->count);
1131
+									html_tag_contents_done(hrefs, hrefs->count, &contents);
1134 1132
 								}
1135 1133
 							}
1136 1134
 							html_tag_arg_add(hrefs, "href", arg_value);
1137 1135
 							if (hrefs->scanContents) {
1138 1136
 								in_ahref=hrefs->count; /* index of this tag (counted from 1) */
1139 1137
 								href_contents_begin=ptr;/* contents begin after <a ..> ends */
1140
-								hrefs->contents[hrefs->count-1]=blobCreate();
1138
+								contents.pos = 0;
1141 1139
 							}
1142 1140
 						}
1143 1141
 					} else if (strcmp(tag,"form") == 0 && hrefs->scanContents) {
... ...
@@ -1153,14 +1147,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1153 1153
 							html_tag_arg_add(hrefs, "src", arg_value);
1154 1154
 							if(hrefs->scanContents && in_ahref)
1155 1155
 								/* "contents" of an img tag, is the URL of its parent <a> tag */
1156
-								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
1156
+								hrefs->contents[hrefs->count-1] = cli_strdup(hrefs->value[in_ahref-1]);
1157 1157
 							if (in_form_action) {
1158 1158
 								/* form action is the real URL, and href is the 'displayed' */
1159 1159
 								html_tag_arg_add(hrefs,"form",arg_value);
1160
-								hrefs->contents[hrefs->count-1] =  blobCreate();
1161
-								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
1160
+								contents.pos = 0;
1161
+								html_tag_contents_append(&contents, in_form_action,
1162 1162
 										in_form_action + strlen(in_form_action));
1163
-								html_tag_contents_done(hrefs,hrefs->count);
1163
+								html_tag_contents_done(hrefs, hrefs->count, &contents);
1164 1164
 							}
1165 1165
 						}
1166 1166
 						arg_value = html_tag_arg_value(&tag_args, "dynsrc");
... ...
@@ -1168,14 +1162,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1168 1168
 							html_tag_arg_add(hrefs, "dynsrc", arg_value);
1169 1169
 							if(hrefs->scanContents && in_ahref)
1170 1170
 								/* see above */
1171
-								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
1171
+								hrefs->contents[hrefs->count-1] = cli_strdup(hrefs->value[in_ahref-1]);
1172 1172
 							if (in_form_action) {
1173 1173
 								/* form action is the real URL, and href is the 'displayed' */
1174 1174
 								html_tag_arg_add(hrefs,"form",arg_value);
1175
-								hrefs->contents[hrefs->count-1] =  blobCreate();
1176
-								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
1175
+								contents.pos = 0;
1176
+								html_tag_contents_append(&contents, in_form_action,
1177 1177
 										in_form_action + strlen(in_form_action));
1178
-								html_tag_contents_done(hrefs,hrefs->count);
1178
+								html_tag_contents_done(hrefs, hrefs->count, &contents);
1179 1179
 							}
1180 1180
 						}
1181 1181
 					} else if (strcmp(tag, "iframe") == 0) {
... ...
@@ -1184,14 +1178,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1184 1184
 							html_tag_arg_add(hrefs, "iframe", arg_value);
1185 1185
 							if(hrefs->scanContents && in_ahref)
1186 1186
 								/* see above */
1187
-								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
1187
+								hrefs->contents[hrefs->count-1] = cli_strdup(hrefs->value[in_ahref-1]);
1188 1188
 							if (in_form_action) {
1189 1189
 								/* form action is the real URL, and href is the 'displayed' */
1190 1190
 								html_tag_arg_add(hrefs,"form",arg_value);
1191
-								hrefs->contents[hrefs->count-1] =  blobCreate();
1192
-								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
1191
+								contents.pos = 0;
1192
+								html_tag_contents_append(&contents, in_form_action,
1193 1193
 										in_form_action + strlen(in_form_action));
1194
-								html_tag_contents_done(hrefs,hrefs->count);
1194
+								html_tag_contents_done(hrefs, hrefs->count, &contents);
1195 1195
 							}
1196 1196
 						}
1197 1197
 					} else if (strcmp(tag,"area") == 0) {
... ...
@@ -1200,14 +1194,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1200 1200
 							html_tag_arg_add(hrefs, "area", arg_value);
1201 1201
 							if(hrefs->scanContents && in_ahref)
1202 1202
 								/* see above */
1203
-								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
1203
+								hrefs->contents[hrefs->count-1] = cli_strdup(hrefs->value[in_ahref-1]);
1204 1204
 							if (in_form_action) {
1205 1205
 								/* form action is the real URL, and href is the 'displayed' */
1206 1206
 								html_tag_arg_add(hrefs,"form",arg_value);
1207
-								hrefs->contents[hrefs->count-1] =  blobCreate();
1208
-								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
1207
+								contents.pos = 0;
1208
+								html_tag_contents_append(&contents, in_form_action,
1209 1209
 									in_form_action + strlen(in_form_action));
1210
-								html_tag_contents_done(hrefs,hrefs->count);
1210
+								html_tag_contents_done(hrefs, hrefs->count, &contents);
1211 1211
 							}
1212 1212
 						}
1213 1213
 					}
... ...
@@ -1605,7 +1599,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1605 1605
 		}
1606 1606
 		if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin)
1607 1607
 			/* end of line, append contents now, resume on next line */
1608
-			html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr);
1608
+			html_tag_contents_append(&contents,href_contents_begin,ptr);
1609 1609
 		ptrend = NULL;
1610 1610
 
1611 1611
 		if(js_state) {
... ...
@@ -1663,8 +1657,8 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
1663 1663
 abort:
1664 1664
 	if (in_form_action)
1665 1665
 		free(in_form_action);
1666
-	if (in_ahref) /* tag not closed, force closing */
1667
-		html_tag_contents_done(hrefs,in_ahref);
1666
+        if (in_ahref) /* tag not closed, force closing */
1667
+                html_tag_contents_done(hrefs, in_ahref, &contents);
1668 1668
 
1669 1669
 	if(js_state) {
1670 1670
 		/*  output script so far */
... ...
@@ -27,7 +27,7 @@ typedef struct tag_arguments_tag
27 27
 	int scanContents;
28 28
         unsigned char **tag;
29 29
         unsigned char **value;
30
-	struct blob   **contents; 
30
+	unsigned char **contents;
31 31
 } tag_arguments_t;
32 32
 
33 33
 typedef struct m_area_tag {
... ...
@@ -4038,7 +4038,7 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
4038 4038
 	b = getHrefs(mainMessage, &hrefs);
4039 4039
 	if(b) {
4040 4040
 		if(hrefs.scanContents) {
4041
-			if(phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) {
4041
+			if(phishingScan(mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) {
4042 4042
 				/*
4043 4043
 				 * FIXME: message objects' contents are
4044 4044
 				 *	encapsulated so we should not access
... ...
@@ -41,8 +41,6 @@
41 41
 #include "clamav.h"
42 42
 #include "cltypes.h"
43 43
 #include "others.h"
44
-#include "mbox.h"
45
-#include "message.h"
46 44
 #include "htmlnorm.h"
47 45
 #include "phishcheck.h"
48 46
 #include "phish_domaincheck_db.h"
... ...
@@ -726,7 +724,7 @@ cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
726 726
 }
727 727
 
728 728
 /* -------end runtime disable---------*/
729
-int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
729
+int phishingScan(const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
730 730
 {
731 731
 	/* TODO: get_host and then apply regex, etc. */
732 732
 	int i;
... ...
@@ -788,12 +786,8 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
788 788
 				urls.always_check_flags |= CHECK_CLOAKING;
789 789
 			}
790 790
 			string_init_c(&urls.realLink,(char*)hrefs->value[i]);
791
-			string_init_c(&urls.displayLink,(char*)blobGetData(hrefs->contents[i]));
791
+			string_init_c(&urls.displayLink, hrefs->contents[i]);
792 792
 			string_init_c(&urls.pre_fixup.pre_displayLink, NULL);
793
-			if (urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]) {
794
-				cli_warnmsg("urls.displayLink.data[...]");
795
-				return CL_CLEAN;
796
-			}
797 793
 
798 794
 			urls.realLink.refcount=-1;
799 795
 			urls.displayLink.refcount=-1;/*don't free these, caller will free*/
... ...
@@ -23,6 +23,7 @@
23 23
 #define _PHISH_CHECK_H
24 24
 
25 25
 #include "regex/regex.h"
26
+#include "htmlnorm.h"
26 27
 
27 28
 #define CL_PHISH_BASE 100
28 29
 enum phish_status {CL_PHISH_NODECISION=0, CL_PHISH_CLEAN=CL_PHISH_BASE,
... ...
@@ -65,9 +66,7 @@ struct url_check {
65 65
 	unsigned short       link_type;
66 66
 };
67 67
 
68
-#ifdef _MESSAGE_H
69
-int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs);
70
-#endif
68
+int phishingScan(const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs);
71 69
 
72 70
 void phish_disable(struct cl_engine* engine,const char* reason);
73 71
 /* Global, non-thread-safe functions, call only once! */
... ...
@@ -357,16 +357,15 @@ static void do_phishing_test(const struct rtest *rtest)
357 357
 	hrefs.value[0] = (unsigned char*)realurl;
358 358
 	hrefs.contents = cli_malloc(sizeof(*hrefs.contents));
359 359
 	fail_unless(!!hrefs.contents, "cli_malloc");
360
-	hrefs.contents[0] = blobCreate();
361 360
 	hrefs.tag = cli_malloc(sizeof(*hrefs.tag));
362 361
 	fail_unless(!!hrefs.tag, "cli_malloc");
363 362
 	hrefs.tag[0] = (unsigned char*)cli_strdup("href");
364
-	blobAddData(hrefs.contents[0], (const unsigned char*) rtest->displayurl, strlen(rtest->displayurl)+1);
363
+	hrefs.contents[0] = cli_strdup(rtest->displayurl);
365 364
 
366 365
 	ctx.engine = engine;
367 366
 	ctx.virname = &virname;
368 367
 
369
-	rc = phishingScan(NULL, NULL, &ctx, &hrefs);
368
+	rc = phishingScan(NULL, &ctx, &hrefs);
370 369
 
371 370
 	html_tag_arg_free(&hrefs);
372 371
 	fail_unless(rc == CL_CLEAN,"phishingScan");