git-svn: trunk@4203
Török Edvin authored on 2008/09/24 05:52:33... | ... |
@@ -48,11 +48,6 @@ |
48 | 48 |
#include "others.h" |
49 | 49 |
#include "htmlnorm.h" |
50 | 50 |
|
51 |
-typedef enum { |
|
52 |
- INVALIDCLASS, BLOBCLASS |
|
53 |
-} object_type; |
|
54 |
-#include "blob.h" |
|
55 |
- |
|
56 | 51 |
#include "entconv.h" |
57 | 52 |
#include "jsparse/js-norm.h" |
58 | 53 |
|
... | ... |
@@ -101,6 +96,11 @@ typedef struct file_buff_tag { |
101 | 101 |
int length; |
102 | 102 |
} file_buff_t; |
103 | 103 |
|
104 |
+struct tag_contents { |
|
105 |
+ unsigned char contents[MAX_TAG_CONTENTS_LENGTH + 1]; |
|
106 |
+ size_t pos; |
|
107 |
+}; |
|
108 |
+ |
|
104 | 109 |
static const int base64_chars[256] = { |
105 | 110 |
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
106 | 111 |
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
... | ... |
@@ -359,7 +359,7 @@ static void html_tag_arg_add(tag_arguments_t *tags, |
359 | 359 |
goto abort; |
360 | 360 |
} |
361 | 361 |
if(tags->scanContents) { |
362 |
- tags->contents= (blob **) cli_realloc2(tags->contents, |
|
362 |
+ tags->contents= (unsigned char **) cli_realloc2(tags->contents, |
|
363 | 363 |
tags->count*sizeof(*tags->contents)); |
364 | 364 |
if(!tags->contents) { |
365 | 365 |
goto abort; |
... | ... |
@@ -394,7 +394,7 @@ abort: |
394 | 394 |
} |
395 | 395 |
if(tags->contents) { |
396 | 396 |
if(tags->contents[i]) |
397 |
- blobDestroy(tags->contents[i]); |
|
397 |
+ free(tags->contents[i]); |
|
398 | 398 |
} |
399 | 399 |
} |
400 | 400 |
if (tags->tag) { |
... | ... |
@@ -443,7 +443,7 @@ void html_tag_arg_free(tag_arguments_t *tags) |
443 | 443 |
} |
444 | 444 |
if(tags->contents) |
445 | 445 |
if (tags->contents[i]) |
446 |
- blobDestroy(tags->contents[i]); |
|
446 |
+ free(tags->contents[i]); |
|
447 | 447 |
} |
448 | 448 |
if (tags->tag) { |
449 | 449 |
free(tags->tag); |
... | ... |
@@ -459,36 +459,30 @@ void html_tag_arg_free(tag_arguments_t *tags) |
459 | 459 |
} |
460 | 460 |
|
461 | 461 |
/** |
462 |
- * this is used for img, and iframe tags. If they are inside an <a href> tag, then set the contents of the image|iframe to the real URL. |
|
463 |
- */ |
|
464 |
-static inline void html_tag_set_inahref(tag_arguments_t *tags,int idx,int in_ahref) |
|
465 |
-{ |
|
466 |
- tags->contents[idx-1]=blobCreate(); |
|
467 |
- blobAddData(tags->contents[idx-1],tags->value[in_ahref-1],strlen(tags->value[in_ahref-1])); |
|
468 |
- blobAddData(tags->contents[idx-1], "",1); |
|
469 |
- blobClose(tags->contents[idx-1]); |
|
470 |
-} |
|
471 |
- |
|
472 |
-/** |
|
473 | 462 |
* the displayed text for an <a href> tag |
474 | 463 |
*/ |
475 |
-static inline void html_tag_contents_append(tag_arguments_t *tags,int idx,const unsigned char* begin,const unsigned char *end) |
|
464 |
+static inline void html_tag_contents_append(struct tag_contents *cont, const unsigned char* begin,const unsigned char *end) |
|
476 | 465 |
{ |
477 |
- if(end && (begin<end)) { |
|
478 |
- const size_t blob_len = blobGetDataSize(tags->contents[idx-1]); |
|
479 |
- const size_t blob_sizeleft = blob_len <= MAX_TAG_CONTENTS_LENGTH ? (MAX_TAG_CONTENTS_LENGTH - blob_len) : 0; |
|
480 |
- const size_t str_len = end - begin; |
|
481 |
- if(blob_sizeleft) |
|
482 |
- blobAddData(tags->contents[idx-1],begin, blob_sizeleft < str_len ? blob_sizeleft : str_len ); |
|
466 |
+ size_t i; |
|
467 |
+ if(!begin || !end) |
|
468 |
+ return; |
|
469 |
+ for(i = cont->pos; i < MAX_TAG_CONTENTS_LENGTH && (begin < end);i++) { |
|
470 |
+ cont->contents[i] = *begin++; |
|
483 | 471 |
} |
472 |
+ cont->pos = i; |
|
484 | 473 |
} |
485 | 474 |
|
486 | 475 |
|
487 |
-static inline void html_tag_contents_done(tag_arguments_t *tags,int idx) |
|
476 |
+static inline void html_tag_contents_done(tag_arguments_t *tags,int idx, struct tag_contents *cont) |
|
488 | 477 |
{ |
489 |
- /* append NUL byte */ |
|
490 |
- blobAddData(tags->contents[idx-1], "", 1); |
|
491 |
- blobClose(tags->contents[idx-1]); |
|
478 |
+ unsigned char *p; |
|
479 |
+ cont->contents[cont->pos++] = '\0'; |
|
480 |
+ p = cli_malloc(cont->pos); |
|
481 |
+ if(!p) |
|
482 |
+ return; |
|
483 |
+ memcpy(p, cont->contents, cont->pos); |
|
484 |
+ tags->contents[idx-1] = p; |
|
485 |
+ cont->pos = 0; |
|
492 | 486 |
} |
493 | 487 |
|
494 | 488 |
struct screnc_state { |
... | ... |
@@ -628,8 +622,10 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
628 | 628 |
/* dconf for phishing engine sets scanContents, so no need for a flag here */ |
629 | 629 |
struct parser_state *js_state = NULL; |
630 | 630 |
const unsigned char *js_begin = NULL, *js_end = NULL; |
631 |
+ struct tag_contents contents; |
|
631 | 632 |
|
632 | 633 |
tag_args.scanContents=0;/* do we need to store the contents of <a></a>?*/ |
634 |
+ contents.pos = 0; |
|
633 | 635 |
if (!m_area) { |
634 | 636 |
if (fd < 0) { |
635 | 637 |
cli_dbgmsg("Invalid HTML fd\n"); |
... | ... |
@@ -755,7 +751,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
755 | 755 |
} |
756 | 756 |
if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin) { |
757 | 757 |
/*append this text portion to the contents of <a>*/ |
758 |
- html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr); |
|
758 |
+ html_tag_contents_append(&contents,href_contents_begin,ptr); |
|
759 | 759 |
href_contents_begin=NULL;/*We just encountered another tag inside <a>, so skip it*/ |
760 | 760 |
} |
761 | 761 |
ptr++; |
... | ... |
@@ -1052,7 +1048,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1052 | 1052 |
} |
1053 | 1053 |
if (hrefs && hrefs->scanContents && in_ahref) { |
1054 | 1054 |
if(strcmp(tag,"/a") == 0) { |
1055 |
- html_tag_contents_done(hrefs,in_ahref); |
|
1055 |
+ html_tag_contents_done(hrefs,in_ahref, &contents); |
|
1056 | 1056 |
in_ahref=0;/* we are no longer inside an <a href> |
1057 | 1057 |
nesting <a> tags not supported, and shouldn't be supported*/ |
1058 | 1058 |
} |
... | ... |
@@ -1110,34 +1106,32 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1110 | 1110 |
if (in_ahref) |
1111 | 1111 |
/*we encountered nested <a> tags, pretend previous closed*/ |
1112 | 1112 |
if (href_contents_begin) { |
1113 |
- html_tag_contents_append(hrefs,in_ahref, |
|
1114 |
- href_contents_begin,ptrend); |
|
1113 |
+ html_tag_contents_append(&contents, href_contents_begin, ptrend); |
|
1115 | 1114 |
/*add pending contents between tags*/ |
1116 |
- html_tag_contents_done(hrefs,in_ahref); |
|
1115 |
+ html_tag_contents_done(hrefs, in_ahref, &contents); |
|
1117 | 1116 |
in_ahref=0; |
1118 | 1117 |
} |
1119 | 1118 |
if (arg_value_title) { |
1120 | 1119 |
/* title is a 'displayed link'*/ |
1121 | 1120 |
html_tag_arg_add(hrefs,"href_title",arg_value_title); |
1122 |
- hrefs->contents[hrefs->count-1]=blobCreate(); |
|
1123 |
- html_tag_contents_append(hrefs,hrefs->count,arg_value, |
|
1121 |
+ html_tag_contents_append(&contents,arg_value, |
|
1124 | 1122 |
arg_value+strlen(arg_value)); |
1125 |
- html_tag_contents_done(hrefs,hrefs->count); |
|
1123 |
+ html_tag_contents_done(hrefs, hrefs->count, &contents); |
|
1126 | 1124 |
} |
1127 | 1125 |
if (in_form_action) { |
1128 | 1126 |
/* form action is the real URL, and href is the 'displayed' */ |
1129 | 1127 |
html_tag_arg_add(hrefs,"form",arg_value); |
1130 |
- hrefs->contents[hrefs->count-1] = blobCreate(); |
|
1131 |
- html_tag_contents_append(hrefs, hrefs->count, in_form_action, |
|
1128 |
+ contents.pos = 0; |
|
1129 |
+ html_tag_contents_append(&contents, in_form_action, |
|
1132 | 1130 |
in_form_action + strlen(in_form_action)); |
1133 |
- html_tag_contents_done(hrefs,hrefs->count); |
|
1131 |
+ html_tag_contents_done(hrefs, hrefs->count, &contents); |
|
1134 | 1132 |
} |
1135 | 1133 |
} |
1136 | 1134 |
html_tag_arg_add(hrefs, "href", arg_value); |
1137 | 1135 |
if (hrefs->scanContents) { |
1138 | 1136 |
in_ahref=hrefs->count; /* index of this tag (counted from 1) */ |
1139 | 1137 |
href_contents_begin=ptr;/* contents begin after <a ..> ends */ |
1140 |
- hrefs->contents[hrefs->count-1]=blobCreate(); |
|
1138 |
+ contents.pos = 0; |
|
1141 | 1139 |
} |
1142 | 1140 |
} |
1143 | 1141 |
} else if (strcmp(tag,"form") == 0 && hrefs->scanContents) { |
... | ... |
@@ -1153,14 +1147,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1153 | 1153 |
html_tag_arg_add(hrefs, "src", arg_value); |
1154 | 1154 |
if(hrefs->scanContents && in_ahref) |
1155 | 1155 |
/* "contents" of an img tag, is the URL of its parent <a> tag */ |
1156 |
- html_tag_set_inahref(hrefs,hrefs->count,in_ahref); |
|
1156 |
+ hrefs->contents[hrefs->count-1] = cli_strdup(hrefs->value[in_ahref-1]); |
|
1157 | 1157 |
if (in_form_action) { |
1158 | 1158 |
/* form action is the real URL, and href is the 'displayed' */ |
1159 | 1159 |
html_tag_arg_add(hrefs,"form",arg_value); |
1160 |
- hrefs->contents[hrefs->count-1] = blobCreate(); |
|
1161 |
- html_tag_contents_append(hrefs, hrefs->count, in_form_action, |
|
1160 |
+ contents.pos = 0; |
|
1161 |
+ html_tag_contents_append(&contents, in_form_action, |
|
1162 | 1162 |
in_form_action + strlen(in_form_action)); |
1163 |
- html_tag_contents_done(hrefs,hrefs->count); |
|
1163 |
+ html_tag_contents_done(hrefs, hrefs->count, &contents); |
|
1164 | 1164 |
} |
1165 | 1165 |
} |
1166 | 1166 |
arg_value = html_tag_arg_value(&tag_args, "dynsrc"); |
... | ... |
@@ -1168,14 +1162,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1168 | 1168 |
html_tag_arg_add(hrefs, "dynsrc", arg_value); |
1169 | 1169 |
if(hrefs->scanContents && in_ahref) |
1170 | 1170 |
/* see above */ |
1171 |
- html_tag_set_inahref(hrefs,hrefs->count,in_ahref); |
|
1171 |
+ hrefs->contents[hrefs->count-1] = cli_strdup(hrefs->value[in_ahref-1]); |
|
1172 | 1172 |
if (in_form_action) { |
1173 | 1173 |
/* form action is the real URL, and href is the 'displayed' */ |
1174 | 1174 |
html_tag_arg_add(hrefs,"form",arg_value); |
1175 |
- hrefs->contents[hrefs->count-1] = blobCreate(); |
|
1176 |
- html_tag_contents_append(hrefs, hrefs->count, in_form_action, |
|
1175 |
+ contents.pos = 0; |
|
1176 |
+ html_tag_contents_append(&contents, in_form_action, |
|
1177 | 1177 |
in_form_action + strlen(in_form_action)); |
1178 |
- html_tag_contents_done(hrefs,hrefs->count); |
|
1178 |
+ html_tag_contents_done(hrefs, hrefs->count, &contents); |
|
1179 | 1179 |
} |
1180 | 1180 |
} |
1181 | 1181 |
} else if (strcmp(tag, "iframe") == 0) { |
... | ... |
@@ -1184,14 +1178,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1184 | 1184 |
html_tag_arg_add(hrefs, "iframe", arg_value); |
1185 | 1185 |
if(hrefs->scanContents && in_ahref) |
1186 | 1186 |
/* see above */ |
1187 |
- html_tag_set_inahref(hrefs,hrefs->count,in_ahref); |
|
1187 |
+ hrefs->contents[hrefs->count-1] = cli_strdup(hrefs->value[in_ahref-1]); |
|
1188 | 1188 |
if (in_form_action) { |
1189 | 1189 |
/* form action is the real URL, and href is the 'displayed' */ |
1190 | 1190 |
html_tag_arg_add(hrefs,"form",arg_value); |
1191 |
- hrefs->contents[hrefs->count-1] = blobCreate(); |
|
1192 |
- html_tag_contents_append(hrefs, hrefs->count, in_form_action, |
|
1191 |
+ contents.pos = 0; |
|
1192 |
+ html_tag_contents_append(&contents, in_form_action, |
|
1193 | 1193 |
in_form_action + strlen(in_form_action)); |
1194 |
- html_tag_contents_done(hrefs,hrefs->count); |
|
1194 |
+ html_tag_contents_done(hrefs, hrefs->count, &contents); |
|
1195 | 1195 |
} |
1196 | 1196 |
} |
1197 | 1197 |
} else if (strcmp(tag,"area") == 0) { |
... | ... |
@@ -1200,14 +1194,14 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1200 | 1200 |
html_tag_arg_add(hrefs, "area", arg_value); |
1201 | 1201 |
if(hrefs->scanContents && in_ahref) |
1202 | 1202 |
/* see above */ |
1203 |
- html_tag_set_inahref(hrefs,hrefs->count,in_ahref); |
|
1203 |
+ hrefs->contents[hrefs->count-1] = cli_strdup(hrefs->value[in_ahref-1]); |
|
1204 | 1204 |
if (in_form_action) { |
1205 | 1205 |
/* form action is the real URL, and href is the 'displayed' */ |
1206 | 1206 |
html_tag_arg_add(hrefs,"form",arg_value); |
1207 |
- hrefs->contents[hrefs->count-1] = blobCreate(); |
|
1208 |
- html_tag_contents_append(hrefs, hrefs->count, in_form_action, |
|
1207 |
+ contents.pos = 0; |
|
1208 |
+ html_tag_contents_append(&contents, in_form_action, |
|
1209 | 1209 |
in_form_action + strlen(in_form_action)); |
1210 |
- html_tag_contents_done(hrefs,hrefs->count); |
|
1210 |
+ html_tag_contents_done(hrefs, hrefs->count, &contents); |
|
1211 | 1211 |
} |
1212 | 1212 |
} |
1213 | 1213 |
} |
... | ... |
@@ -1605,7 +1599,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1605 | 1605 |
} |
1606 | 1606 |
if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin) |
1607 | 1607 |
/* end of line, append contents now, resume on next line */ |
1608 |
- html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr); |
|
1608 |
+ html_tag_contents_append(&contents,href_contents_begin,ptr); |
|
1609 | 1609 |
ptrend = NULL; |
1610 | 1610 |
|
1611 | 1611 |
if(js_state) { |
... | ... |
@@ -1663,8 +1657,8 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1663 | 1663 |
abort: |
1664 | 1664 |
if (in_form_action) |
1665 | 1665 |
free(in_form_action); |
1666 |
- if (in_ahref) /* tag not closed, force closing */ |
|
1667 |
- html_tag_contents_done(hrefs,in_ahref); |
|
1666 |
+ if (in_ahref) /* tag not closed, force closing */ |
|
1667 |
+ html_tag_contents_done(hrefs, in_ahref, &contents); |
|
1668 | 1668 |
|
1669 | 1669 |
if(js_state) { |
1670 | 1670 |
/* output script so far */ |
... | ... |
@@ -4038,7 +4038,7 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html) |
4038 | 4038 |
b = getHrefs(mainMessage, &hrefs); |
4039 | 4039 |
if(b) { |
4040 | 4040 |
if(hrefs.scanContents) { |
4041 |
- if(phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) { |
|
4041 |
+ if(phishingScan(mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) { |
|
4042 | 4042 |
/* |
4043 | 4043 |
* FIXME: message objects' contents are |
4044 | 4044 |
* encapsulated so we should not access |
... | ... |
@@ -41,8 +41,6 @@ |
41 | 41 |
#include "clamav.h" |
42 | 42 |
#include "cltypes.h" |
43 | 43 |
#include "others.h" |
44 |
-#include "mbox.h" |
|
45 |
-#include "message.h" |
|
46 | 44 |
#include "htmlnorm.h" |
47 | 45 |
#include "phishcheck.h" |
48 | 46 |
#include "phish_domaincheck_db.h" |
... | ... |
@@ -726,7 +724,7 @@ cleanupURL(struct string *URL,struct string *pre_URL, int isReal) |
726 | 726 |
} |
727 | 727 |
|
728 | 728 |
/* -------end runtime disable---------*/ |
729 |
-int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
|
729 |
+int phishingScan(const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
|
730 | 730 |
{ |
731 | 731 |
/* TODO: get_host and then apply regex, etc. */ |
732 | 732 |
int i; |
... | ... |
@@ -788,12 +786,8 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs) |
788 | 788 |
urls.always_check_flags |= CHECK_CLOAKING; |
789 | 789 |
} |
790 | 790 |
string_init_c(&urls.realLink,(char*)hrefs->value[i]); |
791 |
- string_init_c(&urls.displayLink,(char*)blobGetData(hrefs->contents[i])); |
|
791 |
+ string_init_c(&urls.displayLink, hrefs->contents[i]); |
|
792 | 792 |
string_init_c(&urls.pre_fixup.pre_displayLink, NULL); |
793 |
- if (urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]) { |
|
794 |
- cli_warnmsg("urls.displayLink.data[...]"); |
|
795 |
- return CL_CLEAN; |
|
796 |
- } |
|
797 | 793 |
|
798 | 794 |
urls.realLink.refcount=-1; |
799 | 795 |
urls.displayLink.refcount=-1;/*don't free these, caller will free*/ |
... | ... |
@@ -23,6 +23,7 @@ |
23 | 23 |
#define _PHISH_CHECK_H |
24 | 24 |
|
25 | 25 |
#include "regex/regex.h" |
26 |
+#include "htmlnorm.h" |
|
26 | 27 |
|
27 | 28 |
#define CL_PHISH_BASE 100 |
28 | 29 |
enum phish_status {CL_PHISH_NODECISION=0, CL_PHISH_CLEAN=CL_PHISH_BASE, |
... | ... |
@@ -65,9 +66,7 @@ struct url_check { |
65 | 65 |
unsigned short link_type; |
66 | 66 |
}; |
67 | 67 |
|
68 |
-#ifdef _MESSAGE_H |
|
69 |
-int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs); |
|
70 |
-#endif |
|
68 |
+int phishingScan(const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs); |
|
71 | 69 |
|
72 | 70 |
void phish_disable(struct cl_engine* engine,const char* reason); |
73 | 71 |
/* Global, non-thread-safe functions, call only once! */ |
... | ... |
@@ -357,16 +357,15 @@ static void do_phishing_test(const struct rtest *rtest) |
357 | 357 |
hrefs.value[0] = (unsigned char*)realurl; |
358 | 358 |
hrefs.contents = cli_malloc(sizeof(*hrefs.contents)); |
359 | 359 |
fail_unless(!!hrefs.contents, "cli_malloc"); |
360 |
- hrefs.contents[0] = blobCreate(); |
|
361 | 360 |
hrefs.tag = cli_malloc(sizeof(*hrefs.tag)); |
362 | 361 |
fail_unless(!!hrefs.tag, "cli_malloc"); |
363 | 362 |
hrefs.tag[0] = (unsigned char*)cli_strdup("href"); |
364 |
- blobAddData(hrefs.contents[0], (const unsigned char*) rtest->displayurl, strlen(rtest->displayurl)+1); |
|
363 |
+ hrefs.contents[0] = cli_strdup(rtest->displayurl); |
|
365 | 364 |
|
366 | 365 |
ctx.engine = engine; |
367 | 366 |
ctx.virname = &virname; |
368 | 367 |
|
369 |
- rc = phishingScan(NULL, NULL, &ctx, &hrefs); |
|
368 |
+ rc = phishingScan(NULL, &ctx, &hrefs); |
|
370 | 369 |
|
371 | 370 |
html_tag_arg_free(&hrefs); |
372 | 371 |
fail_unless(rc == CL_CLEAN,"phishingScan"); |