git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@849 77e5149b-7576-45b1-b177-96237e5ba77b
Trog authored on 2004/09/13 19:30:14... | ... |
@@ -1,3 +1,11 @@ |
1 |
+Mon Sep 13 11:23:21 BST 2004 (trog) |
|
2 |
+----------------------------------- |
|
3 |
+ * libclamav: re-write HTML code: |
|
4 |
+ - decode MS Script Encoder code |
|
5 |
+ - doesn't require mmap(), uses it if available |
|
6 |
+ - extract href tag values |
|
7 |
+ - single pass parser |
|
8 |
+ |
|
1 | 9 |
Mon Sep 13 03:31:58 CEST 2004 (tk) |
2 | 10 |
---------------------------------- |
3 | 11 |
* libclamav: CL_BLOCKMAX: allow blocking (i.e. marking as viruses) of |
... | ... |
@@ -1,5 +1,10 @@ |
1 | 1 |
/* |
2 |
- * Copyright (C) 2004 Trog <trog@clamav.net> |
|
2 |
+ * Normalise HTML text. |
|
3 |
+ * Decode MS Script Encoder protection. |
|
4 |
+ * |
|
5 |
+ * Copyright (C) 2004 trog@uncon.org |
|
6 |
+ * |
|
7 |
+ * The ScrEnc decoder was initially based upon an analysis by Andreas Marx. |
|
3 | 8 |
* |
4 | 9 |
* This program is free software; you can redistribute it and/or modify |
5 | 10 |
* it under the terms of the GNU General Public License as published by |
... | ... |
@@ -16,284 +21,1026 @@ |
16 | 16 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
17 | 17 |
*/ |
18 | 18 |
|
19 |
-#if HAVE_CONFIG_H |
|
20 |
-#include "clamav-config.h" |
|
21 |
-#endif |
|
22 |
- |
|
23 | 19 |
#include <stdio.h> |
24 |
-#include <string.h> |
|
25 |
-#include <ctype.h> |
|
26 | 20 |
#include <unistd.h> |
27 | 21 |
#include <sys/types.h> |
28 | 22 |
#include <sys/stat.h> |
29 | 23 |
#include <fcntl.h> |
24 |
+#include <string.h> |
|
25 |
+#include <errno.h> |
|
26 |
+#include <stdio.h> |
|
27 |
+ |
|
28 |
+#if HAVE_CONFIG_H |
|
29 |
+#include "clamav-config.h" |
|
30 |
+#endif |
|
31 |
+ |
|
32 |
+#if HAVE_MMAP |
|
33 |
+#if HAVE_SYS_MMAN_H |
|
34 |
+#include <sys/mman.h> |
|
35 |
+#else /* HAVE_SYS_MMAN_H */ |
|
36 |
+#undef HAVE_MMAP |
|
37 |
+#endif |
|
38 |
+#endif |
|
30 | 39 |
|
31 | 40 |
#include "others.h" |
41 |
+#include "htmlnorm.h" |
|
32 | 42 |
|
43 |
+#define HTML_STR_LENGTH 1024 |
|
33 | 44 |
#define FALSE (0) |
34 | 45 |
#define TRUE (1) |
35 | 46 |
|
36 |
-/* Normalize an HTML buffer using the following rules: |
|
37 |
- o Remove multiple contiguous spaces |
|
38 |
- o Remove spaces around '<' and '>' in tags |
|
39 |
- o Remove spaces around '=' in tags |
|
40 |
- o Replace single quote with double quote in tags |
|
41 |
- o Convert to lowercase |
|
42 |
- o Convert all white space to a space character |
|
43 |
-*/ |
|
47 |
+typedef enum { |
|
48 |
+ HTML_BAD_STATE=0, |
|
49 |
+ HTML_NORM=1, |
|
50 |
+ HTML_COMMENT=2, |
|
51 |
+ HTML_CHAR_REF=3, |
|
52 |
+ HTML_JS_DECODE=4, |
|
53 |
+ HTML_SKIP_WS=5, |
|
54 |
+ HTML_TRIM_WS=6, |
|
55 |
+ HTML_TAG=7, |
|
56 |
+ HTML_TAG_ARG=8, |
|
57 |
+ HTML_TAG_ARG_VAL=9, |
|
58 |
+ HTML_TAG_ARG_EQUAL=10, |
|
59 |
+ HTML_PROCESS_TAG=11, |
|
60 |
+ HTML_CHAR_REF_DECODE=12, |
|
61 |
+ HTML_SKIP_LENGTH=13, |
|
62 |
+ HTML_JSDECODE=14, |
|
63 |
+ HTML_JSDECODE_LENGTH=15, |
|
64 |
+ HTML_JSDECODE_DECRYPT=16, |
|
65 |
+ HTML_SPECIAL_CHAR=17, |
|
66 |
+} html_state; |
|
67 |
+ |
|
68 |
+typedef enum { |
|
69 |
+ SINGLE_QUOTED, |
|
70 |
+ DOUBLE_QUOTED, |
|
71 |
+ NOT_QUOTED, |
|
72 |
+} quoted_state; |
|
73 |
+ |
|
74 |
+typedef struct m_area_tag { |
|
75 |
+ unsigned char *buffer; |
|
76 |
+ off_t length; |
|
77 |
+ off_t offset; |
|
78 |
+} m_area_t; |
|
79 |
+ |
|
80 |
+#define HTML_FILE_BUFF_LEN 8192 |
|
81 |
+ |
|
82 |
+typedef struct file_buff_tag { |
|
83 |
+ int fd; |
|
84 |
+ unsigned char buffer[HTML_FILE_BUFF_LEN]; |
|
85 |
+ int length; |
|
86 |
+} file_buff_t; |
|
87 |
+ |
|
88 |
+static const int base64_chars[256] = { |
|
89 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
90 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
91 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, |
|
92 |
+ 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, |
|
93 |
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, |
|
94 |
+ 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, |
|
95 |
+ -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, |
|
96 |
+ 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1, |
|
97 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
98 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
99 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
100 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
101 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
102 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
103 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
104 |
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, |
|
105 |
+}; |
|
106 |
+ |
|
107 |
+int table_order[] = { |
|
108 |
+ 00, 02, 01, 00, 02, 01, 02, 01, 01, 02, 01, 02, 00, 01, 02, 01, |
|
109 |
+ 00, 01, 02, 01, 00, 00, 02, 01, 01, 02, 00, 01, 02, 01, 01, 02, |
|
110 |
+ 00, 00, 01, 02, 01, 02, 01, 00, 01, 00, 00, 02, 01, 00, 01, 02, |
|
111 |
+ 00, 01, 02, 01, 00, 00, 02, 01, 01, 00, 00, 02, 01, 00, 01, 02 |
|
112 |
+}; |
|
44 | 113 |
|
45 |
-unsigned char *html_normalize(unsigned char *in_buff, off_t in_size) |
|
114 |
+int decrypt_tables[3][128] = { |
|
115 |
+ {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x57, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, |
|
116 |
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, |
|
117 |
+ 0x2E, 0x47, 0x7A, 0x56, 0x42, 0x6A, 0x2F, 0x26, 0x49, 0x41, 0x34, 0x32, 0x5B, 0x76, 0x72, 0x43, |
|
118 |
+ 0x38, 0x39, 0x70, 0x45, 0x68, 0x71, 0x4F, 0x09, 0x62, 0x44, 0x23, 0x75, 0x3C, 0x7E, 0x3E, 0x5E, |
|
119 |
+ 0xFF, 0x77, 0x4A, 0x61, 0x5D, 0x22, 0x4B, 0x6F, 0x4E, 0x3B, 0x4C, 0x50, 0x67, 0x2A, 0x7D, 0x74, |
|
120 |
+ 0x54, 0x2B, 0x2D, 0x2C, 0x30, 0x6E, 0x6B, 0x66, 0x35, 0x25, 0x21, 0x64, 0x4D, 0x52, 0x63, 0x3F, |
|
121 |
+ 0x7B, 0x78, 0x29, 0x28, 0x73, 0x59, 0x33, 0x7F, 0x6D, 0x55, 0x53, 0x7C, 0x3A, 0x5F, 0x65, 0x46, |
|
122 |
+ 0x58, 0x31, 0x69, 0x6C, 0x5A, 0x48, 0x27, 0x5C, 0x3D, 0x24, 0x79, 0x37, 0x60, 0x51, 0x20, 0x36}, |
|
123 |
+ |
|
124 |
+ {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x7B, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, |
|
125 |
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, |
|
126 |
+ 0x32, 0x30, 0x21, 0x29, 0x5B, 0x38, 0x33, 0x3D, 0x58, 0x3A, 0x35, 0x65, 0x39, 0x5C, 0x56, 0x73, |
|
127 |
+ 0x66, 0x4E, 0x45, 0x6B, 0x62, 0x59, 0x78, 0x5E, 0x7D, 0x4A, 0x6D, 0x71, 0x3C, 0x60, 0x3E, 0x53, |
|
128 |
+ 0xFF, 0x42, 0x27, 0x48, 0x72, 0x75, 0x31, 0x37, 0x4D, 0x52, 0x22, 0x54, 0x6A, 0x47, 0x64, 0x2D, |
|
129 |
+ 0x20, 0x7F, 0x2E, 0x4C, 0x5D, 0x7E, 0x6C, 0x6F, 0x79, 0x74, 0x43, 0x26, 0x76, 0x25, 0x24, 0x2B, |
|
130 |
+ 0x28, 0x23, 0x41, 0x34, 0x09, 0x2A, 0x44, 0x3F, 0x77, 0x3B, 0x55, 0x69, 0x61, 0x63, 0x50, 0x67, |
|
131 |
+ 0x51, 0x49, 0x4F, 0x46, 0x68, 0x7C, 0x36, 0x70, 0x6E, 0x7A, 0x2F, 0x5F, 0x4B, 0x5A, 0x2C, 0x57}, |
|
132 |
+ |
|
133 |
+ {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x6E, 0x0A, 0x0B, 0x0C, 0x06, 0x0E, 0x0F, |
|
134 |
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, |
|
135 |
+ 0x2D, 0x75, 0x52, 0x60, 0x71, 0x5E, 0x49, 0x5C, 0x62, 0x7D, 0x29, 0x36, 0x20, 0x7C, 0x7A, 0x7F, |
|
136 |
+ 0x6B, 0x63, 0x33, 0x2B, 0x68, 0x51, 0x66, 0x76, 0x31, 0x64, 0x54, 0x43, 0x3C, 0x3A, 0x3E, 0x7E, |
|
137 |
+ 0xFF, 0x45, 0x2C, 0x2A, 0x74, 0x27, 0x37, 0x44, 0x79, 0x59, 0x2F, 0x6F, 0x26, 0x72, 0x6A, 0x39, |
|
138 |
+ 0x7B, 0x3F, 0x38, 0x77, 0x67, 0x53, 0x47, 0x34, 0x78, 0x5D, 0x30, 0x23, 0x5A, 0x5B, 0x6C, 0x48, |
|
139 |
+ 0x55, 0x70, 0x69, 0x2E, 0x4C, 0x21, 0x24, 0x4E, 0x50, 0x09, 0x56, 0x73, 0x35, 0x61, 0x4B, 0x58, |
|
140 |
+ 0x3B, 0x57, 0x22, 0x6D, 0x4D, 0x25, 0x28, 0x46, 0x4A, 0x32, 0x41, 0x3D, 0x5F, 0x4F, 0x42, 0x65} |
|
141 |
+}; |
|
142 |
+ |
|
143 |
+/* TODO: mmap support */ |
|
144 |
+static unsigned char *cli_readline(FILE *stream, m_area_t *m_area, unsigned int max_len) |
|
46 | 145 |
{ |
47 |
- unsigned char *out_buff; |
|
48 |
- off_t out_size=0, i; |
|
49 |
- int had_space=FALSE, tag_depth=0, in_quote=FALSE; |
|
146 |
+ unsigned char *line, *ptr, *start, *end; |
|
147 |
+ unsigned int line_len, count; |
|
148 |
+ int fddup; |
|
50 | 149 |
|
51 |
- out_buff = (unsigned char *) cli_malloc(in_size+1); |
|
52 |
- if (!out_buff) { |
|
53 |
- cli_dbgmsg("html_normalize(): malloc failed\n"); |
|
150 |
+ line = (unsigned char *) malloc(max_len); |
|
151 |
+ if (!line) { |
|
54 | 152 |
return NULL; |
55 | 153 |
} |
56 | 154 |
|
57 |
- for (i=0 ; i < in_size ; i++) { |
|
58 |
- if (in_buff[i] == '<') { |
|
59 |
- out_buff[out_size++] = '<'; |
|
60 |
- tag_depth++; |
|
61 |
- if (tag_depth == 1) { |
|
62 |
- had_space=TRUE; /* consume spaces */ |
|
63 |
- } |
|
64 |
- } else if ((in_buff[i] == '=') && (tag_depth == 1)) { |
|
65 |
- /* Remove preceeding spaces */ |
|
66 |
- while ((out_size > 0) && |
|
67 |
- (out_buff[out_size-1] == ' ')) { |
|
68 |
- out_size--; |
|
155 |
+ /* Try and use the memory buffer first */ |
|
156 |
+ if (m_area) { |
|
157 |
+ start = ptr = m_area->buffer + m_area->offset; |
|
158 |
+ end = m_area->buffer + m_area->length; |
|
159 |
+ if (start >= end) { |
|
160 |
+ free(line); |
|
161 |
+ return NULL; |
|
162 |
+ } |
|
163 |
+ line_len = 1; |
|
164 |
+ while ((ptr < end) && (*ptr != '\n') && (line_len < (max_len-1))) { |
|
165 |
+ ptr++; |
|
166 |
+ line_len++; |
|
167 |
+ } |
|
168 |
+ if (ptr == end) { |
|
169 |
+ line_len--; |
|
170 |
+ memcpy(line, start, line_len); |
|
171 |
+ line[line_len] = '\0'; |
|
172 |
+ } else if (*ptr == '\n') { |
|
173 |
+ memcpy(line, start, line_len); |
|
174 |
+ line[line_len] = '\0'; |
|
175 |
+ } else { |
|
176 |
+ /* Hit max_len */ |
|
177 |
+ /* Store the current line end and length*/ |
|
178 |
+ count = line_len; |
|
179 |
+ while (!isspace(*ptr) && (line_len > 1)) { |
|
180 |
+ ptr--; |
|
181 |
+ line_len--; |
|
69 | 182 |
} |
70 |
- out_buff[out_size++] = '='; |
|
71 |
- had_space=TRUE; |
|
72 |
- } else if (isspace(in_buff[i])) { |
|
73 |
- if (!had_space) { |
|
74 |
- out_buff[out_size++] = ' '; |
|
75 |
- had_space=TRUE; |
|
183 |
+ if (line_len == 1) { |
|
184 |
+ line_len=count; |
|
76 | 185 |
} |
77 |
- } else if (in_buff[i] == '>') { |
|
78 |
- /* Remove preceeding spaces */ |
|
79 |
- if (tag_depth == 1) { |
|
80 |
- while ((out_size > 0) && |
|
81 |
- (out_buff[out_size-1] == ' ')) { |
|
82 |
- out_size--; |
|
186 |
+ memcpy(line, start, line_len); |
|
187 |
+ line[line_len] = '\0'; |
|
188 |
+ } |
|
189 |
+ m_area->offset += line_len; |
|
190 |
+ } else { |
|
191 |
+ if (!stream) { |
|
192 |
+ cli_dbgmsg("No HTML stream\n"); |
|
193 |
+ free(line); |
|
194 |
+ return NULL; |
|
195 |
+ } |
|
196 |
+ if (fgets(line, max_len, stream) == NULL) { |
|
197 |
+ free(line); |
|
198 |
+ return NULL; |
|
199 |
+ } |
|
200 |
+ |
|
201 |
+ line_len=strlen(line); |
|
202 |
+ if (line_len == 0) { |
|
203 |
+ free(line); |
|
204 |
+ return NULL; |
|
205 |
+ } |
|
206 |
+ if (line_len == max_len-1) { |
|
207 |
+ /* didn't find a whole line - rewind to a space*/ |
|
208 |
+ count = 0; |
|
209 |
+ while (!isspace(line[--line_len])) { |
|
210 |
+ count--; |
|
211 |
+ if (line_len == 0) { |
|
212 |
+ return line; |
|
83 | 213 |
} |
84 | 214 |
} |
85 |
- out_buff[out_size++] = '>'; |
|
86 |
- tag_depth--; |
|
87 |
- } else if ((in_buff[i] == '\'') && (tag_depth==1)) { |
|
88 |
- /* Convert single quotes to double quotes */ |
|
89 |
- if (in_quote || out_buff[out_size-1] == '=') { |
|
90 |
- out_buff[out_size++] = '\"'; |
|
91 |
- in_quote = !in_quote; |
|
92 |
- } else { |
|
93 |
- out_buff[out_size++] = '\''; |
|
94 |
- } |
|
95 |
- } else { |
|
96 |
- out_buff[out_size++] = tolower(in_buff[i]); |
|
97 |
- had_space=FALSE; |
|
215 |
+ fseek(stream, count, SEEK_CUR); |
|
216 |
+ line[line_len+1] = '\0'; |
|
98 | 217 |
} |
99 | 218 |
} |
100 |
- out_buff[out_size] = '\0'; |
|
101 |
- return out_buff; |
|
219 |
+ return line; |
|
102 | 220 |
} |
103 | 221 |
|
104 |
-/* Remove HTML style comments from buffer */ |
|
105 |
-unsigned char *remove_html_comments(unsigned char *line) |
|
222 |
+static void html_output_flush(file_buff_t *fbuff) |
|
106 | 223 |
{ |
107 |
- unsigned char *newline, *newcurrent; |
|
108 |
- int in_comment=FALSE; |
|
109 |
- |
|
110 |
- if (!line) { |
|
111 |
- return NULL; |
|
224 |
+ if (fbuff && (fbuff->length > 0)) { |
|
225 |
+ cli_writen(fbuff->fd, fbuff->buffer, fbuff->length); |
|
226 |
+ fbuff->length = 0; |
|
112 | 227 |
} |
113 |
- |
|
114 |
- newcurrent = newline = (unsigned char *) cli_malloc(strlen(line) + 1); |
|
115 |
- if (!newline) { |
|
116 |
- return NULL; |
|
228 |
+} |
|
229 |
+ |
|
230 |
+static void html_output_c(file_buff_t *fbuff1, file_buff_t *fbuff2, unsigned char c) |
|
231 |
+{ |
|
232 |
+ if (fbuff1) { |
|
233 |
+ if (fbuff1->length == HTML_FILE_BUFF_LEN) { |
|
234 |
+ html_output_flush(fbuff1); |
|
235 |
+ } |
|
236 |
+ fbuff1->buffer[fbuff1->length++] = c; |
|
117 | 237 |
} |
118 |
- |
|
119 |
- while(line) { |
|
120 |
- if (!(in_comment)) { |
|
121 |
- while (*line && *line != '<') { |
|
122 |
- *newcurrent = *line; |
|
123 |
- newcurrent++; |
|
124 |
- line++; |
|
125 |
- } |
|
126 |
- if (! *line) { |
|
127 |
- break; |
|
128 |
- } |
|
129 |
- if (!line[1]) { |
|
130 |
- *newcurrent = *line; |
|
131 |
- newcurrent++; |
|
132 |
- line++; |
|
133 |
- continue; |
|
134 |
- } |
|
135 |
- if (line[1] == '!') { |
|
136 |
- in_comment = TRUE; |
|
137 |
- line += 1; |
|
138 |
- } else { |
|
139 |
- *newcurrent = *line; |
|
140 |
- newcurrent++; |
|
141 |
- line++; |
|
142 |
- } |
|
143 |
- } else { |
|
144 |
- while (*line && *line != '>') { |
|
145 |
- line++; |
|
146 |
- } |
|
147 |
- if (! *line) { |
|
148 |
- break; |
|
149 |
- } |
|
150 |
- in_comment = FALSE; |
|
151 |
- line++; |
|
238 |
+ if (fbuff2) { |
|
239 |
+ if (fbuff2->length == HTML_FILE_BUFF_LEN) { |
|
240 |
+ html_output_flush(fbuff2); |
|
152 | 241 |
} |
242 |
+ fbuff2->buffer[fbuff2->length++] = c; |
|
153 | 243 |
} |
154 |
- *newcurrent = '\0'; |
|
155 |
- return newline; |
|
156 | 244 |
} |
157 | 245 |
|
158 |
-/* Decode an HTML escape character into it's character value */ |
|
159 |
-unsigned int decode_html_char_ref(unsigned char *cref, |
|
160 |
- unsigned char *dest) |
|
246 |
+static html_output_str(file_buff_t *fbuff, unsigned char *str, int len) |
|
161 | 247 |
{ |
162 |
- |
|
163 |
- unsigned int hex=FALSE, value=0, count=0; |
|
164 |
- |
|
165 |
- if (!cref[0] || !cref[1]) { |
|
166 |
- return 0; |
|
248 |
+ if (fbuff) { |
|
249 |
+ if ((fbuff->length + len) >= HTML_FILE_BUFF_LEN) { |
|
250 |
+ html_output_flush(fbuff); |
|
251 |
+ } |
|
252 |
+ memcpy(fbuff->buffer + fbuff->length, str, len); |
|
167 | 253 |
} |
254 |
+} |
|
255 |
+ |
|
256 |
+static char *html_tag_arg_value(tag_arguments_t *tags, char *tag) |
|
257 |
+{ |
|
258 |
+ int i; |
|
168 | 259 |
|
169 |
- if (((*cref == 'x') || (*cref == 'X')) && isxdigit(cref[1])) { |
|
170 |
- hex=TRUE; |
|
171 |
- cref++; |
|
172 |
- count++; |
|
260 |
+ for (i=0; i < tags->count; i++) { |
|
261 |
+ if (strcmp(tags->tag[i], tag) == 0) { |
|
262 |
+ return tags->value[i]; |
|
263 |
+ } |
|
173 | 264 |
} |
265 |
+ return NULL; |
|
266 |
+} |
|
267 |
+ |
|
268 |
+static void html_tag_arg_set(tag_arguments_t *tags, char *tag, char *value) |
|
269 |
+{ |
|
270 |
+ int i; |
|
174 | 271 |
|
175 |
- while (isdigit(*cref) || (hex && isxdigit(*cref))) { |
|
176 |
- if (hex) { |
|
177 |
- value *= 16; |
|
178 |
- } else { |
|
179 |
- value *= 10; |
|
272 |
+ for (i=0; i < tags->count; i++) { |
|
273 |
+ if (strcmp(tags->tag[i], tag) == 0) { |
|
274 |
+ free(tags->value[i]); |
|
275 |
+ tags->value[i] = strdup(value); |
|
276 |
+ return; |
|
180 | 277 |
} |
181 |
- if (isdigit(*cref)) { |
|
182 |
- value += (*cref - '0'); |
|
278 |
+ } |
|
279 |
+ return; |
|
280 |
+} |
|
281 |
+static void html_tag_arg_add(tag_arguments_t *tags, |
|
282 |
+ unsigned char *tag, unsigned char *value) |
|
283 |
+{ |
|
284 |
+ int len; |
|
285 |
+ tags->count++; |
|
286 |
+ tags->tag = (unsigned char **) realloc(tags->tag, |
|
287 |
+ tags->count * sizeof(char *)); |
|
288 |
+ tags->value = (unsigned char **) realloc(tags->value, |
|
289 |
+ tags->count * sizeof(char *)); |
|
290 |
+ if (!tags->tag || !tags->value) { |
|
291 |
+ tags->count--; |
|
292 |
+ return; |
|
293 |
+ } |
|
294 |
+ tags->tag[tags->count-1] = strdup(tag); |
|
295 |
+ if (value) { |
|
296 |
+ if (*value == '"') { |
|
297 |
+ tags->value[tags->count-1] = strdup(value+1); |
|
298 |
+ len = strlen(value+1); |
|
299 |
+ if (len > 0) { |
|
300 |
+ tags->value[tags->count-1][len-1] = '\0'; |
|
301 |
+ } |
|
183 | 302 |
} else { |
184 |
- value += (tolower(*cref) - 'a' + 10); |
|
303 |
+ tags->value[tags->count-1] = strdup(value); |
|
185 | 304 |
} |
186 |
- cref++; |
|
187 |
- count++; |
|
305 |
+ } else { |
|
306 |
+ tags->value[tags->count-1] = NULL; |
|
188 | 307 |
} |
189 |
- if (*cref == ';') { |
|
190 |
- cref++; |
|
191 |
- count++; |
|
308 |
+} |
|
309 |
+ |
|
310 |
+static void html_output_tag(file_buff_t *fbuff, char *tag, tag_arguments_t *tags) |
|
311 |
+{ |
|
312 |
+ int i; |
|
313 |
+ |
|
314 |
+ html_output_c(fbuff, NULL, '<'); |
|
315 |
+ html_output_str(fbuff, tag, strlen(tag)); |
|
316 |
+ for (i=0; i < tags->count; i++) { |
|
317 |
+ html_output_c(fbuff, NULL, ' '); |
|
318 |
+ html_output_str(fbuff, tags->tag[i], strlen(tags->tag[i])); |
|
319 |
+ if (tags->value[i]) { |
|
320 |
+ html_output_str(fbuff, "=\"", 2); |
|
321 |
+ html_output_str(fbuff, tags->value[i], strlen(tags->value[i])); |
|
322 |
+ html_output_c(fbuff, NULL, '"'); |
|
323 |
+ } |
|
192 | 324 |
} |
325 |
+ html_output_c(fbuff, NULL, '>'); |
|
326 |
+} |
|
327 |
+ |
|
328 |
+void html_tag_arg_free(tag_arguments_t *tags) |
|
329 |
+{ |
|
330 |
+ int i; |
|
193 | 331 |
|
194 |
- *dest = value; |
|
195 |
- |
|
196 |
- return count; |
|
332 |
+ for (i=0; i < tags->count; i++) { |
|
333 |
+ free(tags->tag[i]); |
|
334 |
+ if (tags->value[i]) { |
|
335 |
+ free(tags->value[i]); |
|
336 |
+ } |
|
337 |
+ } |
|
338 |
+ if (tags->tag) { |
|
339 |
+ free(tags->tag); |
|
340 |
+ } |
|
341 |
+ if (tags->value) { |
|
342 |
+ free(tags->value); |
|
343 |
+ } |
|
344 |
+ tags->tag = tags->value = NULL; |
|
345 |
+ tags->count = 0; |
|
197 | 346 |
} |
198 | 347 |
|
199 |
-/* Remove HTML character escape sequences from buffer */ |
|
200 |
-unsigned char *remove_html_char_ref(unsigned char *line) |
|
348 |
+static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs) |
|
201 | 349 |
{ |
202 |
- unsigned char *newline, *newcurrent; |
|
203 |
- unsigned char *linepos, count; |
|
350 |
+ int fd_tmp, tag_length, tag_arg_length; |
|
351 |
+ int retval=FALSE, escape, value, hex, tag_val_length, table_pos, in_script=FALSE; |
|
352 |
+ FILE *stream_in; |
|
353 |
+ html_state state=HTML_NORM, next_state=HTML_BAD_STATE; |
|
354 |
+ char filename[1024], tag[HTML_STR_LENGTH+1], tag_arg[HTML_STR_LENGTH+1]; |
|
355 |
+ char tag_val[HTML_STR_LENGTH+1]; |
|
356 |
+ unsigned char *line, *ptr, *arg_value; |
|
357 |
+ tag_arguments_t tag_args; |
|
358 |
+ quoted_state quoted; |
|
359 |
+ unsigned long length; |
|
360 |
+ file_buff_t file_buff_o1, file_buff_o2, file_buff_script; |
|
204 | 361 |
|
205 |
- if (!line) { |
|
206 |
- return NULL; |
|
362 |
+ if (!m_area) { |
|
363 |
+ if (fd < 0) { |
|
364 |
+ cli_dbgmsg("Invalid HTML fd\n"); |
|
365 |
+ return FALSE; |
|
366 |
+ } |
|
367 |
+ lseek(fd, 0, SEEK_SET); |
|
368 |
+ fd_tmp = dup(fd); |
|
369 |
+ if (fd_tmp < 0) { |
|
370 |
+ return FALSE; |
|
371 |
+ } |
|
372 |
+ stream_in = fdopen(fd_tmp, "r"); |
|
373 |
+ if (!stream_in) { |
|
374 |
+ close(fd_tmp); |
|
375 |
+ return FALSE; |
|
376 |
+ } |
|
207 | 377 |
} |
208 | 378 |
|
209 |
- newcurrent = newline = (unsigned char *) cli_malloc(strlen(line) + 1); |
|
210 |
- if (!newline) { |
|
211 |
- return NULL; |
|
379 |
+ if (dirname) { |
|
380 |
+ snprintf(filename, 1024, "%s/comment.html", dirname); |
|
381 |
+ file_buff_o1.fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU); |
|
382 |
+ if (!file_buff_o1.fd) { |
|
383 |
+ cli_dbgmsg("open failed: %s\n", filename); |
|
384 |
+ fclose(stream_in); |
|
385 |
+ return FALSE; |
|
386 |
+ } |
|
387 |
+ |
|
388 |
+ snprintf(filename, 1024, "%s/nocomment.html", dirname); |
|
389 |
+ file_buff_o2.fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU); |
|
390 |
+ if (!file_buff_o2.fd) { |
|
391 |
+ cli_dbgmsg("open failed: %s\n", filename); |
|
392 |
+ close(file_buff_o1.fd); |
|
393 |
+ fclose(stream_in); |
|
394 |
+ return FALSE; |
|
395 |
+ } |
|
396 |
+ |
|
397 |
+ snprintf(filename, 1024, "%s/script.html", dirname); |
|
398 |
+ file_buff_script.fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU); |
|
399 |
+ if (!file_buff_script.fd) { |
|
400 |
+ cli_dbgmsg("open failed: %s\n", filename); |
|
401 |
+ close(file_buff_o1.fd); |
|
402 |
+ close(file_buff_o2.fd); |
|
403 |
+ fclose(stream_in); |
|
404 |
+ return FALSE; |
|
405 |
+ } |
|
406 |
+ |
|
407 |
+ file_buff_o1.length = 0; |
|
408 |
+ file_buff_o2.length = 0; |
|
409 |
+ file_buff_script.length = 0; |
|
410 |
+ } else { |
|
411 |
+ file_buff_o1 = NULL; |
|
412 |
+ file_buff_o2 = NULL; |
|
413 |
+ file_buff_script = NULL; |
|
212 | 414 |
} |
415 |
+ |
|
416 |
+ tag_args.count = 0; |
|
417 |
+ tag_args.tag = NULL; |
|
418 |
+ tag_args.value = NULL; |
|
419 |
+ |
|
420 |
+ ptr = line = cli_readline(stream_in, m_area, 8192); |
|
213 | 421 |
while (line) { |
214 |
- linepos = strchr(line, '&'); |
|
215 |
- if (!linepos) { |
|
216 |
- strcpy(newcurrent, line); |
|
217 |
- return newline; |
|
422 |
+ while (*ptr && isspace(*ptr)) { |
|
423 |
+ ptr++; |
|
218 | 424 |
} |
219 |
- strncpy(newcurrent, line, linepos-line); |
|
220 |
- newcurrent += linepos-line; |
|
221 |
- |
|
222 |
- if (!linepos[1] || !linepos[2]) { |
|
223 |
- *newcurrent = '&'; |
|
224 |
- newcurrent++; |
|
225 |
- line = linepos+1; |
|
226 |
- continue; |
|
227 |
- } |
|
228 |
- switch (linepos[1]) { |
|
229 |
- case '#': |
|
230 |
- count = decode_html_char_ref(linepos+2, |
|
231 |
- newcurrent); |
|
232 |
- if (count > 0) { |
|
233 |
- newcurrent++; |
|
234 |
- linepos += count+2; |
|
235 |
- } else { |
|
236 |
- *newcurrent = '&'; |
|
237 |
- newcurrent++; |
|
238 |
- linepos++; |
|
425 |
+ while (*ptr) { |
|
426 |
+ if (*ptr == '\n') { |
|
427 |
+ if (state == HTML_COMMENT) { |
|
428 |
+ html_output_c(&file_buff_o1, NULL, ' '); |
|
429 |
+ } else if ((state != HTML_SKIP_WS) && |
|
430 |
+ (state != HTML_TRIM_WS) && |
|
431 |
+ (state != HTML_PROCESS_TAG)) { |
|
432 |
+ html_output_c(&file_buff_o1, &file_buff_o2, ' '); |
|
433 |
+ } |
|
434 |
+ ptr++; |
|
435 |
+ continue; |
|
436 |
+ } |
|
437 |
+ if (*ptr == '\r') { |
|
438 |
+ ptr++; |
|
439 |
+ continue; |
|
440 |
+ } |
|
441 |
+ switch (state) { |
|
442 |
+ case HTML_BAD_STATE: |
|
443 |
+ /* An engine error has occurred */ |
|
444 |
+ cli_dbgmsg("HTML Engine Error\n"); |
|
445 |
+ goto abort; |
|
446 |
+ case HTML_SKIP_LENGTH: |
|
447 |
+ length--; |
|
448 |
+ ptr++; |
|
449 |
+ if (!length) { |
|
450 |
+ state = next_state; |
|
451 |
+ } |
|
452 |
+ break; |
|
453 |
+ case HTML_SKIP_WS: |
|
454 |
+ if (isspace(*ptr)) { |
|
455 |
+ ptr++; |
|
456 |
+ } else { |
|
457 |
+ state = next_state; |
|
458 |
+ next_state = HTML_BAD_STATE; |
|
459 |
+ } |
|
460 |
+ break; |
|
461 |
+ case HTML_TRIM_WS: |
|
462 |
+ if (isspace(*ptr)) { |
|
463 |
+ ptr++; |
|
464 |
+ } else { |
|
465 |
+ html_output_c(&file_buff_o1, &file_buff_o2, ' '); |
|
466 |
+ state = next_state; |
|
467 |
+ next_state = HTML_BAD_STATE; |
|
468 |
+ } |
|
469 |
+ break; |
|
470 |
+ case HTML_NORM: |
|
471 |
+ if (*ptr == '<') { |
|
472 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '<'); |
|
473 |
+ if (in_script) { |
|
474 |
+ html_output_c(&file_buff_script, NULL, '<'); |
|
475 |
+ } |
|
476 |
+ ptr++; |
|
477 |
+ state = HTML_SKIP_WS; |
|
478 |
+ tag_length=0; |
|
479 |
+ next_state = HTML_TAG; |
|
480 |
+ } else if (isspace(*ptr)) { |
|
481 |
+ state = HTML_TRIM_WS; |
|
482 |
+ next_state = HTML_NORM; |
|
483 |
+ } else if (*ptr == '&') { |
|
484 |
+ state = HTML_CHAR_REF; |
|
485 |
+ next_state = HTML_NORM; |
|
486 |
+ ptr++; |
|
487 |
+ } else { |
|
488 |
+ html_output_c(&file_buff_o1, &file_buff_o2, tolower(*ptr)); |
|
489 |
+ if (in_script) { |
|
490 |
+ html_output_c(&file_buff_script, NULL, tolower(*ptr)); |
|
491 |
+ } |
|
492 |
+ ptr++; |
|
493 |
+ } |
|
494 |
+ break; |
|
495 |
+ case HTML_TAG: |
|
496 |
+ if ((tag_length == 0) && (*ptr == '!')) { |
|
497 |
+ /* Comment */ |
|
498 |
+ html_output_c(&file_buff_o1, NULL, '!'); |
|
499 |
+ if (in_script) { |
|
500 |
+ html_output_c(&file_buff_script, NULL, '!'); |
|
501 |
+ } |
|
502 |
+ /* Need to rewind in the no-comment output stream */ |
|
503 |
+ if (file_buff_o2.length > 0) { |
|
504 |
+ file_buff_o2.length--; |
|
505 |
+ } |
|
506 |
+ state = HTML_COMMENT; |
|
507 |
+ next_state = HTML_BAD_STATE; |
|
508 |
+ ptr++; |
|
509 |
+ } else if (*ptr == '>') { |
|
510 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '>'); |
|
511 |
+ if (in_script) { |
|
512 |
+ html_output_c(&file_buff_script, NULL, '>'); |
|
513 |
+ } |
|
514 |
+ ptr++; |
|
515 |
+ tag[tag_length] = '\0'; |
|
516 |
+ state = HTML_SKIP_WS; |
|
517 |
+ next_state = HTML_PROCESS_TAG; |
|
518 |
+ } else if (!isspace(*ptr)) { |
|
519 |
+ html_output_c(&file_buff_o1, &file_buff_o2, tolower(*ptr)); |
|
520 |
+ if (in_script) { |
|
521 |
+ html_output_c(&file_buff_script, NULL, tolower(*ptr)); |
|
522 |
+ } |
|
523 |
+ if (tag_length < HTML_STR_LENGTH) { |
|
524 |
+ tag[tag_length++] = tolower(*ptr); |
|
525 |
+ } |
|
526 |
+ ptr++; |
|
527 |
+ } else { |
|
528 |
+ tag[tag_length] = '\0'; |
|
529 |
+ state = HTML_SKIP_WS; |
|
530 |
+ tag_arg_length = 0; |
|
531 |
+ next_state = HTML_TAG_ARG; |
|
532 |
+ } |
|
533 |
+ break; |
|
534 |
+ case HTML_TAG_ARG: |
|
535 |
+ if (*ptr == '=') { |
|
536 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '='); |
|
537 |
+ tag_arg[tag_arg_length] = '\0'; |
|
538 |
+ ptr++; |
|
539 |
+ state = HTML_SKIP_WS; |
|
540 |
+ escape = FALSE; |
|
541 |
+ quoted = NOT_QUOTED; |
|
542 |
+ tag_val_length = 0; |
|
543 |
+ next_state = HTML_TAG_ARG_VAL; |
|
544 |
+ } else if (isspace(*ptr)) { |
|
545 |
+ ptr++; |
|
546 |
+ tag_arg[tag_arg_length] = '\0'; |
|
547 |
+ state = HTML_SKIP_WS; |
|
548 |
+ next_state = HTML_TAG_ARG_EQUAL; |
|
549 |
+ } else if (*ptr == '>') { |
|
550 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '>'); |
|
551 |
+ if (tag_arg_length > 0) { |
|
552 |
+ tag_arg[tag_arg_length] = '\0'; |
|
553 |
+ html_tag_arg_add(&tag_args, tag_arg, NULL); |
|
554 |
+ } |
|
555 |
+ ptr++; |
|
556 |
+ state = HTML_PROCESS_TAG; |
|
557 |
+ next_state = HTML_BAD_STATE; |
|
558 |
+ } else { |
|
559 |
+ if (tag_arg_length == 0) { |
|
560 |
+ /* Start of new tag - add space */ |
|
561 |
+ html_output_c(&file_buff_o1, &file_buff_o2,' '); |
|
562 |
+ } |
|
563 |
+ html_output_c(&file_buff_o1, &file_buff_o2, tolower(*ptr)); |
|
564 |
+ if (tag_arg_length < HTML_STR_LENGTH) { |
|
565 |
+ tag_arg[tag_arg_length++] = tolower(*ptr); |
|
566 |
+ } |
|
567 |
+ ptr++; |
|
568 |
+ } |
|
569 |
+ break; |
|
570 |
+ case HTML_TAG_ARG_EQUAL: |
|
571 |
+ if (*ptr == '=') { |
|
572 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '='); |
|
573 |
+ ptr++; |
|
574 |
+ state = HTML_SKIP_WS; |
|
575 |
+ escape = FALSE; |
|
576 |
+ quoted = NOT_QUOTED; |
|
577 |
+ tag_val_length = 0; |
|
578 |
+ next_state = HTML_TAG_ARG_VAL; |
|
579 |
+ } else { |
|
580 |
+ if (tag_arg_length > 0) { |
|
581 |
+ tag_arg[tag_arg_length] = '\0'; |
|
582 |
+ html_tag_arg_add(&tag_args, tag_arg, NULL); |
|
583 |
+ } |
|
584 |
+ tag_arg_length=0; |
|
585 |
+ state = HTML_TAG_ARG; |
|
586 |
+ next_state = HTML_BAD_STATE; |
|
587 |
+ } |
|
588 |
+ break; |
|
589 |
+ case HTML_TAG_ARG_VAL: |
|
590 |
+ if (*ptr == '&') { |
|
591 |
+ state = HTML_CHAR_REF; |
|
592 |
+ next_state = HTML_TAG_ARG_VAL; |
|
593 |
+ ptr++; |
|
594 |
+ } else if (*ptr == '\'') { |
|
595 |
+ if (tag_val_length == 0) { |
|
596 |
+ quoted = SINGLE_QUOTED; |
|
597 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '"'); |
|
598 |
+ if (tag_val_length < HTML_STR_LENGTH) { |
|
599 |
+ tag_val[tag_val_length++] = '"'; |
|
600 |
+ } |
|
601 |
+ ptr++; |
|
602 |
+ } else { |
|
603 |
+ if (!escape && (quoted==SINGLE_QUOTED)) { |
|
604 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '"'); |
|
605 |
+ if (tag_val_length < HTML_STR_LENGTH) { |
|
606 |
+ tag_val[tag_val_length++] = '"'; |
|
607 |
+ } |
|
608 |
+ tag_val[tag_val_length] = '\0'; |
|
609 |
+ html_tag_arg_add(&tag_args, tag_arg, tag_val); |
|
610 |
+ ptr++; |
|
611 |
+ state = HTML_SKIP_WS; |
|
612 |
+ tag_arg_length=0; |
|
613 |
+ next_state = HTML_TAG_ARG; |
|
614 |
+ } else { |
|
615 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '"'); |
|
616 |
+ if (tag_val_length < HTML_STR_LENGTH) { |
|
617 |
+ tag_val[tag_val_length++] = '"'; |
|
618 |
+ } |
|
619 |
+ ptr++; |
|
620 |
+ } |
|
621 |
+ } |
|
622 |
+ } else if (*ptr == '"') { |
|
623 |
+ if (tag_val_length == 0) { |
|
624 |
+ quoted = DOUBLE_QUOTED; |
|
625 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '"'); |
|
626 |
+ if (tag_val_length < HTML_STR_LENGTH) { |
|
627 |
+ tag_val[tag_val_length++] = '"'; |
|
628 |
+ } |
|
629 |
+ ptr++; |
|
630 |
+ } else { |
|
631 |
+ if (!escape && (quoted==DOUBLE_QUOTED)) { |
|
632 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '"'); |
|
633 |
+ if (tag_val_length < HTML_STR_LENGTH) { |
|
634 |
+ tag_val[tag_val_length++] = '"'; |
|
635 |
+ } |
|
636 |
+ tag_val[tag_val_length] = '\0'; |
|
637 |
+ html_tag_arg_add(&tag_args, tag_arg, tag_val); |
|
638 |
+ ptr++; |
|
639 |
+ state = HTML_SKIP_WS; |
|
640 |
+ tag_arg_length=0; |
|
641 |
+ next_state = HTML_TAG_ARG; |
|
642 |
+ } else { |
|
643 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '"'); |
|
644 |
+ if (tag_val_length < HTML_STR_LENGTH) { |
|
645 |
+ tag_val[tag_val_length++] = '"'; |
|
646 |
+ } |
|
647 |
+ ptr++; |
|
648 |
+ } |
|
649 |
+ } |
|
650 |
+ } else if (isspace(*ptr) || (*ptr == '>')) { |
|
651 |
+ if (quoted == NOT_QUOTED) { |
|
652 |
+ tag_val[tag_val_length] = '\0'; |
|
653 |
+ html_tag_arg_add(&tag_args, tag_arg, tag_val); |
|
654 |
+ state = HTML_SKIP_WS; |
|
655 |
+ tag_arg_length=0; |
|
656 |
+ next_state = HTML_TAG_ARG; |
|
657 |
+ } else { |
|
658 |
+ html_output_c(&file_buff_o1, &file_buff_o2, *ptr); |
|
659 |
+ if (tag_val_length < HTML_STR_LENGTH) { |
|
660 |
+ if (isspace(*ptr)) { |
|
661 |
+ tag_val[tag_val_length++] = ' '; |
|
662 |
+ } else { |
|
663 |
+ tag_val[tag_val_length++] = '>'; |
|
664 |
+ } |
|
665 |
+ } |
|
666 |
+ state = HTML_SKIP_WS; |
|
667 |
+ escape = FALSE; |
|
668 |
+ quoted = NOT_QUOTED; |
|
669 |
+ next_state = HTML_TAG_ARG_VAL; |
|
670 |
+ ptr++; |
|
671 |
+ } |
|
672 |
+ } else { |
|
673 |
+ html_output_c(&file_buff_o1, &file_buff_o2, tolower(*ptr)); |
|
674 |
+ if (tag_val_length < HTML_STR_LENGTH) { |
|
675 |
+ tag_val[tag_val_length++] = tolower(*ptr); |
|
676 |
+ } |
|
677 |
+ ptr++; |
|
678 |
+ } |
|
679 |
+ |
|
680 |
+ if (*ptr == '\\') { |
|
681 |
+ escape = TRUE; |
|
682 |
+ } else { |
|
683 |
+ escape = FALSE; |
|
684 |
+ } |
|
685 |
+ break; |
|
686 |
+ case HTML_COMMENT: |
|
687 |
+ html_output_c(&file_buff_o1, NULL, tolower(*ptr)); |
|
688 |
+ if (in_script) { |
|
689 |
+ html_output_c(&file_buff_script, NULL, tolower(*ptr)); |
|
690 |
+ } |
|
691 |
+ if (*ptr == '>') { |
|
692 |
+ state = HTML_SKIP_WS; |
|
693 |
+ next_state = HTML_NORM; |
|
694 |
+ } |
|
695 |
+ ptr++; |
|
696 |
+ break; |
|
697 |
+ case HTML_PROCESS_TAG: |
|
698 |
+ |
|
699 |
+ /* Default to no action for this tag */ |
|
700 |
+ state = HTML_SKIP_WS; |
|
701 |
+ next_state = HTML_NORM; |
|
702 |
+ if (tag[0] == '/') { |
|
703 |
+ /* End tag */ |
|
704 |
+ state = HTML_SKIP_WS; |
|
705 |
+ next_state = HTML_NORM; |
|
706 |
+ if (strcmp(tag, "/script") == 0) { |
|
707 |
+ in_script=FALSE; |
|
708 |
+ html_output_c(&file_buff_script, NULL, '\n'); |
|
709 |
+ } |
|
710 |
+ } else if (strcmp(tag, "script") == 0) { |
|
711 |
+ arg_value = html_tag_arg_value(&tag_args, "language"); |
|
712 |
+ if (arg_value && (strcmp(arg_value, "jscript.encode") == 0)) { |
|
713 |
+ html_tag_arg_set(&tag_args, "language", "javascript"); |
|
714 |
+ state = HTML_SKIP_WS; |
|
715 |
+ next_state = HTML_JSDECODE; |
|
716 |
+ } else if (arg_value && (strcmp(arg_value, "vbscript.encode") == 0)) { |
|
717 |
+ html_tag_arg_set(&tag_args, "language", "vbscript"); |
|
718 |
+ state = HTML_SKIP_WS; |
|
719 |
+ next_state = HTML_JSDECODE; |
|
720 |
+ } else { |
|
721 |
+ in_script = TRUE; |
|
722 |
+ } |
|
723 |
+ html_output_tag(&file_buff_script, tag, &tag_args); |
|
724 |
+ } else if (hrefs && strcmp(tag, "a") == 0) { |
|
725 |
+ arg_value = html_tag_arg_value(&tag_args, "href"); |
|
726 |
+ if (strlen(arg_value) > 0) { |
|
727 |
+ html_tag_arg_add(hrefs, "href", arg_value); |
|
728 |
+ } |
|
729 |
+ } |
|
730 |
+ html_tag_arg_free(&tag_args); |
|
731 |
+ break; |
|
732 |
+ case HTML_CHAR_REF: |
|
733 |
+ if (*ptr == '#') { |
|
734 |
+ value = 0; |
|
735 |
+ hex = FALSE; |
|
736 |
+ state = HTML_CHAR_REF_DECODE; |
|
737 |
+ ptr++; |
|
738 |
+ } else { |
|
739 |
+ html_output_c(&file_buff_o1, &file_buff_o2, '&'); |
|
740 |
+ state = next_state; |
|
741 |
+ next_state = HTML_BAD_STATE; |
|
742 |
+ } |
|
743 |
+ break; |
|
744 |
+ case HTML_CHAR_REF_DECODE: |
|
745 |
+ if ((value==0) && ((*ptr == 'x') || (*ptr == 'X'))) { |
|
746 |
+ hex=TRUE; |
|
747 |
+ ptr++; |
|
748 |
+ } else if (*ptr == ';') { |
|
749 |
+ html_output_c(&file_buff_o1, &file_buff_o2, value); |
|
750 |
+ state = next_state; |
|
751 |
+ next_state = HTML_BAD_STATE; |
|
752 |
+ ptr++; |
|
753 |
+ } else if (isdigit(*ptr) || (hex && isxdigit(*ptr))) { |
|
754 |
+ if (hex) { |
|
755 |
+ value *= 16; |
|
756 |
+ } else { |
|
757 |
+ value *= 10; |
|
758 |
+ } |
|
759 |
+ if (isdigit(*ptr)) { |
|
760 |
+ value += (*ptr - '0'); |
|
761 |
+ } else { |
|
762 |
+ value += (tolower(*ptr) - 'a' + 10); |
|
763 |
+ } |
|
764 |
+ ptr++; |
|
765 |
+ } else { |
|
766 |
+ html_output_c(&file_buff_o1, &file_buff_o2, value); |
|
767 |
+ state = next_state; |
|
768 |
+ next_state = HTML_BAD_STATE; |
|
769 |
+ } |
|
770 |
+ break; |
|
771 |
+ case HTML_JSDECODE: |
|
772 |
+ /* Check for start marker */ |
|
773 |
+ if (strncmp(ptr, "#@~^", 4) == 0) { |
|
774 |
+ ptr += 4; |
|
775 |
+ state = HTML_JSDECODE_LENGTH; |
|
776 |
+ next_state = HTML_BAD_STATE; |
|
777 |
+ } else { |
|
778 |
+ html_output_c(&file_buff_o1, &file_buff_o2, tolower(*ptr)); |
|
779 |
+ html_output_c(&file_buff_script, NULL, tolower(*ptr)); |
|
780 |
+ ptr++; |
|
781 |
+ } |
|
782 |
+ break; |
|
783 |
+ case HTML_JSDECODE_LENGTH: |
|
784 |
+ if (strlen(ptr) < 8) { |
|
785 |
+ state = HTML_NORM; |
|
786 |
+ next_state = HTML_BAD_STATE; |
|
787 |
+ break; |
|
788 |
+ } |
|
789 |
+ length = base64_chars[ptr[0]] << 2; |
|
790 |
+ length += base64_chars[ptr[1]] >> 4; |
|
791 |
+ length += (base64_chars[ptr[1]] & 0x0f) << 12; |
|
792 |
+ length += (base64_chars[ptr[2]] >> 2) << 8; |
|
793 |
+ length += (base64_chars[ptr[2]] & 0x03) << 22; |
|
794 |
+ length += base64_chars[ptr[3]] << 16; |
|
795 |
+ length += (base64_chars[ptr[4]] << 2) << 24; |
|
796 |
+ length += (base64_chars[ptr[5]] >> 4) << 24; |
|
797 |
+ table_pos = 0; |
|
798 |
+ state = HTML_JSDECODE_DECRYPT; |
|
799 |
+ next_state = HTML_BAD_STATE; |
|
800 |
+ ptr += 8; |
|
801 |
+ break; |
|
802 |
+ case HTML_JSDECODE_DECRYPT: |
|
803 |
+ if (length == 0) { |
|
804 |
+ html_output_str(&file_buff_script, "</script>\n", 10); |
|
805 |
+ length = 12; |
|
806 |
+ state = HTML_SKIP_LENGTH; |
|
807 |
+ next_state = HTML_NORM; |
|
808 |
+ break; |
|
809 |
+ } |
|
810 |
+ if (*ptr < 0x80) { |
|
811 |
+ value = decrypt_tables[table_order[table_pos]][*ptr]; |
|
812 |
+ if (value == 0xFF) { /* special character */ |
|
813 |
+ ptr++; |
|
814 |
+ length--; |
|
815 |
+ switch (*ptr) { |
|
816 |
+ case '\0': |
|
817 |
+ /* Fixup for end of line */ |
|
818 |
+ ptr--; |
|
819 |
+ break; |
|
820 |
+ case 0x21: |
|
821 |
+ html_output_c(&file_buff_o1, &file_buff_o2, 0x3c); |
|
822 |
+ html_output_c(&file_buff_script, NULL, 0x3c); |
|
823 |
+ break; |
|
824 |
+ case 0x23: |
|
825 |
+ html_output_c(&file_buff_o1, &file_buff_o2, 0x0d); |
|
826 |
+ html_output_c(&file_buff_script, NULL, 0x0d); |
|
827 |
+ break; |
|
828 |
+ case 0x24: |
|
829 |
+ html_output_c(&file_buff_o1, &file_buff_o2, 0x40); |
|
830 |
+ html_output_c(&file_buff_script, NULL, 0x40); |
|
831 |
+ break; |
|
832 |
+ case 0x26: |
|
833 |
+ html_output_c(&file_buff_o1, &file_buff_o2, 0x0a); |
|
834 |
+ html_output_c(&file_buff_script, NULL, 0x0a); |
|
835 |
+ break; |
|
836 |
+ case 0x2a: |
|
837 |
+ html_output_c(&file_buff_o1, &file_buff_o2, 0x3e); |
|
838 |
+ html_output_c(&file_buff_script, NULL, 0x3e); |
|
839 |
+ break; |
|
840 |
+ } |
|
841 |
+ } else { |
|
842 |
+ html_output_c(&file_buff_o1, &file_buff_o2, value); |
|
843 |
+ html_output_c(&file_buff_script, NULL, tolower(value)); |
|
844 |
+ } |
|
845 |
+ } |
|
846 |
+ table_pos = (table_pos + 1) % 64; |
|
847 |
+ ptr++; |
|
848 |
+ length--; |
|
849 |
+ break; |
|
239 | 850 |
} |
240 |
- break; |
|
241 |
- /* TODO: character entities, & etc. */ |
|
242 |
- default: |
|
243 |
- *newcurrent = '&'; |
|
244 |
- newcurrent++; |
|
245 |
- linepos++; |
|
246 | 851 |
} |
247 |
- line = linepos; |
|
852 |
+ free(line); |
|
853 |
+ ptr = line = cli_readline(stream_in, m_area, 8192); |
|
854 |
+ } |
|
855 |
+ |
|
856 |
+ retval = TRUE; |
|
857 |
+abort: |
|
858 |
+ html_tag_arg_free(&tag_args); |
|
859 |
+ if (!m_area) { |
|
860 |
+ fclose(stream_in); |
|
248 | 861 |
} |
249 |
- *newcurrent = '\0'; |
|
250 |
- return newline; |
|
862 |
+ html_output_flush(&file_buff_o1); |
|
863 |
+ html_output_flush(&file_buff_o2); |
|
864 |
+ html_output_flush(&file_buff_script); |
|
865 |
+ close(file_buff_o1.fd); |
|
866 |
+ close(file_buff_o2.fd); |
|
867 |
+ close(file_buff_script.fd); |
|
868 |
+ return retval; |
|
869 |
+} |
|
870 |
+ |
|
871 |
+int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs) |
|
872 |
+{ |
|
873 |
+ m_area_t m_area; |
|
874 |
+ |
|
875 |
+ m_area.buffer = in_buff; |
|
876 |
+ m_area.length = in_size; |
|
877 |
+ m_area.offset = 0; |
|
878 |
+ |
|
879 |
+ cli_html_normalise(-1, &m_area, dirname, hrefs); |
|
251 | 880 |
} |
252 | 881 |
|
253 |
-int char2hex(unsigned char c) |
|
882 |
+int html_normalise_fd(int fd, const char *dirname, tag_arguments_t *hrefs) |
|
254 | 883 |
{ |
255 |
- if ((c-'0') <= 9) { |
|
256 |
- return (c-'0'); |
|
257 |
- } else if ((c-'A') <= 5) { |
|
258 |
- return (c-'A'+10); |
|
884 |
+#if HAVE_MMAP |
|
885 |
+ int retval=FALSE; |
|
886 |
+ m_area_t m_area; |
|
887 |
+ struct stat statbuf; |
|
888 |
+ |
|
889 |
+ if (fstat(fd, &statbuf) == 0) { |
|
890 |
+ m_area.length = statbuf.st_size; |
|
891 |
+ m_area.buffer = (unsigned char *) mmap(NULL, m_area.length, PROT_READ, MAP_PRIVATE, fd, 0); |
|
892 |
+ m_area.offset = 0; |
|
893 |
+ if (m_area.buffer == MAP_FAILED) { |
|
894 |
+ cli_dbgmsg("mmap HTML failed\n"); |
|
895 |
+ retval = cli_html_normalise(fd, NULL, dirname, hrefs); |
|
896 |
+ } else { |
|
897 |
+ cli_dbgmsg("mmap'ed file\n"); |
|
898 |
+ retval = cli_html_normalise(-1, &m_area, dirname, hrefs); |
|
899 |
+ munmap(m_area.buffer, m_area.length); |
|
900 |
+ } |
|
901 |
+ } else { |
|
902 |
+ cli_dbgmsg("fstat HTML failed\n"); |
|
903 |
+ retval = cli_html_normalise(fd, NULL, dirname, hrefs); |
|
259 | 904 |
} |
260 |
- return (c-'a'+10); |
|
905 |
+ return retval; |
|
906 |
+#else |
|
907 |
+ return cli_html_normalise(fd, NULL, dirname, hrefs); |
|
908 |
+#endif |
|
261 | 909 |
} |
262 | 910 |
|
263 |
-char *quoted_decode(unsigned char *line, off_t in_size) |
|
911 |
+int html_screnc_decode(int fd, const char *dirname) |
|
264 | 912 |
{ |
265 |
- unsigned char *newline, *newcurrent, *line_end; |
|
913 |
+ int fd_tmp, table_pos=0, result, count, state, retval=FALSE; |
|
914 |
+ unsigned char *line, tmpstr[6]; |
|
915 |
+ unsigned long length; |
|
916 |
+ unsigned char *ptr, filename[1024]; |
|
917 |
+ FILE *stream_in; |
|
918 |
+ file_buff_t file_buff; |
|
266 | 919 |
|
267 |
- newcurrent = newline = (unsigned char *) cli_malloc(in_size + 1); |
|
268 |
- if (!newline) { |
|
269 |
- return NULL; |
|
920 |
+ lseek(fd, 0, SEEK_SET); |
|
921 |
+ fd_tmp = dup(fd); |
|
922 |
+ if (fd_tmp < 0) { |
|
923 |
+ return FALSE; |
|
924 |
+ } |
|
925 |
+ stream_in = fdopen(fd_tmp, "r"); |
|
926 |
+ if (!stream_in) { |
|
927 |
+ close(fd_tmp); |
|
928 |
+ return FALSE; |
|
929 |
+ } |
|
930 |
+ |
|
931 |
+ snprintf(filename, 1024, "%s/screnc.html", dirname); |
|
932 |
+ file_buff.fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU); |
|
933 |
+ if (!file_buff.fd) { |
|
934 |
+ cli_dbgmsg("open failed: %s\n", filename); |
|
935 |
+ fclose(stream_in); |
|
936 |
+ return FALSE; |
|
270 | 937 |
} |
271 | 938 |
|
272 |
- line_end = line+in_size; |
|
273 |
- while (line <= line_end) { |
|
274 |
- while ((line < line_end) && *line != '=') { |
|
275 |
- *newcurrent = *line; |
|
276 |
- line++; |
|
277 |
- newcurrent++; |
|
939 |
+ while ((line = cli_readline(stream_in, NULL, 8192)) != NULL) { |
|
940 |
+ ptr = strstr(line, "#@~^"); |
|
941 |
+ if (ptr) { |
|
942 |
+ break; |
|
278 | 943 |
} |
279 |
- if ((line < line_end) && isspace(line[1])) { |
|
280 |
- line++; |
|
281 |
- while ((line < line_end) && isspace(*line)) { |
|
282 |
- line++; |
|
944 |
+ free(line); |
|
945 |
+ } |
|
946 |
+ if (!line) { |
|
947 |
+ goto abort; |
|
948 |
+ } |
|
949 |
+ |
|
950 |
+ /* Calculate the length of the encoded string */ |
|
951 |
+ ptr += 4; |
|
952 |
+ count = 0; |
|
953 |
+ do { |
|
954 |
+ if (! *ptr) { |
|
955 |
+ free(line); |
|
956 |
+ ptr = line = cli_readline(stream_in, NULL, 8192); |
|
957 |
+ if (!line) { |
|
958 |
+ goto abort; |
|
283 | 959 |
} |
284 |
- continue; |
|
285 | 960 |
} |
286 |
- if ((line+2) <= line_end) { |
|
287 |
- if (isxdigit(line[1]) && isxdigit(line[2])) { |
|
288 |
- *newcurrent = (char2hex(line[1]) * 16) + |
|
289 |
- char2hex(line[2]); |
|
290 |
- newcurrent++; |
|
291 |
- line += 3; |
|
961 |
+ tmpstr[count++] = *ptr; |
|
962 |
+ ptr++; |
|
963 |
+ } while (count < 6); |
|
964 |
+ |
|
965 |
+ length = base64_chars[tmpstr[0]] << 2; |
|
966 |
+ length += base64_chars[tmpstr[1]] >> 4; |
|
967 |
+ length += (base64_chars[tmpstr[1]] & 0x0f) << 12; |
|
968 |
+ length += (base64_chars[tmpstr[2]] >> 2) << 8; |
|
969 |
+ length += (base64_chars[tmpstr[2]] & 0x03) << 22; |
|
970 |
+ length += base64_chars[tmpstr[3]] << 16; |
|
971 |
+ length += (base64_chars[tmpstr[4]] << 2) << 24; |
|
972 |
+ length += (base64_chars[tmpstr[5]] >> 4) << 24; |
|
973 |
+ |
|
974 |
+ /* Move forward 2 bytes */ |
|
975 |
+ count = 2; |
|
976 |
+ state = HTML_SKIP_LENGTH; |
|
977 |
+ |
|
978 |
+ while (length && line) { |
|
979 |
+ while (length && *ptr) { |
|
980 |
+ if ((*ptr == '\n') || (*ptr == '\r')) { |
|
981 |
+ ptr++; |
|
292 | 982 |
continue; |
293 | 983 |
} |
984 |
+ switch (state) { |
|
985 |
+ case HTML_SKIP_LENGTH: |
|
986 |
+ ptr++; |
|
987 |
+ count--; |
|
988 |
+ if (count == 0) { |
|
989 |
+ state = HTML_NORM; |
|
990 |
+ } |
|
991 |
+ break; |
|
992 |
+ case HTML_SPECIAL_CHAR: |
|
993 |
+ switch (*ptr) { |
|
994 |
+ case 0x21: |
|
995 |
+ html_output_c(&file_buff, NULL, 0x3c); |
|
996 |
+ break; |
|
997 |
+ case 0x23: |
|
998 |
+ html_output_c(&file_buff, NULL, 0x0d); |
|
999 |
+ break; |
|
1000 |
+ case 0x24: |
|
1001 |
+ html_output_c(&file_buff, NULL, 0x40); |
|
1002 |
+ break; |
|
1003 |
+ case 0x26: |
|
1004 |
+ html_output_c(&file_buff, NULL, 0x0a); |
|
1005 |
+ break; |
|
1006 |
+ case 0x2a: |
|
1007 |
+ html_output_c(&file_buff, NULL, 0x3e); |
|
1008 |
+ break; |
|
1009 |
+ } |
|
1010 |
+ ptr++; |
|
1011 |
+ length--; |
|
1012 |
+ state = HTML_NORM; |
|
1013 |
+ break; |
|
1014 |
+ case HTML_NORM: |
|
1015 |
+ if (*ptr < 0x80) { |
|
1016 |
+ result = decrypt_tables[table_order[table_pos]][*ptr]; |
|
1017 |
+ if (result == 0xFF) { /* special character */ |
|
1018 |
+ state = HTML_SPECIAL_CHAR; |
|
1019 |
+ } else { |
|
1020 |
+ html_output_c(&file_buff, NULL, (char)result); |
|
1021 |
+ } |
|
1022 |
+ } |
|
1023 |
+ ptr++; |
|
1024 |
+ length--; |
|
1025 |
+ table_pos = (table_pos + 1) % 64; |
|
1026 |
+ break; |
|
1027 |
+ } |
|
1028 |
+ } |
|
1029 |
+ free(line); |
|
1030 |
+ if (length) { |
|
1031 |
+ ptr = line = cli_readline(stream_in, NULL, 8192); |
|
294 | 1032 |
} |
295 |
- line++; |
|
296 | 1033 |
} |
297 |
- *newcurrent = '\0'; |
|
298 |
- return newline; |
|
1034 |
+ retval = TRUE; |
|
1035 |
+ |
|
1036 |
+abort: |
|
1037 |
+ fclose(stream_in); |
|
1038 |
+ html_output_flush(&file_buff); |
|
1039 |
+ close(file_buff.fd); |
|
1040 |
+ return retval; |
|
299 | 1041 |
} |
... | ... |
@@ -15,15 +15,21 @@ |
15 | 15 |
* along with this program; if not, write to the Free Software |
16 | 16 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
17 | 17 |
*/ |
18 |
- |
|
18 |
+ |
|
19 | 19 |
#ifndef __HTMLNORM_H
#define __HTMLNORM_H

/* off_t is used in the html_normalise_mem() prototype below, so this header
 * must pull in its definition itself rather than rely on the includer. */
#include <sys/types.h>

/*
 * A list of tag arguments, stored as two arrays.
 * Presumably tag[i] names the argument and value[i] holds its value for
 * i in [0, count) -- confirm against html_tag_arg_add().
 * Release with html_tag_arg_free().
 */
typedef struct tag_arguments_tag
{
        int count;
        unsigned char **tag;
        unsigned char **value;
} tag_arguments_t;

/* Normalise HTML held in memory (in_size bytes at in_buff); output goes to
 * files under dirname.  hrefs may be NULL. */
int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs);

/* Normalise HTML read from descriptor fd; output goes to files under
 * dirname.  hrefs may be NULL. */
int html_normalise_fd(int fd, const char *dirname, tag_arguments_t *hrefs);

/* Release the storage held by a tag_arguments_t. */
void html_tag_arg_free(tag_arguments_t *tags);

/* Decode MS Script Encoder data read from fd into "<dirname>/screnc.html". */
int html_screnc_decode(int fd, const char *dirname);

#endif
35 |
+ |
... | ... |
@@ -680,55 +680,58 @@ static int cli_scanmscab(int desc, const char **virname, long int *scanned, cons |
680 | 680 |
|
681 | 681 |
static int cli_scanhtml(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec) |
682 | 682 |
{ |
683 |
- unsigned char *membuff, *newbuff, *newbuff2; |
|
684 |
- struct stat statbuf; |
|
685 |
- int ret; |
|
683 |
+ char *tempname, fullname[1024]; |
|
684 |
+ const char *tmpdir; |
|
685 |
+ int ret=CL_CLEAN, fd; |
|
686 | 686 |
|
687 | 687 |
|
688 |
-#ifdef HAVE_MMAP |
|
689 | 688 |
cli_dbgmsg("in cli_scanhtml()\n"); |
690 | 689 |
|
691 |
- if(fstat(desc, &statbuf) != 0) { |
|
692 |
- cli_dbgmsg("HTML: Can't stat descriptor %d\n", desc); |
|
693 |
- return CL_EIO; |
|
690 |
+ if((tmpdir = getenv("TMPDIR")) == NULL) |
|
691 |
+#ifdef P_tmpdir |
|
692 |
+ tmpdir = P_tmpdir; |
|
693 |
+#else |
|
694 |
+ tmpdir = "/tmp"; |
|
695 |
+#endif |
|
696 |
+ |
|
697 |
+ tempname = cli_gentemp(tmpdir); |
|
698 |
+ |
|
699 |
+ if(mkdir(tempname, 0700)) { |
|
700 |
+ cli_dbgmsg("ScanHTML -> Can't create temporary directory %s\n", tempname); |
|
701 |
+ return CL_ETMPDIR; |
|
694 | 702 |
} |
695 | 703 |
|
696 |
- if(limits && limits->maxfilesize && (statbuf.st_size > limits->maxfilesize)) { |
|
697 |
- cli_dbgmsg("HTML: Size exceeded (%d, max: %ld)\n", statbuf.st_size, limits->maxfilesize); |
|
698 |
- return CL_CLEAN; |
|
704 |
+ html_normalise_fd(desc, tempname, NULL); |
|
705 |
+ snprintf(fullname, 1024, "%s/comment.html", tempname); |
|
706 |
+ fd = open(fullname, O_RDONLY); |
|
707 |
+ if (fd >= 0) { |
|
708 |
+ ret = cli_scandesc(fd, virname, scanned, root, 0); |
|
709 |
+ close(fd); |
|
699 | 710 |
} |
700 | 711 |
|
701 |
- membuff = mmap(NULL, statbuf.st_size, PROT_READ, MAP_PRIVATE, desc, 0); |
|
702 |
- |
|
703 |
- /* TODO: do file operations if mmap fails */ |
|
704 |
- if(membuff == MAP_FAILED) { |
|
705 |
- cli_dbgmsg("HTML: mmap failed\n"); |
|
706 |
- return CL_EMEM; |
|
712 |
+ if (ret == CL_CLEAN) { |
|
713 |
+ snprintf(fullname, 1024, "%s/nocomment.html", tempname); |
|
714 |
+ fd = open(fullname, O_RDONLY); |
|
715 |
+ if (fd >= 0) { |
|
716 |
+ ret = cli_scandesc(fd, virname, scanned, root, 0); |
|
717 |
+ close(fd); |
|
718 |
+ } |
|
707 | 719 |
} |
708 | 720 |
|
709 |
- newbuff = html_normalize(membuff, statbuf.st_size); |
|
710 |
- |
|
711 |
- if(newbuff) { |
|
712 |
- newbuff2 = remove_html_comments(newbuff); |
|
713 |
- free(newbuff); |
|
714 |
- newbuff = remove_html_char_ref(newbuff2); |
|
715 |
- free(newbuff2); |
|
716 |
- /* Normalise a second time as the above can leave inconsistent white |
|
717 |
- * space |
|
718 |
- */ |
|
719 |
- newbuff2 = html_normalize(newbuff, strlen(newbuff)); |
|
720 |
- free(newbuff); |
|
721 |
- newbuff = newbuff2; |
|
721 |
+ if (ret == CL_CLEAN) { |
|
722 |
+ snprintf(fullname, 1024, "%s/script.html", tempname); |
|
723 |
+ fd = open(fullname, O_RDONLY); |
|
724 |
+ if (fd >= 0) { |
|
725 |
+ ret = cli_scandesc(fd, virname, scanned, root, 0); |
|
726 |
+ close(fd); |
|
727 |
+ } |
|
722 | 728 |
} |
723 | 729 |
|
724 |
- ret = cl_scanbuff(newbuff, strlen(newbuff), virname, root); |
|
730 |
+ if(!cli_leavetemps_flag) |
|
731 |
+ cli_rmdirs(tempname); |
|
725 | 732 |
|
726 |
- free(newbuff); |
|
727 |
- munmap(membuff, statbuf.st_size); |
|
733 |
+ free(tempname); |
|
728 | 734 |
return ret; |
729 |
-#else /* FIXME */ |
|
730 |
- return CL_CLEAN; |
|
731 |
-#endif |
|
732 | 735 |
} |
733 | 736 |
|
734 | 737 |
static int cli_scan_mydoom_log(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec) |
... | ... |
@@ -1053,6 +1056,37 @@ static int cli_scanmschm(int desc, const char **virname, long int *scanned, cons |
1053 | 1053 |
return ret; |
1054 | 1054 |
} |
1055 | 1055 |
|
1056 |
+static int cli_scanscrenc(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec) |
|
1057 |
+{ |
|
1058 |
+ const char *tmpdir; |
|
1059 |
+ char *tempname; |
|
1060 |
+ int ret = CL_CLEAN; |
|
1061 |
+ |
|
1062 |
+ cli_dbgmsg("in cli_scanscrenc()\n"); |
|
1063 |
+ |
|
1064 |
+ if((tmpdir = getenv("TMPDIR")) == NULL) |
|
1065 |
+#ifdef P_tmpdir |
|
1066 |
+ tmpdir = P_tmpdir; |
|
1067 |
+#else |
|
1068 |
+ tmpdir = "/tmp"; |
|
1069 |
+#endif |
|
1070 |
+ |
|
1071 |
+ tempname = cli_gentemp(tmpdir); |
|
1072 |
+ |
|
1073 |
+ if(mkdir(tempname, 0700)) { |
|
1074 |
+ cli_dbgmsg("CHM: Can't create temporary directory %s\n", tempname); |
|
1075 |
+ return CL_ETMPDIR; |
|
1076 |
+ } |
|
1077 |
+ |
|
1078 |
+ if (html_screnc_decode(desc, tempname)) |
|
1079 |
+ ret = cli_scandir(tempname, virname, scanned, root, limits, options, arec, mrec); |
|
1080 |
+ |
|
1081 |
+ if(!cli_leavetemps_flag) |
|
1082 |
+ cli_rmdirs(tempname); |
|
1083 |
+ |
|
1084 |
+ free(tempname); |
|
1085 |
+ return ret; |
|
1086 |
+} |
|
1056 | 1087 |
static int cli_scanmail(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec) |
1057 | 1088 |
{ |
1058 | 1089 |
const char *tmpdir; |
... | ... |
@@ -1193,6 +1227,10 @@ int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const |
1193 | 1193 |
ret = cli_scantar(desc, virname, scanned, root, limits, options, arec, mrec); |
1194 | 1194 |
break; |
1195 | 1195 |
|
1196 |
+ case CL_SCRENC: |
|
1197 |
+ ret = cli_scanscrenc(desc, virname, scanned, root, limits, options, arec, mrec); |
|
1198 |
+ break; |
|
1199 |
+ |
|
1196 | 1200 |
case CL_DATAFILE: |
1197 | 1201 |
/* it could be a false positive and a standard DOS .COM file */ |
1198 | 1202 |
{ |