git-svn: trunk@3543
Török Edvin authored on 2008/01/26 01:39:40... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Fri Jan 25 18:15:21 EET 2008 (edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * libclamav/htmlnorm.[ch]: handle NUL characters in HTML files (bb #539). |
|
4 |
+ |
|
1 | 5 |
Fri Jan 25 16:35:34 CET 2008 (tk) |
2 | 6 |
--------------------------------- |
3 | 7 |
* libclamav/cab.[ch]: rewrite file/folder handling code as a complete |
... | ... |
@@ -156,13 +156,27 @@ int decrypt_tables[3][128] = { |
156 | 156 |
0x3B, 0x57, 0x22, 0x6D, 0x4D, 0x25, 0x28, 0x46, 0x4A, 0x32, 0x41, 0x3D, 0x5F, 0x4F, 0x42, 0x65} |
157 | 157 |
}; |
158 | 158 |
|
159 |
-unsigned char *cli_readline(FILE *stream, m_area_t *m_area, unsigned int max_len) |
|
159 |
+static inline unsigned int rewind_tospace(const unsigned char* chunk, unsigned int len) |
|
160 | 160 |
{ |
161 |
- unsigned char *line, *ptr, *start, *end; |
|
162 |
- unsigned int line_len, count; |
|
161 |
+ unsigned int count = len; |
|
162 |
+ while (!isspace(chunk[len - 1]) && (len > 1)) { |
|
163 |
+ len--; |
|
164 |
+ } |
|
165 |
+ if (len == 1) { |
|
166 |
+ return count; |
|
167 |
+ } |
|
168 |
+ return len; |
|
169 |
+} |
|
163 | 170 |
|
164 |
- line = (unsigned char *) cli_malloc(max_len); |
|
165 |
- if (!line) { |
|
171 |
+/* read at most @max_len bytes (including the terminating NUL) from @m_area or @stream, skipping embedded NUL chars. |
|
172 |
+ * This used to be called cli_readline, but we don't stop at end-of-line anymore */ |
|
173 |
+static unsigned char *cli_readchunk(FILE *stream, m_area_t *m_area, unsigned int max_len) |
|
174 |
+{ |
|
175 |
+ unsigned char *chunk, *start, *ptr, *end; |
|
176 |
+ unsigned int chunk_len, count; |
|
177 |
+ |
|
178 |
+ chunk = (unsigned char *) cli_malloc(max_len); |
|
179 |
+ if (!chunk) { |
|
166 | 180 |
return NULL; |
167 | 181 |
} |
168 | 182 |
|
... | ... |
@@ -171,66 +185,103 @@ unsigned char *cli_readline(FILE *stream, m_area_t *m_area, unsigned int max_len |
171 | 171 |
start = ptr = m_area->buffer + m_area->offset; |
172 | 172 |
end = m_area->buffer + m_area->length; |
173 | 173 |
if (start >= end) { |
174 |
- free(line); |
|
174 |
+ free(chunk); |
|
175 | 175 |
return NULL; |
176 | 176 |
} |
177 |
- line_len = 1; |
|
178 |
- while ((ptr < end) && (*ptr != '\n') && (line_len < (max_len-1))) { |
|
179 |
- ptr++; |
|
180 |
- line_len++; |
|
181 |
- } |
|
182 |
- if (ptr == end) { |
|
183 |
- line_len--; |
|
184 |
- memcpy(line, start, line_len); |
|
185 |
- line[line_len] = '\0'; |
|
186 |
- } else if (*ptr == '\n') { |
|
187 |
- memcpy(line, start, line_len); |
|
188 |
- line[line_len] = '\0'; |
|
177 |
+ /* maximum we can copy into the buffer, |
|
178 |
+ * we could have less than max_len bytes available */ |
|
179 |
+ chunk_len = MIN(end-start, max_len-1); |
|
180 |
+ |
|
181 |
+ /* look for NULL chars */ |
|
182 |
+ ptr = memchr(start, 0, chunk_len); |
|
183 |
+ if(!ptr) { |
|
184 |
+ /* no NULL chars found, copy all */ |
|
185 |
+ memcpy(chunk, start, chunk_len); |
|
186 |
+ chunk[chunk_len] = '\0'; |
|
187 |
+ m_area->offset += chunk_len; |
|
188 |
+ /* point ptr to end of chunk, |
|
189 |
+ * so we can check and rewind to a space below */ |
|
190 |
+ ptr = start + chunk_len; |
|
189 | 191 |
} else { |
190 |
- /* Hit max_len */ |
|
191 |
- /* Store the current line end and length*/ |
|
192 |
- count = line_len; |
|
193 |
- while (!isspace(*ptr) && (line_len > 1)) { |
|
194 |
- ptr--; |
|
195 |
- line_len--; |
|
192 |
+ /* copy portion that doesn't contain NULL chars */ |
|
193 |
+ chunk_len = ptr - start; |
|
194 |
+ if(chunk_len < max_len) { |
|
195 |
+ memcpy(chunk, start, chunk_len); |
|
196 |
+ } else { |
|
197 |
+ chunk_len = 0; |
|
198 |
+ ptr = start; |
|
199 |
+ } |
|
200 |
+ /* we have an unknown number of NUL chars, |
|
201 |
+ * copy char-by-char and skip them */ |
|
202 |
+ while((ptr < end) && (chunk_len < max_len-1)) { |
|
203 |
+ const unsigned char c = *ptr++; |
|
204 |
+ if(c) { |
|
205 |
+ chunk[chunk_len++] = c; |
|
206 |
+ } |
|
196 | 207 |
} |
197 |
- if (line_len == 1) { |
|
198 |
- line_len=count; |
|
208 |
+ chunk[chunk_len] = '\0'; |
|
209 |
+ /* we can't use chunk_len to determine how many bytes we read, since |
|
210 |
+ * we skipped chars */ |
|
211 |
+ m_area->offset = ptr - m_area->buffer; |
|
212 |
+ } |
|
213 |
+ if(ptr && ptr < end && !isspace(*ptr)) { |
|
214 |
+ /* we hit max_len, rewind to a space */ |
|
215 |
+ count = rewind_tospace(chunk, chunk_len); |
|
216 |
+ if(count < chunk_len) { |
|
217 |
+ chunk[count] = '\0'; |
|
218 |
+ m_area->offset -= chunk_len - count; |
|
199 | 219 |
} |
200 |
- memcpy(line, start, line_len); |
|
201 |
- line[line_len] = '\0'; |
|
202 | 220 |
} |
203 |
- m_area->offset += line_len; |
|
204 | 221 |
} else { |
205 | 222 |
if (!stream) { |
206 | 223 |
cli_dbgmsg("No HTML stream\n"); |
207 |
- free(line); |
|
224 |
+ free(chunk); |
|
208 | 225 |
return NULL; |
209 | 226 |
} |
210 |
- if (fgets(line, max_len, stream) == NULL) { |
|
211 |
- free(line); |
|
227 |
+ chunk_len = fread(chunk, 1, max_len-1, stream); |
|
228 |
+ if(!chunk_len || chunk_len > max_len-1) { |
|
229 |
+ /* EOF, or prevent overflow */ |
|
230 |
+ free(chunk); |
|
212 | 231 |
return NULL; |
213 | 232 |
} |
214 | 233 |
|
215 |
- line_len=strlen(line); |
|
216 |
- if (line_len == 0) { |
|
217 |
- free(line); |
|
218 |
- return NULL; |
|
219 |
- } |
|
220 |
- if (line_len == max_len-1) { |
|
221 |
- /* didn't find a whole line - rewind to a space*/ |
|
222 |
- count = 0; |
|
223 |
- while (!isspace(line[--line_len])) { |
|
224 |
- count--; |
|
225 |
- if (line_len == 0) { |
|
226 |
- return line; |
|
234 |
+ /* Look for NULL chars */ |
|
235 |
+ ptr = memchr(chunk, 0, chunk_len); |
|
236 |
+ if(ptr) { |
|
237 |
+ /* NULL char found */ |
|
238 |
+ /* save buffer limits */ |
|
239 |
+ start = ptr; |
|
240 |
+ end = chunk + chunk_len; |
|
241 |
+ |
|
242 |
+ /* start of NULL chars, we will copy non-NULL characters |
|
243 |
+ * to this position */ |
|
244 |
+ chunk_len = ptr - chunk; |
|
245 |
+ |
|
246 |
+ /* find first non-NULL char */ |
|
247 |
+ while((ptr < end) && !(*ptr)) { |
|
248 |
+ ptr++; |
|
249 |
+ } |
|
250 |
+ /* skip over NULL chars, and move back the rest */ |
|
251 |
+ while((ptr < end) && (chunk_len < max_len-1)) { |
|
252 |
+ const unsigned char c = *ptr++; |
|
253 |
+ if(c) { |
|
254 |
+ chunk[chunk_len++] = c; |
|
227 | 255 |
} |
228 | 256 |
} |
229 |
- fseek(stream, count, SEEK_CUR); |
|
230 |
- line[line_len+1] = '\0'; |
|
257 |
+ chunk[chunk_len] = '\0'; |
|
258 |
+ } |
|
259 |
+ if(chunk_len == max_len - 1) { |
|
260 |
+ /* rewind to a space (which includes newline) */ |
|
261 |
+ count = rewind_tospace(chunk, chunk_len); |
|
262 |
+ if(count < chunk_len) { |
|
263 |
+ chunk[count] = '\0'; |
|
264 |
+ /* seek-back to space */ |
|
265 |
+ fseek(stream, (long)(count - chunk_len), SEEK_CUR); |
|
266 |
+ } |
|
231 | 267 |
} |
232 | 268 |
} |
233 |
- return line; |
|
269 |
+ |
|
270 |
+ return chunk; |
|
234 | 271 |
} |
235 | 272 |
|
236 | 273 |
static void html_output_flush(file_buff_t *fbuff) |
... | ... |
@@ -580,7 +631,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
580 | 580 |
if(dconf_entconv) |
581 | 581 |
ptr = line = encoding_norm_readline(&conv, stream_in, m_area); |
582 | 582 |
else |
583 |
- ptr = line = cli_readline(stream_in, m_area, 8192); |
|
583 |
+ ptr = line = cli_readchunk(stream_in, m_area, 8192); |
|
584 | 584 |
|
585 | 585 |
while (line) { |
586 | 586 |
if(href_contents_begin) |
... | ... |
@@ -1486,7 +1537,7 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag |
1486 | 1486 |
ptr = line = encoding_norm_readline(&conv, stream_in, m_area); |
1487 | 1487 |
else { |
1488 | 1488 |
free(line); |
1489 |
- ptr = line = cli_readline(stream_in, m_area, 8192); |
|
1489 |
+ ptr = line = cli_readchunk(stream_in, m_area, 8192); |
|
1490 | 1490 |
} |
1491 | 1491 |
} |
1492 | 1492 |
|
... | ... |
@@ -1609,7 +1660,7 @@ int html_screnc_decode(int fd, const char *dirname) |
1609 | 1609 |
return FALSE; |
1610 | 1610 |
} |
1611 | 1611 |
|
1612 |
- while ((line = cli_readline(stream_in, NULL, 8192)) != NULL) { |
|
1612 |
+ while ((line = cli_readchunk(stream_in, NULL, 8192)) != NULL) { |
|
1613 | 1613 |
ptr = strstr(line, "#@~^"); |
1614 | 1614 |
if (ptr) { |
1615 | 1615 |
break; |
... | ... |
@@ -1626,7 +1677,7 @@ int html_screnc_decode(int fd, const char *dirname) |
1626 | 1626 |
do { |
1627 | 1627 |
if (! *ptr) { |
1628 | 1628 |
free(line); |
1629 |
- ptr = line = cli_readline(stream_in, NULL, 8192); |
|
1629 |
+ ptr = line = cli_readchunk(stream_in, NULL, 8192); |
|
1630 | 1630 |
if (!line) { |
1631 | 1631 |
goto abort; |
1632 | 1632 |
} |
... | ... |
@@ -1701,7 +1752,7 @@ int html_screnc_decode(int fd, const char *dirname) |
1701 | 1701 |
} |
1702 | 1702 |
free(line); |
1703 | 1703 |
if (length) { |
1704 |
- ptr = line = cli_readline(stream_in, NULL, 8192); |
|
1704 |
+ ptr = line = cli_readchunk(stream_in, NULL, 8192); |
|
1705 | 1705 |
} |
1706 | 1706 |
} |
1707 | 1707 |
retval = TRUE; |
... | ... |
@@ -35,8 +35,6 @@ typedef struct m_area_tag { |
35 | 35 |
off_t offset; |
36 | 36 |
} m_area_t; |
37 | 37 |
|
38 |
- |
|
39 |
-unsigned char *cli_readline(FILE *stream, m_area_t *m_area, unsigned int max_len); |
|
40 | 38 |
int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf); |
41 | 39 |
int html_normalise_fd(int fd, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf); |
42 | 40 |
void html_tag_arg_free(tag_arguments_t *tags); |