git-svn: trunk@2586
Tomasz Kojm authored on 2007/01/01 01:32:01... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Sun Dec 31 17:29:11 CET 2006 (tk) |
|
2 |
+--------------------------------- |
|
3 |
+ * libclamav: iconv: fix incorrect resuming on invalid UTF8 character, |
|
4 |
+ reported by nitrox <mail*nerdbase.de> (bb#215, patch by Edwin) |
|
5 |
+ |
|
1 | 6 |
Sat Dec 30 17:10:42 GMT 2006 (njh) |
2 | 7 |
---------------------------------- |
3 | 8 |
* libclamav/mbox.c: Fix compilation error on Solaris10 (reported by Andy |
... | ... |
@@ -297,50 +297,70 @@ static int iconv(iconv_t iconv_struct,char **inbuf, size_t *inbytesleft, |
297 | 297 |
{ |
298 | 298 |
const size_t maxread = *inbytesleft; |
299 | 299 |
const size_t maxwrite = *outbytesleft; |
300 |
- uint16_t* out = (uint16_t*) output; |
|
301 | 300 |
size_t j; |
302 | 301 |
for(i=0,j=0 ; i < maxread && j < maxwrite;) { |
303 | 302 |
if(input[i] < 0x7F) { |
304 |
- out[j++] = input[i++]; |
|
303 |
+ output[j++] = 0; |
|
304 |
+ output[j++] = input[i++]; |
|
305 | 305 |
} |
306 | 306 |
else if( (input[i]&0xE0) == 0xC0 ) { |
307 | 307 |
if ((input[i+1]&0xC0) == 0x80) { |
308 | 308 |
/* 2 bytes long 110yyyyy zzzzzzzz -> 00000yyy yyzzzzzz*/ |
309 |
+ output[j++] = ((input[i] & 0x1F) >> 2) & 0x07; |
|
309 | 310 |
output[j++] = ((input[i] & 0x1F) << 6) | (input[i+1] & 0x3F); |
310 | 311 |
} |
311 |
- else |
|
312 |
+ else { |
|
312 | 313 |
cli_dbgmsg("invalid UTF8 character encountered\n"); |
314 |
+ break; |
|
315 |
+ } |
|
313 | 316 |
i+=2; |
314 | 317 |
} |
315 | 318 |
else if( (input[i]&0xE0) == 0xE0) { |
316 | 319 |
if( (input[i+1]&0xC0) == 0x80 && (input[i+2]&0xC0) == 0x80) { |
317 | 320 |
/* 3 bytes long 1110xxxx 10yyyyyy 10zzzzzzzz -> xxxxyyyy yyzzzzzz*/ |
318 |
- output[j++] = ((input[i] & 0x0F) << 12) | ((input[i+1] & 0x3F)<<6) | (input[i+2] & 0x3F); |
|
321 |
+ output[j++] = (input[i] << 4) | ((input[i+1] >> 2) & 0x0F); |
|
322 |
+ output[j++] = (input[i+1] << 6) | (input[i+2] & 0x3F); |
|
319 | 323 |
} |
320 |
- else |
|
324 |
+ else { |
|
321 | 325 |
cli_dbgmsg("invalid UTF8 character encountered\n"); |
326 |
+ break; |
|
327 |
+ } |
|
322 | 328 |
i+=3; |
323 | 329 |
} |
324 | 330 |
else if( (input[i]&0xF8) == 0xF0) { |
325 | 331 |
if((input[i+1]&0xC0) == 0x80 && (input[i+2]&0xC0) == 0x80 && (input[i+3]&0xC0) == 0x80) { |
326 | 332 |
/* 4 bytes long 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz*/ |
327 |
- output[j++] = ((input[i] & 0x07) << 18) | ((input[i+1] & 0x3F)<<12) | ((input[i+2] & 0x3F) <<6) | (input[i+3] & 0x3F); |
|
333 |
+ cli_dbgmsg("UTF8 character out of UTF16 range encountered"); |
|
334 |
+ output[j++] = 0xff; |
|
335 |
+ output[j++] = 0xff; |
|
336 |
+ |
|
337 |
+ /*out[j++] = ((input[i] & 0x07) << 2) | ((input[i+1] >> 4) & 0x3); |
|
338 |
+ out[j++] = (input[i+1] << 4) | ((input[i+2] >> 2) & 0x0F); |
|
339 |
+ out[j++] = (input[i+2] << 6) | (input[i+2] & 0x3F);*/ |
|
328 | 340 |
} |
329 |
- else |
|
341 |
+ else { |
|
330 | 342 |
cli_dbgmsg("invalid UTF8 character encountered\n"); |
343 |
+ break; |
|
344 |
+ } |
|
331 | 345 |
i+=4; |
332 | 346 |
} |
333 | 347 |
else { |
334 |
- i++; |
|
335 | 348 |
cli_dbgmsg("invalid UTF8 character encountered\n"); |
349 |
+ break; |
|
336 | 350 |
} |
337 | 351 |
} |
338 | 352 |
*inbytesleft -= i; |
339 | 353 |
*outbytesleft -= j; |
340 | 354 |
*inbuf += i; |
341 | 355 |
*outbuf += j; |
342 |
- if(*inbytesleft) |
|
343 |
- return E2BIG; |
|
356 |
+ if(*inbytesleft && *outbytesleft) { |
|
357 |
+ errno = EILSEQ;/* we had an early exit */ |
|
358 |
+ return -1; |
|
359 |
+ } |
|
360 |
+ if(*inbytesleft) { |
|
361 |
+ errno = E2BIG; |
|
362 |
+ return -1; |
|
363 |
+ } |
|
344 | 364 |
return 0; |
345 | 365 |
} |
346 | 366 |
} |
... | ... |
@@ -349,8 +369,10 @@ static int iconv(iconv_t iconv_struct,char **inbuf, size_t *inbytesleft, |
349 | 349 |
*inbytesleft -= maxcopy; |
350 | 350 |
*inbuf += maxcopy; |
351 | 351 |
*outbuf += maxcopy; |
352 |
- if(*inbytesleft) |
|
353 |
- return E2BIG; |
|
352 |
+ if(*inbytesleft) { |
|
353 |
+ errno = E2BIG; |
|
354 |
+ return -1; |
|
355 |
+ } |
|
354 | 356 |
return 0; |
355 | 357 |
} |
356 | 358 |
|
... | ... |
@@ -671,8 +693,12 @@ unsigned char* encoding_norm_readline(struct entity_conv* conv, FILE* stream_in, |
671 | 671 |
iconv_close(iconv_struct); |
672 | 672 |
|
673 | 673 |
if(rc==(size_t)-1 && errno != E2BIG) { |
674 |
- cli_dbgmsg("iconv error:%s, silently resuming\n",strerror(errno)); |
|
675 |
- return cli_readline(NULL, &conv->tmp_area, maxlen); |
|
674 |
+ cli_dbgmsg("iconv error:%s, silently resuming (%ld,%ld,%ld,%ld)\n",strerror(errno),out-conv->out_area.buffer,tmpbuff-conv->tmp_area.buffer,inleft,outleft); |
|
675 |
+ /* output raw byte, and resume at next byte */ |
|
676 |
+ *out++ = 0; |
|
677 |
+ *out++ = *tmpbuff++; |
|
678 |
+ inleft--; |
|
679 |
+/* return cli_readline(NULL, &conv->norm_area, maxlen);*/ |
|
676 | 680 |
} |
677 | 681 |
|
678 | 682 |
conv->tmp_area.length = inleft + (alignfix > 0 ? alignfix : 0); |
... | ... |
@@ -35,6 +35,8 @@ typedef struct m_area_tag { |
35 | 35 |
off_t offset; |
36 | 36 |
} m_area_t; |
37 | 37 |
|
38 |
+ |
|
39 |
+unsigned char *cli_readline(FILE *stream, m_area_t *m_area, unsigned int max_len); |
|
38 | 40 |
int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs); |
39 | 41 |
int html_normalise_fd(int fd, const char *dirname, tag_arguments_t *hrefs); |
40 | 42 |
void html_tag_arg_free(tag_arguments_t *tags); |