git-svn: trunk@3531
Török Edvin authored on 2008/01/23 23:22:16... | ... |
@@ -1,3 +1,14 @@ |
1 |
+Wed Jan 23 15:54:00 EET 2008 (edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * contrib/entitynorm, entconv.c: fix the table of valid characters for encoding names |
|
4 |
+ * libclamav/entconv.[ch]: |
|
5 |
+ * skip \0 characters |
|
6 |
+ * account for alignfix when calculating offset |
|
7 |
+ * reset iconv state before reusing |
|
8 |
+ * fix entity handling |
|
9 |
+ * fix memory leaks reported by valgrind |
|
10 |
+ * remove unused fields |
|
11 |
+ |
|
1 | 12 |
Wed Jan 23 12:49:46 CET 2008 (tk) |
2 | 13 |
--------------------------------- |
3 | 14 |
* freshclam/manager.c: reduce the number of error messages (bb#478); |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
PERL=perl |
2 | 2 |
CC=cc |
3 | 3 |
|
4 |
-all: entitylist.h encoding_aliases.h gentbl |
|
4 |
+all: entitylist.h encoding_aliases.h gentbl encname_chars.h |
|
5 | 5 |
|
6 | 6 |
entities_parsed: entities entities/* entity_decl_parse.pl |
7 | 7 |
$(PERL) entity_decl_parse.pl $</* | sort -u >$@ |
... | ... |
@@ -20,6 +20,8 @@ encoding_aliases.h: generate_encoding_aliases |
20 | 20 |
|
21 | 21 |
gentbl: gentbl.c |
22 | 22 |
$(CC) $< -o $@ |
23 |
+encname_chars.h: gentbl |
|
24 |
+ ./gentbl encname_chars 0-9 a-z A-Z _ . / \( \) - : >$@ |
|
23 | 25 |
|
24 | 26 |
clean: |
25 | 27 |
rm -f entitylist.h encoding_aliases.h entities_parsed generate_entitylist generate_encoding_aliases gentbl |
... | ... |
@@ -76,7 +76,7 @@ static inline unsigned char* u16_normalize(uint16_t u16, unsigned char* out, con |
76 | 76 |
assert((uint8_t)u16 != 0); |
77 | 77 |
*out++ = (uint8_t)u16; |
78 | 78 |
} |
79 |
- else { |
|
79 |
+ else if (u16) { |
|
80 | 80 |
/* normalize only >255 to speed up */ |
81 | 81 |
char buf[10]; |
82 | 82 |
const ssize_t max_num_length = sizeof(buf)-1; |
... | ... |
@@ -103,14 +103,14 @@ static inline unsigned char* u16_normalize(uint16_t u16, unsigned char* out, con |
103 | 103 |
|
104 | 104 |
const char* entity_norm(struct entity_conv* conv,const unsigned char* entity) |
105 | 105 |
{ |
106 |
- struct element* e = hashtab_find(conv->ht, (const char*)entity, strlen((const char*)entity)); |
|
106 |
+ struct element* e = hashtab_find(&entities_htable, (const char*)entity, strlen((const char*)entity)); |
|
107 | 107 |
if(e && e->key) { |
108 | 108 |
const uint16_t val = e->data; |
109 | 109 |
unsigned char* out = u16_normalize(val, conv->entity_buff, sizeof(conv->entity_buff)-1); |
110 | 110 |
if(out) { |
111 | 111 |
*out++ = '\0'; |
112 | 112 |
} |
113 |
- return (const char*) out; |
|
113 |
+ return (const char*) conv->entity_buff; |
|
114 | 114 |
} |
115 | 115 |
return NULL; |
116 | 116 |
} |
... | ... |
@@ -132,9 +132,6 @@ int init_entity_converter(struct entity_conv* conv, size_t buffer_size) |
132 | 132 |
conv->encoding = NULL; |
133 | 133 |
conv->encoding_symbolic = E_UNKNOWN; |
134 | 134 |
conv->bom_cnt = 0; |
135 |
- conv->buffer_cnt = 0; |
|
136 |
- conv->bytes_read = 0; |
|
137 |
- conv->partial = 0; |
|
138 | 135 |
conv->buffer_size = buffer_size; |
139 | 136 |
conv->priority = NOPRIO; |
140 | 137 |
/* start in linemode */ |
... | ... |
@@ -166,9 +163,6 @@ int init_entity_converter(struct entity_conv* conv, size_t buffer_size) |
166 | 166 |
return CL_EMEM; |
167 | 167 |
} |
168 | 168 |
|
169 |
- conv->ht = &entities_htable; |
|
170 |
- conv->msg_zero_shown = 0; |
|
171 |
- |
|
172 | 169 |
conv->iconv_struct = cli_calloc(1, sizeof(iconv_t)); |
173 | 170 |
if(!conv->iconv_struct) { |
174 | 171 |
free(conv->tmp_area.buffer); |
... | ... |
@@ -520,16 +514,16 @@ static inline void process_bom(struct entity_conv* conv) |
520 | 520 |
conv->has_bom = has_bom; |
521 | 521 |
} |
522 | 522 |
|
523 |
-/*()-./012345678:ABCDEFGHIJKLMNOPQRSTUVWXY_abcdefghijklmnopqrstuvwxy*/ |
|
523 |
+/*()-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz*/ |
|
524 | 524 |
static const uint8_t encname_chars[256] = { |
525 | 525 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
526 | 526 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
527 | 527 |
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, |
528 |
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, |
|
528 |
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, |
|
529 | 529 |
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
530 |
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, |
|
530 |
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, |
|
531 | 531 |
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
532 |
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, |
|
532 |
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, |
|
533 | 533 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
534 | 534 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
535 | 535 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
... | ... |
@@ -583,6 +577,9 @@ static int encoding_norm_done(struct entity_conv* conv) |
583 | 583 |
free(conv->norm_area.buffer); |
584 | 584 |
conv->norm_area.buffer = NULL; |
585 | 585 |
} |
586 |
+ if(conv->iconv_struct) { |
|
587 |
+ free(conv->iconv_struct); |
|
588 |
+ } |
|
586 | 589 |
return 0; |
587 | 590 |
} |
588 | 591 |
|
... | ... |
@@ -752,6 +749,9 @@ static iconv_t iconv_open_cached(const char* fromcode) |
752 | 752 |
e = NULL; |
753 | 753 |
} |
754 | 754 |
if(e) { |
755 |
+ size_t dummy_in, dummy_out; |
|
756 |
+ /* reset state */ |
|
757 |
+ iconv(cache->tab[e->data], NULL, &dummy_in, NULL, &dummy_out); |
|
755 | 758 |
return cache->tab[e->data]; |
756 | 759 |
} |
757 | 760 |
cli_dbgmsg(MODULE_NAME "iconv not found in cache, for encoding:%s\n",fromcode); |
... | ... |
@@ -863,6 +863,7 @@ static int in_iconv_u16(m_area_t* in_m_area, iconv_t* iconv_struct, m_area_t* ou |
863 | 863 |
memcpy(tmp4, input, alignfix); |
864 | 864 |
input = tmp4; |
865 | 865 |
inleft = 4; |
866 |
+ alignfix = 0; |
|
866 | 867 |
} |
867 | 868 |
|
868 | 869 |
while (inleft && (outleft >= 2)) { /* iconv doesn't like inleft to be 0 */ |
... | ... |
@@ -891,7 +892,9 @@ static int in_iconv_u16(m_area_t* in_m_area, iconv_t* iconv_struct, m_area_t* ou |
891 | 891 |
*out++ = *input++; |
892 | 892 |
inleft--; |
893 | 893 |
} |
894 |
- in_m_area->offset = in_m_area->length - inleft; |
|
894 |
+ /* length - offset - alignfix is original value of inleft, new value is inleft, |
|
895 |
+ * difference tells how much it moved. */ |
|
896 |
+ in_m_area->offset = in_m_area->length - alignfix - inleft; |
|
895 | 897 |
if(out_m_area->length >= 0 && out_m_area->length >= (off_t)outleft) { |
896 | 898 |
out_m_area->length -= (off_t)outleft; |
897 | 899 |
} else { |
... | ... |
@@ -1036,8 +1039,10 @@ unsigned char* encoding_norm_readline(struct entity_conv* conv, FILE* stream_in, |
1036 | 1036 |
i++; |
1037 | 1037 |
break; |
1038 | 1038 |
} |
1039 |
- *out++ = c; |
|
1040 |
- limit--; |
|
1039 |
+ if(c) { |
|
1040 |
+ *out++ = c; |
|
1041 |
+ limit--; |
|
1042 |
+ } |
|
1041 | 1043 |
} |
1042 | 1044 |
in_m_area->offset = i; |
1043 | 1045 |
} |
... | ... |
@@ -1048,8 +1053,6 @@ unsigned char* encoding_norm_readline(struct entity_conv* conv, FILE* stream_in, |
1048 | 1048 |
} |
1049 | 1049 |
|
1050 | 1050 |
if(limit < 0) limit = 0; |
1051 |
-/* assert((unsigned)(conv->out_area.length - limit - 1) < conv->buffer_size); |
|
1052 |
- assert(conv->out_area.length - limit - 1 >= 0); */ |
|
1053 | 1051 |
conv->out_area.buffer[conv->out_area.length - limit - 1] = '\0'; |
1054 | 1052 |
return conv->out_area.buffer; |
1055 | 1053 |
} |
... | ... |
@@ -53,22 +53,16 @@ struct entity_conv { |
53 | 53 |
char* encoding; |
54 | 54 |
enum encoding_priority priority; |
55 | 55 |
enum encodings encoding_symbolic; |
56 |
- unsigned short int encoding_specific;/* sub-encoding, used for ISO*/ |
|
57 |
- const struct hashtable* ht; |
|
58 | 56 |
uint8_t has_bom; |
59 | 57 |
uint8_t enc_bytes; |
60 |
- uint8_t bytes_read; |
|
61 | 58 |
uint8_t bom_cnt; |
62 |
- uint32_t partial; |
|
63 | 59 |
unsigned char bom[4]; |
64 | 60 |
size_t buffer_size; |
65 |
- size_t buffer_cnt; |
|
66 | 61 |
void* iconv_struct; |
67 | 62 |
unsigned char entity_buff[MAX_ENTITY_SIZE+2]; |
68 | 63 |
m_area_t tmp_area; |
69 | 64 |
m_area_t out_area; |
70 | 65 |
m_area_t norm_area; |
71 |
- int msg_zero_shown; |
|
72 | 66 |
int linemode;/* TODO:set */ |
73 | 67 |
int linemode_processed; |
74 | 68 |
}; |