Browse code

fix valid characters table for encoding name; skip \0 characters; account for alignfix when calculating offset; reset iconv state before reusing; fix entity handling; fix memory leaks reported by valgrind; remove unused fields

git-svn: trunk@3531

Török Edvin authored on 2008/01/23 23:22:16
Showing 5 changed files
... ...
@@ -1,3 +1,14 @@
1
+Wed Jan 23 15:54:00 EET 2008 (edwin)
2
+------------------------------------
3
+  * contrib/entitynorm, entconv.c: fix valid characters table for encoding name
4
+  * libclamav/entconv.[ch]:
5
+			* skip \0 characters
6
+			* account for alignfix when calculating offset
7
+			* reset iconv state before reusing  
8
+  			* fix entity handling 
9
+  			* fix memory leaks reported by valgrind
10
+			* remove unused fields
11
+
1 12
 Wed Jan 23 12:49:46 CET 2008 (tk)
2 13
 ---------------------------------
3 14
   * freshclam/manager.c: reduce the number of error messages (bb#478);
... ...
@@ -1,7 +1,7 @@
1 1
 PERL=perl
2 2
 CC=cc
3 3
 
4
-all: entitylist.h encoding_aliases.h gentbl
4
+all: entitylist.h encoding_aliases.h gentbl encname_chars.h
5 5
 
6 6
 entities_parsed: entities entities/* entity_decl_parse.pl
7 7
 	$(PERL) entity_decl_parse.pl $</* | sort -u >$@
... ...
@@ -20,6 +20,8 @@ encoding_aliases.h: generate_encoding_aliases
20 20
 
21 21
 gentbl: gentbl.c
22 22
 	$(CC) $< -o $@
23
+encname_chars.h: gentbl
24
+	./gentbl encname_chars 0-9 a-z A-Z _ . / \( \) - : >$@
23 25
 
24 26
 clean: 
25 27
 	rm -f entitylist.h encoding_aliases.h entities_parsed generate_entitylist generate_encoding_aliases gentbl
... ...
@@ -16,7 +16,7 @@ int main(int argc, char* argv[])
16 16
 		tbl[*v] = 1;
17 17
 		if(v[1] == '-') {
18 18
 			int j;
19
-			for(j=v[0]+1;j<v[2];j++) {
19
+			for(j=v[0]+1;j<=v[2];j++) {
20 20
 				tbl[j]=1;
21 21
 			}
22 22
 		} else if(v[1]){
... ...
@@ -76,7 +76,7 @@ static inline unsigned char* u16_normalize(uint16_t u16, unsigned char* out, con
76 76
 		assert((uint8_t)u16 != 0);
77 77
 		*out++ = (uint8_t)u16;
78 78
 	}
79
-	else {
79
+	else if (u16) {
80 80
 		/* normalize only >255 to speed up */
81 81
 		char buf[10];
82 82
 		const ssize_t max_num_length = sizeof(buf)-1;
... ...
@@ -103,14 +103,14 @@ static inline unsigned char* u16_normalize(uint16_t u16, unsigned char* out, con
103 103
 
104 104
 const char* entity_norm(struct entity_conv* conv,const unsigned char* entity)
105 105
 {
106
-	struct element* e = hashtab_find(conv->ht, (const char*)entity, strlen((const char*)entity));
106
+	struct element* e = hashtab_find(&entities_htable, (const char*)entity, strlen((const char*)entity));
107 107
 	if(e && e->key) {
108 108
 		const uint16_t val = e->data;
109 109
 		unsigned char* out = u16_normalize(val, conv->entity_buff, sizeof(conv->entity_buff)-1);
110 110
 		if(out) {
111 111
 			*out++ = '\0';
112 112
 		}
113
-		return (const char*) out;
113
+		return (const char*) conv->entity_buff;
114 114
 	}
115 115
 	return NULL;
116 116
 }
... ...
@@ -132,9 +132,6 @@ int init_entity_converter(struct entity_conv* conv, size_t buffer_size)
132 132
 		conv->encoding = NULL;
133 133
 		conv->encoding_symbolic = E_UNKNOWN;
134 134
 		conv->bom_cnt = 0;
135
-		conv->buffer_cnt = 0;
136
-		conv->bytes_read = 0;
137
-		conv->partial = 0;
138 135
 		conv->buffer_size = buffer_size;
139 136
 		conv->priority = NOPRIO;
140 137
 		/* start in linemode */
... ...
@@ -166,9 +163,6 @@ int init_entity_converter(struct entity_conv* conv, size_t buffer_size)
166 166
 			return CL_EMEM;
167 167
 		}
168 168
 
169
-		conv->ht = &entities_htable;
170
-		conv->msg_zero_shown = 0;
171
-
172 169
 		conv->iconv_struct = cli_calloc(1, sizeof(iconv_t));
173 170
 		if(!conv->iconv_struct) {
174 171
 			free(conv->tmp_area.buffer);
... ...
@@ -520,16 +514,16 @@ static inline void process_bom(struct entity_conv* conv)
520 520
 	conv->has_bom = has_bom;
521 521
 }
522 522
 
523
-/*()-./012345678:ABCDEFGHIJKLMNOPQRSTUVWXY_abcdefghijklmnopqrstuvwxy*/
523
+/*()-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz*/
524 524
 static const uint8_t encname_chars[256] = {
525 525
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 526
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 527
         0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1,
528
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0,
528
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
529 529
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
530
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
530
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
531 531
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
532
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
532
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
533 533
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 534
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 535
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
... ...
@@ -583,6 +577,9 @@ static int encoding_norm_done(struct entity_conv* conv)
583 583
 		free(conv->norm_area.buffer);
584 584
 		conv->norm_area.buffer = NULL;
585 585
 	}
586
+	if(conv->iconv_struct) {
587
+		free(conv->iconv_struct);
588
+	}
586 589
 	return 0;
587 590
 }
588 591
 
... ...
@@ -752,6 +749,9 @@ static iconv_t iconv_open_cached(const char* fromcode)
752 752
 		e = NULL;
753 753
 	}
754 754
 	if(e) {
755
+		size_t dummy_in, dummy_out;
756
+		/* reset state */
757
+		iconv(cache->tab[e->data], NULL, &dummy_in, NULL, &dummy_out);
755 758
 		return cache->tab[e->data];
756 759
 	}
757 760
 	cli_dbgmsg(MODULE_NAME "iconv not found in cache, for encoding:%s\n",fromcode);
... ...
@@ -863,6 +863,7 @@ static int in_iconv_u16(m_area_t* in_m_area, iconv_t* iconv_struct, m_area_t* ou
863 863
 		memcpy(tmp4, input, alignfix);
864 864
 		input = tmp4;
865 865
 		inleft = 4;
866
+		alignfix = 0;
866 867
 	}
867 868
 
868 869
 	while (inleft && (outleft >= 2)) { /* iconv doesn't like inleft to be 0 */
... ...
@@ -891,7 +892,9 @@ static int in_iconv_u16(m_area_t* in_m_area, iconv_t* iconv_struct, m_area_t* ou
891 891
 		*out++ = *input++;
892 892
 		inleft--;
893 893
 	}
894
-	in_m_area->offset = in_m_area->length - inleft;
894
+	/* length - offset - alignfix is original value of inleft, new value is inleft, 
895
+	 * difference tells how much it moved. */
896
+	in_m_area->offset = in_m_area->length - alignfix - inleft;
895 897
 	if(out_m_area->length >= 0 && out_m_area->length >= (off_t)outleft) {
896 898
 		out_m_area->length -= (off_t)outleft;
897 899
 	} else {
... ...
@@ -1036,8 +1039,10 @@ unsigned char* encoding_norm_readline(struct entity_conv* conv, FILE* stream_in,
1036 1036
 						i++;
1037 1037
 						break;
1038 1038
 					}
1039
-					*out++ = c;
1040
-					limit--;
1039
+					if(c) {
1040
+						*out++ = c;
1041
+						limit--;
1042
+					}
1041 1043
 				}
1042 1044
 				in_m_area->offset = i;
1043 1045
 		}
... ...
@@ -1048,8 +1053,6 @@ unsigned char* encoding_norm_readline(struct entity_conv* conv, FILE* stream_in,
1048 1048
 		}
1049 1049
 
1050 1050
 		if(limit < 0) limit = 0;
1051
-/*		assert((unsigned)(conv->out_area.length - limit - 1) < conv->buffer_size);
1052
-		assert(conv->out_area.length - limit - 1 >= 0); */
1053 1051
 		conv->out_area.buffer[conv->out_area.length - limit - 1] = '\0';
1054 1052
 		return conv->out_area.buffer;
1055 1053
 	}
... ...
@@ -53,22 +53,16 @@ struct entity_conv {
53 53
 	char* encoding;
54 54
 	enum encoding_priority priority;
55 55
 	enum encodings encoding_symbolic;
56
-	unsigned short int encoding_specific;/* sub-encoding, used for ISO*/
57
-	const struct hashtable* ht;
58 56
 	uint8_t has_bom;
59 57
 	uint8_t enc_bytes;
60
-	uint8_t bytes_read;
61 58
 	uint8_t  bom_cnt;
62
-	uint32_t partial;
63 59
 	unsigned char bom[4];
64 60
 	size_t buffer_size;
65
-	size_t buffer_cnt;
66 61
 	void* iconv_struct;
67 62
 	unsigned char entity_buff[MAX_ENTITY_SIZE+2];
68 63
 	m_area_t tmp_area;
69 64
 	m_area_t out_area;
70 65
 	m_area_t norm_area;
71
-	int      msg_zero_shown;
72 66
 	int      linemode;/* TODO:set */
73 67
 	int      linemode_processed;
74 68
 };