Browse code

Use pthread_cond_* instead of usleep() in MULTISCAN (bb #758); enable entconv by default; others.c: fix compiler warning; regex_list.c: remove unused code, because a better solution is scheduled for 0.94 (bb #725).

git-svn: trunk@3627

Török Edvin authored on 2008/02/13 19:57:33
Showing 7 changed files
... ...
@@ -1,3 +1,12 @@
1
+Wed Feb 13 12:43:41 EET 2008 (edwin)
2
+------------------------------------
3
+  * clamd/scanner.c, thrmgr.[ch]: use pthread_cond_* instead of usleep() in
4
+				  MULTISCAN (bb #758)
5
+  * libclamav/dconf.c: enable entconv by default
6
+  * libclamav/others.c: fix compiler warning
7
+  * libclamav/regex_list.c: remove unused code, because a better solution is scheduled
8
+			    for 0.94 (bb #725)
9
+
1 10
 Wed Feb 13 11:21:04 CET 2008 (tk)
2 11
 ---------------------------------
3 12
   * Merge security fixes:
... ...
@@ -202,12 +202,11 @@ static int dirscan(const char *dirname, const char **virname, unsigned long int
202 202
 					    return 1;
203 203
 					}
204 204
 
205
-					while(!multi_pool->thr_idle) /* non-critical */
206
-#ifdef C_WINDOWS
207
-					    Sleep(1);
208
-#else
209
-					    usleep(200);
210
-#endif
205
+					pthread_mutex_lock(&multi_pool->pool_mutex);
206
+					while(!multi_pool->thr_idle) /* non-critical */ {
207
+						pthread_cond_wait(&multi_pool->idle_cond, &multi_pool->pool_mutex);
208
+					}
209
+					pthread_mutex_unlock(&multi_pool->pool_mutex);
211 210
 
212 211
 				    } else { /* CONTSCAN, SCAN */
213 212
 
... ...
@@ -128,6 +128,7 @@ void thrmgr_destroy(threadpool_t *threadpool)
128 128
   	}
129 129
 	
130 130
 	pthread_mutex_destroy(&(threadpool->pool_mutex));
131
+	pthread_cond_destroy(&(threadpool)->idle_cond);
131 132
 	pthread_cond_destroy(&(threadpool->pool_cond));
132 133
 	pthread_attr_destroy(&(threadpool->pool_attr));
133 134
 	free(threadpool->queue);
... ...
@@ -169,8 +170,17 @@ threadpool_t *thrmgr_new(int max_threads, int idle_timeout, void (*handler)(void
169 169
 		free(threadpool);
170 170
 		return NULL;
171 171
 	}
172
-		
172
+
173
+	if (pthread_cond_init(&(threadpool->idle_cond),NULL) != 0)  {
174
+		pthread_cond_destroy(&(threadpool->pool_cond));
175
+		pthread_mutex_destroy(&(threadpool->pool_mutex));
176
+		free(threadpool->queue);
177
+		free(threadpool);
178
+		return NULL;
179
+	}
180
+
173 181
 	if (pthread_attr_init(&(threadpool->pool_attr)) != 0) {
182
+		pthread_cond_destroy(&(threadpool->idle_cond));
174 183
 		pthread_cond_destroy(&(threadpool->pool_cond));
175 184
 		pthread_mutex_destroy(&(threadpool->pool_mutex));
176 185
 		free(threadpool->queue);
... ...
@@ -180,6 +190,7 @@ threadpool_t *thrmgr_new(int max_threads, int idle_timeout, void (*handler)(void
180 180
 	
181 181
 	if (pthread_attr_setdetachstate(&(threadpool->pool_attr), PTHREAD_CREATE_DETACHED) != 0) {
182 182
 		pthread_attr_destroy(&(threadpool->pool_attr));
183
+		pthread_cond_destroy(&(threadpool->idle_cond));
183 184
 		pthread_cond_destroy(&(threadpool->pool_cond));
184 185
 		pthread_mutex_destroy(&(threadpool->pool_mutex));
185 186
 		free(threadpool->queue);
... ...
@@ -219,6 +230,7 @@ static void *thrmgr_worker(void *arg)
219 219
 		while (((job_data=work_queue_pop(threadpool->queue)) == NULL)
220 220
 				&& (threadpool->state != POOL_EXIT)) {
221 221
 			/* Sleep, awaiting wakeup */
222
+			pthread_cond_signal(&threadpool->idle_cond);
222 223
 			retval = pthread_cond_timedwait(&(threadpool->pool_cond),
223 224
 				&(threadpool->pool_mutex), &timeout);
224 225
 			if (retval == ETIMEDOUT) {
... ...
@@ -48,6 +48,8 @@ typedef struct threadpool_tag {
48 48
 	pthread_mutex_t pool_mutex;
49 49
 	pthread_cond_t pool_cond;
50 50
 	pthread_attr_t pool_attr;
51
+
52
+	pthread_cond_t  idle_cond;
51 53
 	
52 54
 	pool_state_t state;
53 55
 	int thr_max;
... ...
@@ -98,7 +98,7 @@ static struct dconf_module modules[] = {
98 98
     { "OTHER",	    "CRYPTFF",	    OTHER_CONF_CRYPTFF,	    1 },
99 99
 
100 100
     { "PHISHING",   "ENGINE",       PHISHING_CONF_ENGINE,   1 },
101
-    { "PHISHING",   "ENTCONV",      PHISHING_CONF_ENTCONV,  DCONF_ENABLE_EXPERIMENTAL }, /* exp */
101
+    { "PHISHING",   "ENTCONV",      PHISHING_CONF_ENTCONV,  1 },
102 102
 
103 103
     { NULL,	    NULL,	    0,			    0 }
104 104
 };
... ...
@@ -327,7 +327,7 @@ static char *cli_md5buff(const unsigned char *buffer, unsigned int len, unsigned
327 327
 
328 328
 
329 329
     cli_md5_init(&ctx);
330
-    cli_md5_update(&ctx, (char *) buffer, len);
330
+    cli_md5_update(&ctx, buffer, len);
331 331
     cli_md5_final(digest, &ctx);
332 332
 
333 333
     if(dig)
... ...
@@ -33,17 +33,6 @@
33 33
 #endif
34 34
 #endif
35 35
 
36
-
37
-/* TODO: when implementation of new version is complete, enable it in CL_EXPERIMENTAL */
38
-#ifdef CL_EXPERIMENTAL
39
-/*#define USE_NEW_VERSION*/
40
-#endif
41
-
42
-#ifndef USE_NEW_VERSION
43
-/*this is the old version of regex_list.c
44
- *reason for redesign: there is only one node type that has to handle all the cases: binary search among children, alternatives list, match.
45
- * This design is very error-prone.*/
46
-
47 36
 #include <stdio.h>
48 37
 #include <stdlib.h>
49 38
 #include <string.h>
... ...
@@ -1556,404 +1545,3 @@ void dump_tree(struct tree_node* root)
1556 1556
 	printf("}\n");
1557 1557
 }
1558 1558
 #endif
1559
-
1560
-
1561
-#else
1562
-/*------------------------New version of regex_list.c------------------------*/
1563
-
1564
-/* Regex_list.c: 
1565
- * A scalable, trie-based implementation for matching against 
1566
- * a list of regular expressions.
1567
- *
1568
- * A trivial way to implement matching against a list of regular expressions 
1569
- * would have been to construct a single regular expression, by concatenating 
1570
- * the list with the alternate (|) operator.
1571
- * BUT a usual DFA implementation of regular expression matching (eg.: GNU libc)
1572
- * leads to "state explosion" when there are many (5000+) alternate (|) operators.
1573
- * This is the reason for using a trie-based implementation.
1574
- *
1575
- *
1576
- * Design considerations:
1577
- *
1578
- * Recursive call points: there are situations when match has to be retried on a different sub-trie, or with a different repeat count.
1579
- * Alternate operators, and repeat/range operators (+,*,{}) are recursive call points. When a failure is encountered during a match,
1580
- * the function simply returns from the recursive call, and ends up at a failure point (recursive call point).
1581
- *
1582
- * "go to parent" below actually means, return from recursive call.
1583
- *
1584
- * fail_action: we need to return to closest failure point (recursive call point),
1585
- *  and switch current node to node pointed by fail_action
1586
- *
1587
- * Node types:
1588
- * 	OP_ROOT: contains information that applies to the entire trie.
1589
- * 		it can only appear as root node, and not as child node.
1590
- * 		On child fail: match has failed
1591
- * 		This is NOT a recursive call point
1592
- * 	OP_CHAR_BINARY_SEARCH: chooses a sub-trie, based on current character; 
1593
- * 			using binary-search
1594
- * 			On fail: go to node indicated by fail_action, or if 
1595
- * 				fail_action is NULL, to parent
1596
- * 			On child fail: execute fail of current node
1597
- * 	OP_ALTERNATIVES: try matching each sub-trie, if all fails execute fail
1598
- * 		action of current node. This is a recursive call point
1599
- * 	OP_CHAR_REPEAT: repeat specified character a number of times in range:
1600
- *		[min_range, max_range]; 
1601
- *			min_range: 0 for * operator
1602
- *				   1 for + operator
1603
- *			max_range: remaining length of current string for *,+ operator
1604
- *			OR: min_range, max_range as specified by the {min,max} operator
1605
- *		On fail: fail_action, or parent if NULL
1606
- *		On child fail: reduce match repeat count, try again on child, if
1607
- *			repeat count<min_range, execute fail of current node
1608
- *		Also has a bitmap on what characters are accepted beyond it,
1609
- *		as an optimization for the case when a maximum match isn't possible
1610
- *		Not recommended to use this when min_range=max_range=1
1611
- *		This is a recursive call point
1612
- *	OP_DOT_REPEAT: like OP_CHAR_REPEAT but accept any character
1613
- *		Not recommended to use this when min_range=max_range=1
1614
- *		This is a recursive call point
1615
- *	OP_GROUP_START: start of a group "(", also specifies group flags:
1616
- *		repeat: is_repeat, min_range, max_range
1617
- *		This is a recursive call point if is_repeat
1618
- *	OP_GROUP_END: end of group ")"
1619
- *      OP_STRCMP: compare with specified string,
1620
- *      	   it has an array of fail actions, one for each character
1621
- *      	   default fail action: go to parent
1622
- *      	   This was introduced from memory- and speed-efficiency
1623
- *      	   considerations. 
1624
- *      OP_CHAR_CLASS_REPEAT: match character with character class
1625
- *      	min_range, max_range
1626
- *      	For a single character class min_range=max_range=1
1627
- *	OP_MATCH_OK: match has succeeded
1628
- *
1629
- * TODO: maybe we'll need a more efficient way to choose between character classes.
1630
- *       OP_DOT_REPEAT/OP_CHAR_REPEAT needs a more efficient specification of its failure function, instead of using
1631
- *       backtracking approach.
1632
- *
1633
- * The failure function/action is just for optimization; the match algorithm works even without it.
1634
- * TODO:In this first draft fail action will always be NULL, in a later version I'll implement fail actions too.
1635
- *
1636
- *
1637
- */ 
1638
-
1639
-#include <string.h>
1640
-#include "cltypes.h"
1641
-#include "others.h"
1642
-
1643
-/* offsetof is not ANSI C */
1644
-#ifndef offsetof
1645
-#   define offsetof(type,memb) ((size_t)&((type*)0)->memb)
1646
-#endif
1647
-
1648
-#define container_of(ptr, type, member) ( (type *) ((char *)ptr - offsetof(type, member)) )
1649
-#define container_of_const(ptr, type, member) ( (type *) ((const char *)ptr - offsetof(type, member)) )
1650
-
1651
-enum trie_node_type {
1652
-	OP_ROOT,
1653
-	OP_CHAR_BINARY_SEARCH,
1654
-	OP_ALTERNATIVES,
1655
-	OP_CHAR_REPEAT,
1656
-	OP_DOT_REPEAT,
1657
-	OP_CHAR_CLASS_REPEAT,
1658
-	OP_STRCMP,
1659
-	OP_GROUP_START,
1660
-	OP_GROUP_END,
1661
-	OP_MATCH_OK
1662
-};
1663
-
1664
-
1665
-/* the common definition of a trie node */
1666
-struct trie_node
1667
-{
1668
-	enum trie_node_type type;
1669
-};
1670
-
1671
-struct trie_node_match {
1672
-	struct trie_node node;
1673
-	/* additional match info */
1674
-};
1675
-
1676
-struct trie_node_root
1677
-{
1678
-	struct trie_node node;
1679
-	struct trie_node* child;
1680
-};
1681
-
1682
-struct trie_node_binary_search
1683
-{
1684
-	struct trie_node node;
1685
-	uint8_t children_count;/* number of children to search among -1! 255 = 256 children*/	
1686
-	struct trie_node* fail_action;
1687
-	unsigned char* char_choices;/* children_count elements */
1688
-	struct trie_node** children;/*children_count elements */
1689
-};
1690
-
1691
-struct trie_node_alternatives
1692
-{
1693
-	struct trie_node node;
1694
-	uint32_t alternatives_count;
1695
-	/* need to support node with lots of alternatives, 
1696
-	 * for a worst-case scenario where each line ends up as a sub-trie of OP_ALTERNATIVES*/
1697
-	struct trie_node* fail_action;
1698
-	struct trie_node** children;
1699
-};
1700
-
1701
-struct trie_node_char_repeat
1702
-{
1703
-	struct trie_node node;
1704
-	unsigned char character;
1705
-	uint8_t range_min, range_max;/* according to POSIX we need not support more than 255 repetitions*/
1706
-	struct char_bitmap* bitmap_accept_after;/* bitmap of characters accepted after this, 
1707
-						   to optimize repeat < max_range case; if its NULL
1708
-						   there is no optimization*/
1709
-	struct trie_node* child;
1710
-	struct trie_node* fail_action;
1711
-};
1712
-
1713
-struct trie_node_dot_repeat
1714
-{
1715
-	struct trie_node node;
1716
-	uint8_t range_min, range_max;/* according to POSIX we need not support more than 255 repetitions*/
1717
-	struct char_bitmap* bitmap_accept_after;/* bitmap of characters accepted after this, 
1718
-						   to optimize repeat < max_range case; if its NULL
1719
-						   there is no optimization*/
1720
-	struct trie_node* child;
1721
-	struct trie_node* fail_action;
1722
-};
1723
-
1724
-struct trie_node_group_start
1725
-{
1726
-	struct trie_node node;
1727
-	uint8_t range_min, range_max;/* if range_min==range_max==1, then this is NOT a repeat, thus not a recursive call point*/
1728
-	struct trie_node* child;
1729
-	struct trie_node* fail_action;	
1730
-};
1731
-
1732
-struct trie_node_group_end
1733
-{
1734
-	struct trie_node node;
1735
-	struct trie_node* child;
1736
-};
1737
-
1738
-struct trie_node_strcmp
1739
-{
1740
-	struct trie_node node;
1741
-	uint8_t string_length;/* for longer strings a sequence of node_strcmp should be used */
1742
-	unsigned char* string;
1743
-	struct trie_node* child;
1744
-	struct trie_node** fail_actions;/* this has string_length elements, or NULL if no fail_actions are computed */
1745
-};
1746
-
1747
-struct trie_node_char_class_repeat
1748
-{
1749
-	struct trie_node node;
1750
-	struct char_bitmap* bitmap;
1751
-	struct char_bitmap* bitmap_accept_after;
1752
-	uint8_t range_min, range_max;
1753
-	struct trie_node* child;
1754
-	struct trie_node* fail_action;
1755
-};
1756
-
1757
-static inline int bitmap_accepts(const struct char_bitmap* bitmap, const char c)
1758
-{
1759
-	/* TODO: check if c is accepted by bitmap */
1760
-	return 0;
1761
-}
1762
-
1763
-#define MATCH_FAILED 0
1764
-#define MATCH_OK     1
1765
-
1766
-#define FAIL_ACTION( fail_node ) (*fail_action = (fail_node), MATCH_FAILED)
1767
-
1768
-
1769
-#ifndef MIN
1770
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
1771
-#endif
1772
-
1773
-static int match_node(const struct trie_node* node, const unsigned char* text, const unsigned char* text_end, const struct trie_node** fail_action);
1774
-
1775
-static int match_repeat(const unsigned char* text, const unsigned char* text_end, const size_t range_min, const size_t repeat_start, 
1776
-		const struct char_bitmap* bitmap_accept_after, const struct trie_node* child, const struct trie_node** fail_action,
1777
-		const struct trie_node* this_fail_action)
1778
-{
1779
-	size_t i;
1780
-	for(i = repeat_start;i > range_min;i--) {
1781
-		if(!bitmap_accept_after || bitmap_accepts( bitmap_accept_after, text[i-1])) {
1782
-			int rc = match_node(child, &text[i], text_end, fail_action);
1783
-			/* ignore fail_action for now, we have the bitmap_accepts_after optimization */
1784
-			if(rc) {
1785
-				return MATCH_OK;
1786
-			}
1787
-		}						
1788
-	}
1789
-	if(!range_min) {
1790
-		/* this match is optional, try child only */
1791
-		int rc = match_node(child, text, text_end, fail_action);
1792
-		if(rc) {
1793
-			return MATCH_OK;
1794
-		}
1795
-	}
1796
-	return FAIL_ACTION(this_fail_action);
1797
-}
1798
-
1799
-/* text_end points to \0 in text */
1800
-static int match_node(const struct trie_node* node, const unsigned char* text, const unsigned char* text_end, const struct trie_node** fail_action)
1801
-{
1802
-	while(node && text < text_end) {	
1803
-		switch(node->type) {
1804
-			case OP_ROOT:
1805
-				{	
1806
-					const struct trie_node_root* root_node = container_of_const(node, const struct trie_node_root, node);
1807
-					node = root_node->child;
1808
-					break;
1809
-				}
1810
-			case OP_CHAR_BINARY_SEARCH:
1811
-				{					
1812
-					const struct trie_node_binary_search* bin_node = container_of_const(node, const struct trie_node_binary_search, node);
1813
-					const unsigned char csearch = *text;
1814
-					size_t mid, left = 0, right = bin_node->children_count-1;					
1815
-					while(left<=right) {
1816
-						mid = left+(right-left)/2;
1817
-						if(bin_node->char_choices[mid] == csearch)
1818
-							break;
1819
-						else if(bin_node->char_choices[mid] < csearch)
1820
-							left = mid+1;
1821
-						else
1822
-							right = mid-1;
1823
-					}
1824
-					if(left <= right) {
1825
-						/* match successful */
1826
-						node = bin_node->children[mid];
1827
-						++text;
1828
-					}
1829
-					else {
1830
-						return FAIL_ACTION( bin_node->fail_action );
1831
-					}
1832
-					break;
1833
-				}
1834
-			case OP_ALTERNATIVES:
1835
-				{
1836
-					const struct trie_node_alternatives* alt_node = container_of_const(node, const struct trie_node_alternatives, node);
1837
-					size_t i;
1838
-					*fail_action = NULL;
1839
-					for(i=0;i < alt_node->alternatives_count;i++) {
1840
-						int rc = match_node(alt_node->children[i], text, text_end, fail_action);
1841
-						if(rc) {							
1842
-							return MATCH_OK;
1843
-						}
1844
-						/* supporting fail_actions is tricky,
1845
-						 *  if we just go to the node specified, what happens if the match fails, and no
1846
-						 *  further fail_action is specified? We should know where to continue the search.
1847
-						 * For now fail_action isn't supported for OP_ALTERNATIVES*/						
1848
-					}
1849
-					break;
1850
-				}
1851
-			case OP_CHAR_REPEAT:
1852
-				{
1853
-					const struct trie_node_char_repeat* char_rep_node = container_of_const(node, const struct trie_node_char_repeat, node);
1854
-					const size_t max_len = MIN( text_end - text, char_rep_node->range_max-1);
1855
-					/* todo: what about the 8 bit limitation of range_max, and what about inf (+,*)? */
1856
-					const char caccept = char_rep_node->character;
1857
-					size_t rep;
1858
-
1859
-					if(max_len < char_rep_node->range_min)
1860
-						return FAIL_ACTION(char_rep_node->fail_action);
1861
-
1862
-					for(rep=0;rep < max_len;rep++) {
1863
-						if(text[rep] != caccept) {
1864
-							break;
1865
-						}
1866
-					}
1867
-
1868
-					return match_repeat(text, text_end, char_rep_node->range_min, rep,
1869
-							char_rep_node->bitmap_accept_after, char_rep_node->child, fail_action,
1870
-							char_rep_node->fail_action);
1871
-				}
1872
-			case OP_DOT_REPEAT:
1873
-				{
1874
-					const struct trie_node_dot_repeat* dot_rep_node = container_of_const(node, const struct trie_node_dot_repeat, node);
1875
-					const size_t max_len = MIN( text_end - text, dot_rep_node->range_max-1);
1876
-					/* todo: what about the 8 bit limitation of range_max, and what about inf (+,*)? */
1877
-
1878
-					if(max_len < dot_rep_node->range_min)
1879
-						return FAIL_ACTION(dot_rep_node->fail_action);
1880
-
1881
-					return match_repeat(text, text_end, dot_rep_node->range_min, max_len,
1882
-							dot_rep_node->bitmap_accept_after, dot_rep_node->child, fail_action,
1883
-							dot_rep_node->fail_action);
1884
-				}
1885
-			case OP_CHAR_CLASS_REPEAT:
1886
-				{
1887
-					const struct trie_node_char_class_repeat* class_rep_node = container_of_const(node, const struct trie_node_char_class_repeat, node);
1888
-					const size_t max_len = MIN( text_end - text, class_rep_node->range_max-1);
1889
-					/* todo: what about the 8 bit limitation of range_max, and what about inf (+,*)? */
1890
-					size_t rep;
1891
-
1892
-					if(max_len < class_rep_node->range_min)
1893
-						return FAIL_ACTION(class_rep_node->fail_action);
1894
-
1895
-					for(rep=0;rep < max_len;rep++) {
1896
-						if(!bitmap_accepts( class_rep_node->bitmap, text[rep])) {
1897
-							break;
1898
-						}
1899
-					}
1900
-
1901
-					return match_repeat(text, text_end, class_rep_node->range_min, rep,
1902
-							class_rep_node->bitmap_accept_after, class_rep_node->child, fail_action,
1903
-							class_rep_node->fail_action);
1904
-					break;
1905
-				}
1906
-			case OP_STRCMP:
1907
-				{
1908
-					const struct trie_node_strcmp* strcmp_node = container_of_const(node, const struct trie_node_strcmp, node);
1909
-					size_t i;
1910
-					if(strcmp_node->fail_actions) {
1911
-						const size_t max_len = MIN(strcmp_node->string_length, text_end-text);
1912
-						/* we don't use strncmp, because we need the exact match-fail point */
1913
-						for(i=0;i < max_len;i++) {
1914
-							if(text[i] != strcmp_node->string[i]) {
1915
-								return FAIL_ACTION( strcmp_node->fail_actions[i] );
1916
-							}
1917
-						}
1918
-						if(max_len < strcmp_node->string_length) {
1919
-							/* failed, because text was shorter */
1920
-							return FAIL_ACTION( strcmp_node->fail_actions[max_len] );
1921
-						}
1922
-					}
1923
-					else {
1924
-						/* no fail_actions computed, some shortcuts possible on compare */
1925
-						if((text_end - text < strcmp_node->string_length) ||
1926
-								strncmp((const char*)text, (const char*)strcmp_node->string, strcmp_node->string_length)) {
1927
-
1928
-							return FAIL_ACTION( NULL );
1929
-						}
1930
-					}
1931
-					/* match successful */
1932
-					node = strcmp_node->child;
1933
-					text += strcmp_node->string_length;
1934
-					break;
1935
-				}
1936
-			case OP_GROUP_START:
1937
-				{
1938
-					const struct trie_node_group_start* group_start_node = container_of_const(node, const struct trie_node_group_start, node);
1939
-					/* TODO: implement */
1940
-					break;
1941
-				}
1942
-			case OP_GROUP_END:
1943
-				{					
1944
-					const struct trie_node_group_end* group_end_node = container_of_const(node, const struct trie_node_group_end, node);
1945
-					/* TODO: implement */
1946
-					break;
1947
-				}
1948
-			case OP_MATCH_OK:
1949
-				{
1950
-					return MATCH_OK;
1951
-				}
1952
-		}
1953
-	}
1954
-	/* if fail_action was NULL, or text ended*/
1955
-	return MATCH_FAILED;
1956
-}
1957
-
1958
-#endif
1959
-