Browse code

matcher-ac: expansion of nested alternates within alternate expr
matcher-ac: three types of alternates: byte, fixed, and generic (variable)

Kevin Lin authored on 2015/05/14 06:48:22
Showing 3 changed files
... ...
@@ -279,7 +279,7 @@ static inline unsigned char spec_ith_char(const struct char_spec *spec, unsigned
279 279
 	if (alt) {
280 280
 		assert (alt->type == 1);
281 281
 		assert (i < alt->num);
282
-		return alt->str[i];
282
+		return (alt->alt).byte[i];
283 283
 	}
284 284
 	return i;
285 285
 }
... ...
@@ -91,7 +91,8 @@ static inline int insert_list(struct cli_matcher *root, struct cli_ac_patt *patt
91 91
     struct cli_ac_list **newtable;
92 92
     struct cli_ac_patt *php;
93 93
     struct cli_ac_special *a1, *a2;
94
-    uint8_t i, match;
94
+    struct cli_alt_node *b1, *b2;
95
+    uint8_t i, j, match;
95 96
 
96 97
     new = (struct cli_ac_list *)mpool_calloc(root->mempool, 1, sizeof(struct cli_ac_list));
97 98
     if (!new) {
... ...
@@ -142,23 +143,40 @@ static inline int insert_list(struct cli_matcher *root, struct cli_ac_patt *patt
142 142
                             match = 0;
143 143
                             break;
144 144
                         } else if(a1->type == AC_SPECIAL_ALT_CHAR) {
145
-                            if(memcmp(a1->str, a2->str, a1->num)) {
145
+                            if(memcmp((a1->alt).byte, (a2->alt).byte, a1->num)) {
146 146
                                 match = 0;
147 147
                                 break;
148 148
                             }
149
+                        } else if(a1->type == AC_SPECIAL_ALT_STR_FIXED) {
150
+			    if(a1->len != a2->len) {
151
+				match = 0;
152
+				break;
153
+			    }
154
+
155
+			    for(j = 0; j < a1->num; j++) {
156
+				if(memcmp((a1->alt).f_str[j], (a2->alt).f_str[j], a1->len))
157
+				    break;
158
+			    }
159
+
160
+			    if(j < a1->num) {
161
+				match = 0;
162
+				break;
163
+			    }
149 164
                         } else if(a1->type == AC_SPECIAL_ALT_STR) {
150
-                            while(a1 && a2) {
151
-                                if((a1->len != a2->len) || memcmp(a1->str, a2->str, a1->len))
152
-                                    break;
153
-                                a1 = a1->next;
154
-                                a2 = a2->next;
155
-                            }
165
+			    b1 = (a1->alt).v_str;
166
+			    b2 = (a2->alt).v_str;
167
+			    while(b1 && b2) {
168
+				if((b1->len != b2->len) || memcmp(b1->str, b2->str, b1->len))
169
+				    break;
170
+				b1 = b1->next;
171
+				b2 = b2->next;
172
+			    }
156 173
 
157
-                            if(a1 || a2) {
158
-                                match = 0;
159
-                                break;
160
-                            }
161
-                        }
174
+			    if(b1 || b2) {
175
+				match = 0;
176
+				break;
177
+			    }
178
+			}
162 179
                     }
163 180
                 } else {
164 181
                     match = 0;
... ...
@@ -521,8 +539,9 @@ static void ac_free_special(mpool_t *mempool, struct cli_ac_patt *p)
521 521
 static void ac_free_special(struct cli_ac_patt *p)
522 522
 #endif
523 523
 {
524
-    unsigned int i;
525
-    struct cli_ac_special *a1, *a2;
524
+    unsigned int i, j;
525
+    struct cli_ac_special *a1;
526
+    struct cli_alt_node *b1, *b2;
526 527
 
527 528
 
528 529
     if(!p->special)
... ...
@@ -530,14 +549,22 @@ static void ac_free_special(struct cli_ac_patt *p)
530 530
 
531 531
     for(i = 0; i < p->special; i++) {
532 532
         a1 = p->special_table[i];
533
-        while(a1) {
534
-            a2 = a1;
535
-            a1 = a1->next;
536
-
537
-            if(a2->str)
538
-                mpool_free(mempool, a2->str);
539
-            mpool_free(mempool, a2);
540
-        }
533
+	if (a1->type == AC_SPECIAL_ALT_CHAR) {
534
+	    mpool_free(mempool, (a1->alt).byte);
535
+	} else if (a1->type == AC_SPECIAL_ALT_STR_FIXED) {
536
+	    for (j = 0; j < a1->num; j++)
537
+		mpool_free(mempool, (a1->alt).f_str[j]);
538
+	    mpool_free(mempool, (a1->alt).f_str);
539
+	} else if (a1->type == AC_SPECIAL_ALT_STR) {
540
+	    b1 = (a1->alt).v_str;
541
+	    while (b1) {
542
+		b2 = b1->next;
543
+		mpool_free(mempool, b1->str);
544
+		mpool_free(mempool, b1);
545
+		b1 = b2;
546
+	    }
547
+	}
548
+	mpool_free(mempool, a1);
541 549
     }
542 550
     mpool_free(mempool, p->special_table);
543 551
 }
... ...
@@ -838,50 +865,58 @@ static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uin
838 838
 /* special handler */
839 839
 inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t offset, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t pattoffset, uint16_t specialcnt, uint32_t *end)
840 840
 {
841
-    int match;
841
+    int match, cmp;
842 842
     uint16_t j, b = buffer[offset];
843 843
     struct cli_ac_special *special = pattern->special_table[specialcnt];
844
+    struct cli_alt_node *alt = NULL;
844 845
 
845 846
     match = special->negative;
846 847
 
847 848
     switch(special->type) {
848
-    case AC_SPECIAL_ALT_CHAR:
849
-        for(j = 0; j < special->num; j++) {
850
-			if(special->str[j] == b) {
851
-			    match = !special->negative;
852
-			    break;
853
-			} else if(special->str[j] > b)
854
-			    break;
849
+    case AC_SPECIAL_ALT_CHAR: /* single-byte */
850
+        for (j = 0; j < special->num; j++) {
851
+	    if ((special->alt).byte[j] == b) {
852
+		match = !special->negative;
853
+		break;
854
+	    } else if ((special->alt).byte[j] > b)
855
+		break;
855 856
         }
856 857
         break;
857 858
 
858
-    case AC_SPECIAL_ALT_STR_FIXED: /* old */
859
-	/*
860
-        while(special) {
861
-            if(bp + special->len <= length) {
862
-                if(!memcmp(&buffer[bp], special->str, special->len)) {
863
-                    match = (!special->negative) * special->len;
864
-                    break;
865
-                }
866
-            }
867
-            special = special->next;
868
-        }
869
-	*/
859
+    case AC_SPECIAL_ALT_STR_FIXED: /* fixed length multi-byte */
860
+	if (offset + special->len > length)
861
+	    break;
862
+
863
+	for (j = 0; j < special->num; j++) {
864
+	    if (!memcmp(&buffer[offset], (special->alt).f_str[j], special->len)) {
865
+		match = (!special->negative) * special->len;
866
+		break;
867
+	    } /* TODO - handle sorting case */
868
+	}
870 869
         break;
871 870
 
872 871
     case AC_SPECIAL_ALT_STR: /* generic */
873 872
         /* branch for backtracking */
874
-        while(special) {
875
-            if(offset + special->len <= length) {
876
-                if((!special->negative && !memcmp(&buffer[offset], special->str, special->len)) || special->negative) {
877
-                    match = ac_findmatch_branch(buffer, offset+special->len, fileoffset, length, pattern, pattoffset+1, specialcnt+1, end);
878
-                    if (match)
879
-                        return -1; /* contrary to popular belief, this is good */
880
-                }
881
-            }
882
-            special = special->next;
883
-        }
884
-        break;
873
+	alt = (special->alt).v_str;
874
+	while (alt) {
875
+	    if (offset + alt->len > length) {
876
+		alt = alt->next;
877
+		continue;
878
+	    }
879
+
880
+	    /* note that generic alternates CANNOT be negated */
881
+	    /* generic alternates are sorted alphabetically   */
882
+	    cmp = memcmp(&buffer[offset], alt->str, alt->len);
883
+	    if (!cmp) {
884
+		match = ac_findmatch_branch(buffer, offset+alt->len, fileoffset, length, pattern, pattoffset+1, specialcnt+1, end);
885
+		if (match)
886
+		    return -1; /* alerts caller that match has been resolved in child callee */
887
+	    } else if (cmp < 0)
888
+		break;
889
+
890
+	    alt = alt->next;
891
+	}
892
+	break;
885 893
 
886 894
     case AC_SPECIAL_LINE_MARKER:
887 895
         if(b == '\n')
... ...
@@ -1781,6 +1816,16 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
1781 1781
     return (mode & AC_SCAN_FT) ? type : CL_CLEAN;
1782 1782
 }
1783 1783
 
1784
+/* TODO - remove debugging on production */
1785
+#define ALTN_DEBUG 0
1786
+#if ALTN_DEBUG == 1
1787
+#define cli_altnmsg(...) cli_dbgmsg(__VA_ARGS__)
1788
+#elif ALTN_DEBUG == 2
1789
+#define cli_altnmsg(...) cli_errmsg(__VA_ARGS__)
1790
+#else
1791
+#define cli_altnmsg(...)
1792
+#endif
1793
+
1784 1794
 static int qcompare(const void *a, const void *b)
1785 1795
 {
1786 1796
     return *(const unsigned char *)a - *(const unsigned char *)b;
... ...
@@ -1808,255 +1853,351 @@ inline static int find_paren_end(char *hexstr, char **end)
1808 1808
     return nest;
1809 1809
 }
1810 1810
 
1811
-/* analyzes expr, returns number of subexpr, if fixed length subexpr and longest subexpr len  */
1812
-inline static int ac_analyze_expr(char *hexstr, int *fixed_len, int *slen)
1811
+/* analyzes expr, returns number of subexpr, if fixed length subexpr and longest subexpr len *
1812
+ * goes to either end of string or to closing parenthesis; allowed to be unbalanced          *
1813
+ * counts applied to start of expr (not end), i.e. numexpr starts at 1 for the first expr   */
1814
+inline static int ac_analyze_expr(char *hexstr, int *fixed_len, int *sub_len)
1813 1815
 {
1814
-    /* len calc is off */
1815 1816
     int i, level = 0, len = 0, numexpr = 1;
1817
+    int flen, slen;
1816 1818
 
1817
-    *fixed_len = 1;
1818
-    *slen = 0;
1819
+    flen = 1;
1820
+    slen = 0;
1819 1821
     for (i = 0; i < strlen(hexstr); i++) {
1820 1822
 	if (hexstr[i] == '(') {
1821
-	    *fixed_len = 0;
1823
+	    flen = 0;
1822 1824
 	    level++;
1823 1825
 	} else if (hexstr[i] == ')') {
1824 1826
 	    if (!level) {
1825
-		if (!(*slen)) {
1826
-		    *slen = len;
1827
-		} else if (len != *slen) {
1828
-		    *fixed_len = 0;
1829
-		    if (len > *slen)
1830
-			*slen = len;
1827
+		if (!slen) {
1828
+		    slen = len;
1829
+		} else if (len != slen) {
1830
+		    flen = 0;
1831
+		    if (len > slen)
1832
+			slen = len;
1831 1833
 		}
1832
-
1833
-		numexpr++;
1834 1834
 		break;
1835 1835
 	    }
1836 1836
 	    level--;
1837 1837
 	}
1838 1838
 	if (!level && hexstr[i] == '|') {
1839
-	    if (!(*slen)) {
1840
-		*slen = len;
1841
-	    } else if (len != *slen) {
1842
-		*fixed_len = 0;
1843
-		if (len > *slen)
1844
-		    *slen = len;
1839
+	    if (!slen) {
1840
+		slen = len;
1841
+	    } else if (len != slen) {
1842
+		flen = 0;
1843
+		if (len > slen)
1844
+		    slen = len;
1845 1845
 	    }
1846 1846
 	    len = 0;
1847 1847
 	    numexpr++;
1848 1848
 	} else {
1849 1849
 	    len++;
1850 1850
 	}
1851
-	//cli_errmsg("%c, %d\n", hexstr[i], len);
1851
+	//cli_altnmsg("%c, %d\n", hexstr[i], len);
1852 1852
     }
1853
-    if (len > *slen)
1854
-	*slen = len;
1853
+    if (len > slen)
1854
+	slen = len;
1855
+
1856
+    if (sub_len)
1857
+	*sub_len = slen;
1858
+    if (fixed_len)
1859
+	*fixed_len = flen;
1855 1860
 
1856 1861
     return numexpr;
1857 1862
 }
1858 1863
 
1859
-/* recursive special handler for alternate string specials (so many specials!) */
1860
-static int ac_addspecial_alt(const char *hexpr, int neg)
1864
+/* add new generic alternate node to special */
1865
+inline static int ac_addspecial_add_alt_node(const char *subexpr, struct cli_ac_special *special, struct cli_matcher *root)
1861 1866
 {
1862
-    char *hexprcpy;
1863
-    int ret, num, fixed, slen, len;
1864
-
1865
-    cli_errmsg("called ac_addspecial_alt\n");
1867
+    struct cli_alt_node *newnode, **prev, *ins;
1868
+    char *c;
1869
+    int cmp;
1870
+
1871
+    newnode = (struct cli_alt_node *)mpool_calloc(root->mempool, 1, sizeof(struct cli_alt_node));
1872
+    if (!newnode) {
1873
+	cli_errmsg("ac_addspecial_add_alt_node: Can't allocate new alternate node\n");
1874
+	return CL_EMEM;
1875
+    }
1876
+
1877
+    c = (char *)cli_mpool_hex2str(root->mempool, subexpr);
1878
+    if (!c) {
1879
+	free(newnode);
1880
+	return CL_EMALFDB;
1881
+    }
1882
+
1883
+    newnode->str = c;
1884
+    newnode->len = strlen(subexpr)/2;
1885
+
1886
+    /* search for location to insert node (alphabetically through memcmp) */
1887
+    prev = &((special->alt).v_str);
1888
+    ins = (special->alt).v_str;
1889
+    while (ins) {
1890
+	if (ins->len == newnode->len) {
1891
+	    cmp = memcmp(newnode->str, ins->str, ins->len); /* TODO - change when uint16_t is used */
1892
+	    if (cmp == 0) { /* duplicate */
1893
+		free(newnode);
1894
+		return CL_SUCCESS;
1895
+	    } else if (cmp < 0) {
1896
+		break;
1897
+	    }
1898
+	}
1866 1899
 
1867
-    if (!(hexprcpy = cli_strdup(hexpr))) {
1868
-	cli_errmsg("ac_addspecial_alt: Can't duplicate alternate expression\n");
1869
-	return CL_EDUP;
1900
+	prev = &(ins->next);
1901
+	ins = ins->next;
1870 1902
     }
1871 1903
 
1872
-    len = strlen(hexpr);
1873
-    num = ac_analyze_expr(hexprcpy, &fixed, &slen);
1874
-
1875
-    cli_errmsg("-----------------------------------\n");
1876
-    cli_errmsg("hexpr: %s\n", hexprcpy);
1877
-    cli_errmsg("%d strings of %d len %s\n", num, slen, fixed ? "(fixed)" : "(max)");
1878
-    cli_errmsg("-----------------------------------\n");
1904
+    *prev = newnode;
1905
+    newnode->next = ins;
1906
+    special->num++;
1907
+    return CL_SUCCESS;
1908
+}
1879 1909
 
1880
-    if (fixed) {
1881
-	if (slen == 2) /* single-bytes are len 2 in hex */
1882
-	    cli_errmsg("ac_addspecial_alt: discovered AC_SPECIAL_ALT_CHAR\n");
1883
-	else
1884
-	    cli_errmsg("ac_addspecial_alt: discovered AC_SPECIAL_ALT_STR_FIXED\n");
1885
-	/* just use the tokenizer here */
1886
-    } else { /* generic alternate string */
1887
-	/* ------------------(----------)--------(------)--------- */
1888
-	/* ^                 ^   */
1889
-	/* expr             npt  */
1890
-
1891
-	/* things get complicated */
1892
-	/* utilize similar string method as normal parents */
1893
-	/* reconstruct the final string, the ac_analyze_expr determines that slen is longest possible length */
1894
-
1895
-	char *sexpr, *sexpr_new, *spt, term;
1896
-	int sexpr_len = slen+1, subneg, scnt = 0;
1897
-
1898
-	cli_errmsg("ac_addspecial_alt: discovered AC_SPECIAL_ALT_STR\n");
1899
-	if (neg) {
1900
-	    cli_errmsg("ac_addspecial_alt: Can't apply negation operation to generic alternate strings\n");
1901
-	    free(hexprcpy);
1902
-	    return CL_EMALFDB;
1910
+/* recursive special handler for expanding and adding generic alternates */
1911
+static int ac_special_altexpand(char *hexpr, char *subexpr, uint16_t maxlen, int lvl, int maxlvl, int rec, struct cli_ac_special *special, struct cli_matcher *root)
1912
+{
1913
+    int ret, scnt = 0, numexpr;
1914
+    char *ept, *sexpr, *end, term;
1915
+    char *fp;
1916
+
1917
+    ept = sexpr = hexpr;
1918
+    fp = subexpr + strlen(subexpr);
1919
+
1920
+    numexpr = ac_analyze_expr(hexpr, NULL, NULL);
1921
+    //cli_altnmsg("hexpr: %s\n", hexpr);
1922
+    //cli_altnmsg("numexpr: %d [%d,%d]\n", numexpr, lvl, maxlvl);
1923
+
1924
+    /* while there are expressions to resolve */
1925
+    while (scnt < numexpr) {
1926
+	scnt++;
1927
+	while ((*ept != '(') && (*ept != '|') && (*ept != ')') && (*ept != '\0'))
1928
+	    ept++;
1929
+
1930
+	/* check for invalid negation */
1931
+	term = *ept;
1932
+	if ((*ept == '(') && (ept >= hexpr+1)) {
1933
+	    if (ept[-1] == '!') {
1934
+		cli_errmsg("ac_special_altexpand: Generic alternates cannot contain negations\n");
1935
+		return CL_EMALFDB;
1936
+	    }
1903 1937
 	}
1904 1938
 
1905
-	if (!(sexpr_new = cli_calloc(sexpr_len, sizeof(char)))) {
1906
-	    cli_errmsg("ac_addspecial_alt: Can't allocate space for reconstructed string\n");
1907
-	    free(hexprcpy);
1939
+	/* appended token */
1940
+	*ept = 0;
1941
+	if (cli_strlcat(subexpr, sexpr, maxlen) >= maxlen) {
1942
+	    cli_errmsg("ac_special_altexpand: Unexpected expression larger than expected\n");
1908 1943
 	    return CL_EMEM;
1909 1944
 	}
1945
+	*ept++ = term;
1910 1946
 
1911
-	spt = sexpr = hexprcpy;
1912
-	/* loop starts here */
1913
-	while (scnt < num) {
1914
-	    //cli_errmsg("cycling\n");
1915
-	    while ((*spt != '(') && (*spt != '|') && (*spt != ')') && (*spt != '\0'))
1916
-		spt++;
1917
-
1918
-	    term = *spt;
1919
-	    subneg = 0;
1920
-	    if ((term == '(') && (spt >= sexpr+1)) {
1921
-		if (spt[-1] == '!') {
1922
-		    subneg = 1;
1923
-		    spt[-1] = 0;
1924
-		}
1925
-	    }
1926
-	    *spt++ = 0;
1947
+	sexpr = ept;
1948
+	//cli_altnmsg("ac_special_altexpand: %s\n", subexpr);
1927 1949
 
1928
-	    //cli_errmsg("sexpr_new: %s\n", sexpr_new);
1929
-	    //cli_errmsg("sexpr: %s\n", sexpr);
1930
-	    //cli_errmsg("sexpr_len: %d\n", sexpr_len);
1950
+	if (term == '|') {
1951
+	    //cli_altnmsg("ept: %s\n", ept);
1952
+	    if (lvl == 0) {
1953
+		cli_altnmsg("export: %s\n", subexpr);
1954
+		if ((ret = ac_addspecial_add_alt_node(subexpr, special, root)) != CL_SUCCESS)
1955
+		    return ret;
1956
+	    } else {
1957
+		find_paren_end(ept, &end);
1958
+		if (!end) {
1959
+		    cli_errmsg("ac_special_altexpand: Missing closing parenthesis\n");
1960
+		    return CL_EMALFDB;
1961
+		}
1962
+		end++;
1931 1963
 
1932
-	    /* consume token */
1933
-	    if ((ret = cli_strlcat(sexpr_new, sexpr, sexpr_len)) >= sexpr_len) {
1934
-		//cli_errmsg("sexpr_new: %s\n", sexpr_new);
1935
-		//cli_errmsg("cli_strlcat_ret: %d\n", ret);
1964
+		//cli_altnmsg("descending recursive call on %s\n", end);
1965
+                if ((ret = ac_special_altexpand(end, subexpr, maxlen, lvl-1, lvl, special, root)) != CL_SUCCESS)
1966
+		    return ret;
1967
+                //cli_altnmsg("return descending recursive call\n");
1968
+	    }
1936 1969
 
1937
-		cli_errmsg("ac_addspecial_alt: Unexpected expression larger than expected\n");
1938
-		free(sexpr_new);
1939
-		free(hexprcpy);
1940
-		/* TODO - clean up */
1970
+	    *fp = 0;
1971
+	} else if (term == ')') {
1972
+	    //cli_altnmsg("ept: %s\n", ept);
1973
+	    if (lvl == 0) {
1974
+		cli_errmsg("ac_special_altexpand: Unexpected closing parenthesis\n");
1941 1975
 		return CL_EPARSE;
1942 1976
 	    }
1943 1977
 
1944
-	    if (term == '(') {
1945
-		/* recursive call */
1946
-		cli_errmsg("encountered '(': %s\n", sexpr_new);
1947
-		sexpr = spt;
1948
-		find_paren_end(spt, &spt);
1949
-		*spt++ = 0;
1978
+	    //cli_altnmsg("descending recursive call\n");
1979
+	    if ((ret = ac_special_altexpand(ept, subexpr, maxlen, lvl-1, lvl, special, root)) != CL_SUCCESS)
1980
+		return ret;
1981
+	    //cli_altnmsg("return descending recursive call\n");
1982
+	    break;
1983
+	} else if (term == '(') {
1984
+	    int inner, found;
1985
+	    find_paren_end(ept, &end);
1986
+	    if (!end) {
1987
+		cli_errmsg("ac_special_altexpand: Missing closing parenthesis\n");
1988
+		return CL_EMALFDB;
1989
+	    }
1990
+	    end++;
1950 1991
 
1951
-		if ((ret = ac_addspecial_alt(sexpr, subneg)) != CL_SUCCESS) {
1952
-		    cli_errmsg("returned ac_addspecial_alt %d\n", ret);
1953
-		    free(sexpr_new);
1954
-		    free(hexprcpy);
1955
-		    /* TODO - clean up */
1956
-		    return CL_EPARSE;
1992
+	    //cli_altnmsg("ascending recursive call\n");
1993
+	    if ((ret = ac_special_altexpand(ept, subexpr, maxlen, lvl+1, lvl+1, special, root)) != CL_SUCCESS)
1994
+		return ret;
1995
+	    //cli_altnmsg("return ascending recursive call\n");
1996
+
1997
+	    /* move ept to end of current alternate expression */
1998
+	    ept = end;
1999
+	    inner = 0;
2000
+	    found = 0;
2001
+	    while (!found && *ept != '\0') {
2002
+		switch(*ept) {
2003
+		case '|':
2004
+		    if (!inner)
2005
+			found = 1;
2006
+		    break;
2007
+		case '(':
2008
+		    inner++;
2009
+		    break;
2010
+		case ')':
2011
+		    inner--;
2012
+		    break;
1957 2013
 		}
1958
-		cli_strlcat(sexpr_new, "()", sexpr_len);
1959
-		cli_errmsg("returned ac_addspecial_alt %d\n", ret);
1960
-	    } else if (term == '|') {
1961
-		/* push special */
1962
-		cli_errmsg("encountered '|': %s\n", sexpr_new);
1963
-		memset(sexpr_new, 0, sexpr_len);
1964
-		scnt++;
1965
-	    } else if (term == '\0') {
1966
-		/* push special and break */
1967
-		cli_errmsg("encountered FOE: %s\n", sexpr_new);
1968
-		scnt++;
1969
-		break;
1970
-	    } else {
1971
-		cli_errmsg("ac_addspecial_alt: Unexpected end of expression: %s\n", sexpr);
1972
-		/* TODO - clean up */
2014
+		ept++;
1973 2015
 	    }
1974
-
1975
-	    sexpr = spt;
2016
+	    if (*ept == '|')
2017
+		ept++;
2018
+
2019
+	    sexpr = ept;
2020
+	    *fp = 0;
2021
+	} else if (term == '\0') {
2022
+	    cli_altnmsg("export: %s\n", subexpr);
2023
+	    if ((ret = ac_addspecial_add_alt_node(subexpr, special, root)) != CL_SUCCESS)
2024
+		return ret;
2025
+	    break;
1976 2026
 	}
1977 2027
 
1978
-	free(sexpr_new);
1979
-	if ((scnt != num) || (spt - hexprcpy - 1 != len)) {
1980
-	    //cli_errmsg("scnt %d num %d, parsed %d len %d\n", scnt, num, spt - hexprcpy, len);
1981
-	    cli_errmsg("ac_addspecial: Mismatch in parsed and expected signature\n");
1982
-	    free(hexprcpy);
1983
-	    return CL_EPARSE;
1984
-	} else
1985
-	    cli_errmsg("subexpr cnt OK\n");
2028
+	//cli_altnmsg("%d %d\n", lvl, maxlvl);
2029
+	if (lvl != maxlvl)
2030
+	    return CL_SUCCESS;
1986 2031
     }
2032
+    if (scnt != numexpr) {
2033
+	//cli_altnmsg("scnt %d numexpr %d\n", scnt, numexpr);
2034
+	cli_errmsg("ac_addspecial: Mismatch in parsed and expected signature\n");
2035
+	return CL_EMALFDB;
2036
+    }
2037
+#if ALTN_DEBUG
2038
+    else
2039
+	cli_altnmsg("subexpr cnt OK\n");
2040
+#endif
1987 2041
 
1988
-    free(hexprcpy);
1989 2042
     return CL_SUCCESS;
1990 2043
 }
1991 2044
 
1992
-#define ARBITRARY_NEST_LIMIT 100 /* 0 = NO nesting (sorry birds...) */
1993
-/* special block '(', ')' handler */
1994
-inline static int ac_addspecial(const char *hexsig)
2045
+/* alternate string specials (so many specials!) */
2046
+inline static int ac_special_altstr(const char *hexpr, struct cli_ac_special *special, struct cli_matcher *root)
1995 2047
 {
1996
-    struct cli_ac_special *newspecial;
1997
-    char *start, *pt, *hexcpy;
1998
-    int nest, ret;
2048
+    char *hexprcpy, *h, *c;
2049
+    int i, ret, num, fixed, slen, len;
2050
+
2051
+    //cli_errmsg("called ac_special_altstr\n");
1999 2052
 
2000
-    if (!(hexcpy = cli_strdup(hexsig))) {
2001
-	cli_errmsg("ac_addspecial: Can't duplicate hexsig\n");
2053
+    if (!(hexprcpy = cli_strdup(hexpr))) {
2054
+	cli_errmsg("ac_special_altstr: Can't duplicate alternate expression\n");
2002 2055
 	return CL_EDUP;
2003 2056
     }
2004 2057
 
2005
-    start = pt = hexcpy;
2006
-    /* search for parenthesis sequence */
2007
-    while((pt = strchr(start, '('))) {
2008
-	int neg = 0;
2009
-	*pt++ = 0;
2058
+    len = strlen(hexpr);
2059
+    num = ac_analyze_expr(hexprcpy, &fixed, &slen);
2010 2060
 
2011
-	/* check for negation */
2012
-	if(pt >= hexcpy + 2) {
2013
-	    if(pt[-2] == '!') {
2014
-		neg = 1;
2015
-		pt[-2] = 0;
2061
+    cli_altnmsg("-----------------------------------\n");
2062
+    cli_altnmsg("hexpr: %s\n", hexprcpy);
2063
+    cli_altnmsg("%d strings of %d len %s\n", num, slen, fixed ? "(fixed)" : "(max)");
2064
+    cli_altnmsg("-----------------------------------\n");
2065
+
2066
+    if (fixed) {
2067
+	special->num = 0;
2068
+	special->len = slen / 2;
2069
+	/* single-bytes are len 2 in hex */
2070
+	if (slen == 2) {
2071
+	    cli_altnmsg("ac_special_altstr: discovered AC_SPECIAL_ALT_CHAR\n");
2072
+	    special->type = AC_SPECIAL_ALT_CHAR;
2073
+	    (special->alt).byte = (unsigned char *) mpool_malloc(root->mempool, num);
2074
+	    if (!((special->alt).byte)) {
2075
+		cli_errmsg("cli_ac_special_altstr: Can't allocate newspecial->str\n");
2076
+		free(hexprcpy);
2077
+		return CL_EMEM;
2078
+	    }
2079
+	} else {
2080
+	    cli_altnmsg("ac_special_altstr: discovered AC_SPECIAL_ALT_STR_FIXED\n");
2081
+	    special->type = AC_SPECIAL_ALT_STR_FIXED;
2082
+	    (special->alt).f_str = (unsigned char **) mpool_malloc(root->mempool, num);
2083
+	    if (!((special->alt).f_str)) {
2084
+		cli_errmsg("cli_ac_special_altstr: Can't allocate newspecial->str\n");
2085
+		free(hexprcpy);
2086
+		return CL_EMEM;
2016 2087
 	    }
2017 2088
 	}
2018 2089
 
2019
-	/* determine if nesting is occuring */
2020
-	nest = find_paren_end(pt, &start);
2021
-	if (!start) {
2022
-	    cli_errmsg("ac_addspecial: Missing closing parenthesis\n");
2023
-	    return CL_EMALFDB;
2090
+	for (i = 0; i < num; i++) {
2091
+	    if (num == 1) {
2092
+		c = (char *) cli_mpool_hex2str(root->mempool, hexprcpy);
2093
+	    } else {
2094
+		if(!(h = cli_strtok(hexprcpy, i, "|"))) {
2095
+		    free(hexprcpy);
2096
+		    return CL_EMEM;
2097
+		}
2098
+		c = (char *) cli_mpool_hex2str(root->mempool, h);
2099
+		free(h);
2100
+	    }
2101
+	    if (!c) {
2102
+		free(hexprcpy);
2103
+		return CL_EMALFDB;
2104
+	    }
2105
+
2106
+	    if (special->type == AC_SPECIAL_ALT_CHAR) {
2107
+		(special->alt).byte[i] = *c;
2108
+		mpool_free(root->mempool, c);
2109
+	    } else {
2110
+		(special->alt).f_str[i] = c;
2111
+	    }
2112
+	    special->num++;
2024 2113
 	}
2025
-	*start++ = 0;
2026
-	if (!strlen(pt)) {
2027
-	    cli_errmsg("ac_addspecial: Empty block\n");
2028
-	    free(hexcpy);
2114
+	/* sorting byte alternates */
2115
+	if (special->num > 1 && special->type == AC_SPECIAL_ALT_CHAR)
2116
+	    cli_qsort((special->alt).byte, special->num, sizeof(unsigned char), qcompare);
2117
+	/* TODO - sorting str alternates */
2118
+    } else { /* generic alternates */
2119
+	char *subexpr;
2120
+	cli_altnmsg("ac_special_altstr: discovered AC_SPECIAL_ALT_STR\n");
2121
+	if (special->negative) {
2122
+	    cli_errmsg("ac_special_altstr: Can't apply negation operation to generic alternate strings\n");
2123
+	    free(hexprcpy);
2029 2124
 	    return CL_EMALFDB;
2030 2125
 	}
2031 2126
 
2032
-	cli_errmsg("nest %d\n", nest);
2127
+	special->type = AC_SPECIAL_ALT_STR;
2033 2128
 
2034
-	if (nest > ARBITRARY_NEST_LIMIT) {
2035
-	    cli_errmsg("ac_addspecial: We've gone too deep!\n");
2036
-	    free(hexcpy);
2037
-	    return CL_EMALFDB;
2129
+	/* allocate reusable subexpr */
2130
+	if (!(subexpr = cli_calloc(slen+1, sizeof(char)))) {
2131
+	    cli_errmsg("ac_special_altstr: Can't allocate subexpr container\n");
2132
+	    return CL_EMEM;
2038 2133
 	}
2039 2134
 
2040
-	/* character class handling */
2041
-	if(!strcmp(hexpr, "B")) {
2042
-	    cli_errmsg("encountered 'B': ()\n");
2043
-	} else if(!strcmp(hexpr, "L")) {
2044
-	    cli_errmsg("encountered 'L': ()\n");
2045
-	} else if(!strcmp(hexpr, "W")) {
2046
-	    cli_errmsg("encountered 'W': ()\n");
2047
-	} else {
2048
-	    if ((ret = ac_addspecial_alt(pt, neg)) != CL_SUCCESS) {
2049
-		cli_errmsg("returned ac_addspecial_alt %d\n", ret);
2050
-		free(hexcpy);
2051
-		return ret;
2052
-	    }
2135
+	// static int ac_special_altexpand(char *hexpr, char *subexpr, uint16_t maxlen, int lvl, int maxlvl, struct cli_ac_special *special, struct cli_matcher *root)
2136
+	ret = ac_special_altexpand(hexprcpy, subexpr, slen+1, 0, 0, special, root);
2137
+
2138
+#if ALTN_DEBUG
2139
+	struct cli_alt_node *node = (special->alt).v_str;
2140
+	while (node) {
2141
+	    cli_errmsg("%d: %s\n", node->len, node->str);
2142
+	    node = node->next;
2053 2143
 	}
2144
+#endif
2145
+
2146
+	free(subexpr);
2147
+	free(hexprcpy);
2148
+	return ret;
2054 2149
     }
2055 2150
 
2056
-    free(hexcpy);
2151
+    free(hexprcpy);
2057 2152
     return CL_SUCCESS;
2058 2153
 }
2059 2154
 
2155
+#define ARBITRARY_NEST_LIMIT 100 /* 0 = NO nesting (sorry birds...) */
2060 2156
 /* FIXME: clean up the code */
2061 2157
 int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint8_t sigopts, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t rtype, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, const uint32_t *lsigid, unsigned int options)
2062 2158
 {
... ...
@@ -2190,7 +2331,8 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
2190 2190
     }
2191 2191
 
2192 2192
     if(strchr(hexsig, '(')) {
2193
-	    char *hexnew, *start, *h, *c;
2193
+	    char *hexnew, *start;
2194
+	    uint8_t nest;
2194 2195
 	    size_t hexnewsz;
2195 2196
 
2196 2197
 	if(hex) {
... ...
@@ -2200,8 +2342,6 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
2200 2200
 	    return CL_EMEM;
2201 2201
 	}
2202 2202
 
2203
-	ac_addspecial(hexcpy);
2204
-
2205 2203
 	hexnewsz = strlen(hexsig) + 1;
2206 2204
 	if(!(hexnew = (char *) cli_calloc(1, hexnewsz))) {
2207 2205
 	    free(new);
... ...
@@ -2231,7 +2371,9 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
2231 2231
 	    }
2232 2232
 	    cli_strlcat(hexnew, start, hexnewsz);
2233 2233
 
2234
-	    if(!(start = strchr(pt, ')'))) {
2234
+	    nest = find_paren_end(pt, &start);
2235
+	    if(!start) {
2236
+		cli_errmsg("cli_ac_addsig: Missing closing parenthesis\n");
2235 2237
 		mpool_free(root->mempool, newspecial);
2236 2238
 		error = CL_EMALFDB;
2237 2239
 		break;
... ...
@@ -2239,10 +2381,17 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
2239 2239
 	    *start++ = 0;
2240 2240
 	    if(!strlen(pt)) {
2241 2241
 		cli_errmsg("cli_ac_addsig: Empty block\n");
2242
+		mpool_free(root->mempool, newspecial);
2242 2243
 		error = CL_EMALFDB;
2243 2244
 		break;
2244 2245
 	    }
2245 2246
 
2247
+	    if (nest > ARBITRARY_NEST_LIMIT) {
2248
+		cli_errmsg("ac_addspecial: We've gone too deep!\n");
2249
+		free(hexcpy);
2250
+		return CL_EMALFDB;
2251
+	    }
2252
+
2246 2253
 	    if(!strcmp(pt, "B")) {
2247 2254
 		if(!*start) {
2248 2255
 		    new->boundary |= AC_BOUNDARY_RIGHT;
... ...
@@ -2306,72 +2455,11 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
2306 2306
 	    } else if(!strcmp(pt, "W")) {
2307 2307
 		newspecial->type = AC_SPECIAL_WORD_MARKER;
2308 2308
 	    } else {
2309
-		newspecial->num = 1;
2310
-		for(i = 0; i < strlen(pt); i++)
2311
-		    if(pt[i] == '|')
2312
-			newspecial->num++;
2313
-
2314
-		if(3 * newspecial->num - 1 == (uint16_t) strlen(pt)) {
2315
-		    newspecial->type = AC_SPECIAL_ALT_CHAR;
2316
-		    newspecial->str = (unsigned char *) mpool_malloc(root->mempool, newspecial->num);
2317
-		    if(!newspecial->str) {
2318
-			cli_errmsg("cli_ac_addsig: Can't allocate newspecial->str\n");
2319
-			error = CL_EMEM;
2320
-			break;
2321
-		    }
2322
-		} else {
2323
-		    newspecial->type = AC_SPECIAL_ALT_STR;
2324
-		}
2325
-
2326
-		for(i = 0; i < newspecial->num; i++) {
2327
-			unsigned int clen;
2328
-
2329
-		    if(newspecial->num == 1) {
2330
-			c = (char *) cli_mpool_hex2str(root->mempool, pt);
2331
-			clen = strlen(pt) / 2;
2332
-		    } else {
2333
-			if(!(h = cli_strtok(pt, i, "|"))) {
2334
-			    error = CL_EMEM;
2335
-			    break;
2336
-			}
2337
-			c = (char *) cli_mpool_hex2str(root->mempool, h);
2338
-			clen = strlen(h) / 2;
2339
-			free(h);
2340
-		    }
2341
-		    if(!c) {
2342
-			error = CL_EMALFDB;
2343
-			break;
2344
-		    }
2345
-
2346
-		    if(newspecial->type == AC_SPECIAL_ALT_CHAR) {
2347
-			newspecial->str[i] = *c;
2348
-			mpool_free(root->mempool, c);
2349
-		    } else {
2350
-			if(i) {
2351
-			    specialpt = newspecial;
2352
-			    while(specialpt->next)
2353
-				specialpt = specialpt->next;
2354
-
2355
-			    specialpt->next = (struct cli_ac_special *) mpool_calloc(root->mempool, 1, sizeof(struct cli_ac_special));
2356
-			    if(!specialpt->next) {
2357
-				cli_errmsg("cli_ac_addsig: Can't allocate specialpt->next\n");
2358
-				error = CL_EMEM;
2359
-				free(c);
2360
-				break;
2361
-			    }
2362
-			    specialpt->next->str = (unsigned char *) c;
2363
-			    specialpt->next->len = clen;
2364
-			} else {
2365
-			    newspecial->str = (unsigned char *) c;
2366
-			    newspecial->len = clen;
2367
-			}
2368
-		    }
2369
-		}
2370
-		if(newspecial->num > 1 && newspecial->type == AC_SPECIAL_ALT_CHAR)
2371
-		    cli_qsort(newspecial->str, newspecial->num, sizeof(unsigned char), qcompare);
2372
-
2373
-		if(error)
2309
+		if ((ret = ac_special_altstr(pt, newspecial, root)) != CL_SUCCESS) {
2310
+		    //cli_altnmsg("returned ac_special_altstr %d\n", ret);
2311
+		    error = ret;
2374 2312
 		    break;
2313
+		}
2375 2314
 	    }
2376 2315
 	}
2377 2316
 
... ...
@@ -67,9 +67,21 @@ struct cli_ac_data {
67 67
     uint32_t min_partno;
68 68
 };
69 69
 
70
-struct cli_ac_special {
70
+struct cli_alt_node {
71
+    //uint16_t *str;
71 72
     unsigned char *str;
72
-    struct cli_ac_special *next;
73
+    uint16_t len;
74
+    struct cli_alt_node *next;
75
+};
76
+
77
+struct cli_ac_special {
78
+    union {
79
+        //uint16_t *byte;
80
+        //uint16_t **f_str;
81
+        unsigned char *byte;
82
+        unsigned char **f_str;
83
+        struct cli_alt_node *v_str;
84
+    } alt;
73 85
     uint16_t len, num;
74 86
     uint16_t type, negative;
75 87
 };