...
|
...
|
@@ -1527,11 +1527,14 @@ void dump_tree(struct tree_node* root)
|
1527
|
1527
|
*
|
1528
|
1528
|
* "go to parent" below actually means, return from recursive call.
|
1529
|
1529
|
*
|
|
1530
|
+ * fail_action: we need to return to the closest failure point (recursive call point),
|
|
1531
|
+ * and switch the current node to the node pointed to by fail_action
|
|
1532
|
+ *
|
1530
|
1533
|
* Node types:
|
1531
|
1534
|
* OP_ROOT: contains information that applies to the entire trie.
|
1532
|
1535
|
* it can only appear as root node, and not as child node.
|
1533
|
1536
|
* On child fail: match has failed
|
1534
|
|
- * This is a recursive call point
|
|
1537
|
+ * This is NOT a recursive call point
|
1535
|
1538
|
* OP_CHAR_BINARY_SEARCH: chooses a sub-trie, based on current character;
|
1536
|
1539
|
* using binary-search
|
1537
|
1540
|
* On fail: go to node indicated by fail_action, or if
|
...
|
...
|
@@ -1548,6 +1551,8 @@ void dump_tree(struct tree_node* root)
|
1548
|
1548
|
* On fail: fail_action, or parent if NULL
|
1549
|
1549
|
* On child fail: reduce match repeat count, try again on child, if
|
1550
|
1550
|
* repeat count<min_range, execute fail of current node
|
|
1551
|
+ * Also has a bitmap on what characters are accepted beyond it,
|
|
1552
|
+ * as an optimization for the case when a maximum match isn't possible
|
1551
|
1553
|
* Not recommended to use this when min_range=max_range=1
|
1552
|
1554
|
* This is a recursive call point
|
1553
|
1555
|
* OP_DOT_REPEAT: like OP_CHAR_REPEAT but accept any character
|
...
|
...
|
@@ -1577,6 +1582,7 @@ void dump_tree(struct tree_node* root)
|
1577
|
1577
|
*
|
1578
|
1578
|
*/
|
1579
|
1579
|
|
|
1580
|
+#include <string.h>
|
1580
|
1581
|
#include "cltypes.h"
|
1581
|
1582
|
#include "others.h"
|
1582
|
1583
|
|
...
|
...
|
@@ -1624,7 +1630,8 @@ struct trie_node_binary_search
|
1624
|
1624
|
struct trie_node node;
|
1625
|
1625
|
uint8_t children_count;/* number of children to search among -1! 255 = 256 children*/
|
1626
|
1626
|
struct trie_node* fail_action;
|
1627
|
|
- struct trie_node** children;
|
|
1627
|
+ unsigned char* char_choices;/* children_count elements */
|
|
1628
|
+ struct trie_node** children;/*children_count elements */
|
1628
|
1629
|
};
|
1629
|
1630
|
|
1630
|
1631
|
struct trie_node_alternatives
|
...
|
...
|
@@ -1642,6 +1649,9 @@ struct trie_node_char_repeat
|
1642
|
1642
|
struct trie_node node;
|
1643
|
1643
|
unsigned char character;
|
1644
|
1644
|
uint8_t range_min, range_max;/* according to POSIX we need not support more than 255 repetitions*/
|
|
1645
|
+ struct char_bitmap* bitmap_accept_after;/* bitmap of characters accepted after this,
|
|
1646
|
+ to optimize repeat < max_range case; if it's NULL
|
|
1647
|
+ there is no optimization*/
|
1645
|
1648
|
struct trie_node* child;
|
1646
|
1649
|
struct trie_node* fail_action;
|
1647
|
1650
|
};
|
...
|
...
|
@@ -1650,6 +1660,9 @@ struct trie_node_dot_repeat
|
1650
|
1650
|
{
|
1651
|
1651
|
struct trie_node node;
|
1652
|
1652
|
uint8_t range_min, range_max;/* according to POSIX we need not support more than 255 repetitions*/
|
|
1653
|
+ struct char_bitmap* bitmap_accept_after;/* bitmap of characters accepted after this,
|
|
1654
|
+ to optimize repeat < max_range case; if it's NULL
|
|
1655
|
+ there is no optimization*/
|
1653
|
1656
|
struct trie_node* child;
|
1654
|
1657
|
struct trie_node* fail_action;
|
1655
|
1658
|
};
|
...
|
...
|
@@ -1674,22 +1687,65 @@ struct trie_node_strcmp
|
1674
|
1674
|
uint8_t string_length;/* for longer strings a sequence of node_strcmp should be used */
|
1675
|
1675
|
unsigned char* string;
|
1676
|
1676
|
struct trie_node* child;
|
1677
|
|
- struct trie_node** fail_actions;/* this has string_length elements */
|
|
1677
|
+ struct trie_node** fail_actions;/* this has string_length elements, or NULL if no fail_actions are computed */
|
1678
|
1678
|
};
|
1679
|
1679
|
|
1680
|
1680
|
struct trie_node_char_class_repeat
|
1681
|
1681
|
{
|
1682
|
1682
|
struct trie_node node;
|
1683
|
1683
|
struct char_bitmap* bitmap;
|
|
1684
|
+ struct char_bitmap* bitmap_accept_after;
|
1684
|
1685
|
uint8_t range_min, range_max;
|
1685
|
1686
|
struct trie_node* child;
|
1686
|
1687
|
struct trie_node* fail_action;
|
1687
|
1688
|
};
|
1688
|
1689
|
|
|
1690
|
+static inline int bitmap_accepts(const struct char_bitmap* bitmap, const char c)
|
|
1691
|
+{
|
|
1692
|
+ /* TODO: check if c is accepted by bitmap */
|
|
1693
|
+ return 0;
|
|
1694
|
+}
|
|
1695
|
+
|
|
1696
|
+#define MATCH_FAILED 0
|
|
1697
|
+#define MATCH_OK 1
|
1689
|
1698
|
|
1690
|
|
-static int match_node(const struct trie_node* node)
|
|
1699
|
+#define FAIL_ACTION( fail_node ) (*fail_action = (fail_node), MATCH_FAILED)
|
|
1700
|
+
|
|
1701
|
+
|
|
1702
|
+#ifndef MIN
|
|
1703
|
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
|
|
1704
|
+#endif
|
|
1705
|
+
|
|
1706
|
+static int match_node(const struct trie_node* node, const unsigned char* text, const unsigned char* text_end, const struct trie_node** fail_action);
|
|
1707
|
+
|
|
1708
|
+static int match_repeat(const unsigned char* text, const unsigned char* text_end, const size_t range_min, const size_t repeat_start,
|
|
1709
|
+ const struct char_bitmap* bitmap_accept_after, const struct trie_node* child, const struct trie_node** fail_action,
|
|
1710
|
+ const struct trie_node* this_fail_action)
|
1691
|
1711
|
{
|
1692
|
|
- while(node->type != OP_MATCH_OK) {
|
|
1712
|
+ size_t i;
|
|
1713
|
+ for(i = repeat_start;i > range_min;i--) {
|
|
1714
|
+ if(!bitmap_accept_after || bitmap_accepts( bitmap_accept_after, text[i-1])) {
|
|
1715
|
+ int rc = match_node(child, &text[i], text_end, fail_action);
|
|
1716
|
+ /* ignore fail_action for now, we have the bitmap_accept_after optimization */
|
|
1717
|
+ if(rc) {
|
|
1718
|
+ return MATCH_OK;
|
|
1719
|
+ }
|
|
1720
|
+ }
|
|
1721
|
+ }
|
|
1722
|
+ if(!range_min) {
|
|
1723
|
+ /* this match is optional, try child only */
|
|
1724
|
+ int rc = match_node(child, text, text_end, fail_action);
|
|
1725
|
+ if(rc) {
|
|
1726
|
+ return MATCH_OK;
|
|
1727
|
+ }
|
|
1728
|
+ }
|
|
1729
|
+ return FAIL_ACTION(this_fail_action);
|
|
1730
|
+}
|
|
1731
|
+
|
|
1732
|
+/* text_end points to \0 in text */
|
|
1733
|
+static int match_node(const struct trie_node* node, const unsigned char* text, const unsigned char* text_end, const struct trie_node** fail_action)
|
|
1734
|
+{
|
|
1735
|
+ while(node && text < text_end) {
|
1693
|
1736
|
switch(node->type) {
|
1694
|
1737
|
case OP_ROOT:
|
1695
|
1738
|
{
|
...
|
...
|
@@ -1698,56 +1754,151 @@ static int match_node(const struct trie_node* node)
|
1698
|
1698
|
break;
|
1699
|
1699
|
}
|
1700
|
1700
|
case OP_CHAR_BINARY_SEARCH:
|
1701
|
|
- {
|
|
1701
|
+ {
|
1702
|
1702
|
const struct trie_node_binary_search* bin_node = container_of_const(node, const struct trie_node_binary_search, node);
|
1703
|
|
- /* TODO: binary search */
|
|
1703
|
+ const unsigned char csearch = *text;
|
|
1704
|
+ size_t mid, left = 0, right = bin_node->children_count-1;
|
|
1705
|
+ while(left<=right) {
|
|
1706
|
+ mid = left+(right-left)/2;
|
|
1707
|
+ if(bin_node->char_choices[mid] == csearch)
|
|
1708
|
+ break;
|
|
1709
|
+ else if(bin_node->char_choices[mid] < csearch)
|
|
1710
|
+ left = mid+1;
|
|
1711
|
+ else
|
|
1712
|
+ right = mid-1;
|
|
1713
|
+ }
|
|
1714
|
+ if(left <= right) {
|
|
1715
|
+ /* match successful */
|
|
1716
|
+ node = bin_node->children[mid];
|
|
1717
|
+ ++text;
|
|
1718
|
+ }
|
|
1719
|
+ else {
|
|
1720
|
+ return FAIL_ACTION( bin_node->fail_action );
|
|
1721
|
+ }
|
1704
|
1722
|
break;
|
1705
|
1723
|
}
|
1706
|
1724
|
case OP_ALTERNATIVES:
|
1707
|
1725
|
{
|
1708
|
1726
|
const struct trie_node_alternatives* alt_node = container_of_const(node, const struct trie_node_alternatives, node);
|
1709
|
|
- /* TODO: op_alt */
|
|
1727
|
+ size_t i;
|
|
1728
|
+ *fail_action = NULL;
|
|
1729
|
+ for(i=0;i < alt_node->alternatives_count;i++) {
|
|
1730
|
+ int rc = match_node(alt_node->children[i], text, text_end, fail_action);
|
|
1731
|
+ if(rc) {
|
|
1732
|
+ return MATCH_OK;
|
|
1733
|
+ }
|
|
1734
|
+ /* supporting fail_actions is tricky,
|
|
1735
|
+ * if we just go to the node specified, what happens if the match fails, and no
|
|
1736
|
+ * further fail_action is specified? We should know where to continue the search.
|
|
1737
|
+ * For now fail_action isn't supported for OP_ALTERNATIVES*/
|
|
1738
|
+ }
|
1710
|
1739
|
break;
|
1711
|
1740
|
}
|
1712
|
1741
|
case OP_CHAR_REPEAT:
|
1713
|
1742
|
{
|
1714
|
1743
|
const struct trie_node_char_repeat* char_rep_node = container_of_const(node, const struct trie_node_char_repeat, node);
|
1715
|
|
- break;
|
|
1744
|
+ const size_t max_len = MIN( text_end - text, char_rep_node->range_max-1);
|
|
1745
|
+ /* todo: what about the 8 bit limitation of range_max, and what about inf (+,*)? */
|
|
1746
|
+ const char caccept = char_rep_node->character;
|
|
1747
|
+ size_t rep;
|
|
1748
|
+
|
|
1749
|
+ if(max_len < char_rep_node->range_min)
|
|
1750
|
+ return FAIL_ACTION(char_rep_node->fail_action);
|
|
1751
|
+
|
|
1752
|
+ for(rep=0;rep < max_len;rep++) {
|
|
1753
|
+ if(text[rep] != caccept) {
|
|
1754
|
+ break;
|
|
1755
|
+ }
|
|
1756
|
+ }
|
|
1757
|
+
|
|
1758
|
+ return match_repeat(text, text_end, char_rep_node->range_min, rep,
|
|
1759
|
+ char_rep_node->bitmap_accept_after, char_rep_node->child, fail_action,
|
|
1760
|
+ char_rep_node->fail_action);
|
1716
|
1761
|
}
|
1717
|
1762
|
case OP_DOT_REPEAT:
|
1718
|
1763
|
{
|
1719
|
1764
|
const struct trie_node_dot_repeat* dot_rep_node = container_of_const(node, const struct trie_node_dot_repeat, node);
|
1720
|
|
- break;
|
|
1765
|
+ const size_t max_len = MIN( text_end - text, dot_rep_node->range_max-1);
|
|
1766
|
+ /* todo: what about the 8 bit limitation of range_max, and what about inf (+,*)? */
|
|
1767
|
+
|
|
1768
|
+ if(max_len < dot_rep_node->range_min)
|
|
1769
|
+ return FAIL_ACTION(dot_rep_node->fail_action);
|
|
1770
|
+
|
|
1771
|
+ return match_repeat(text, text_end, dot_rep_node->range_min, max_len,
|
|
1772
|
+ dot_rep_node->bitmap_accept_after, dot_rep_node->child, fail_action,
|
|
1773
|
+ dot_rep_node->fail_action);
|
1721
|
1774
|
}
|
1722
|
1775
|
case OP_CHAR_CLASS_REPEAT:
|
1723
|
1776
|
{
|
1724
|
1777
|
const struct trie_node_char_class_repeat* class_rep_node = container_of_const(node, const struct trie_node_char_class_repeat, node);
|
|
1778
|
+ const size_t max_len = MIN( text_end - text, class_rep_node->range_max-1);
|
|
1779
|
+ /* todo: what about the 8 bit limitation of range_max, and what about inf (+,*)? */
|
|
1780
|
+ size_t rep;
|
|
1781
|
+
|
|
1782
|
+ if(max_len < class_rep_node->range_min)
|
|
1783
|
+ return FAIL_ACTION(class_rep_node->fail_action);
|
|
1784
|
+
|
|
1785
|
+ for(rep=0;rep < max_len;rep++) {
|
|
1786
|
+ if(!bitmap_accepts( class_rep_node->bitmap, text[rep])) {
|
|
1787
|
+ break;
|
|
1788
|
+ }
|
|
1789
|
+ }
|
|
1790
|
+
|
|
1791
|
+ return match_repeat(text, text_end, class_rep_node->range_min, rep,
|
|
1792
|
+ class_rep_node->bitmap_accept_after, class_rep_node->child, fail_action,
|
|
1793
|
+ class_rep_node->fail_action);
|
1725
|
1794
|
break;
|
1726
|
1795
|
}
|
1727
|
1796
|
case OP_STRCMP:
|
1728
|
1797
|
{
|
1729
|
1798
|
const struct trie_node_strcmp* strcmp_node = container_of_const(node, const struct trie_node_strcmp, node);
|
|
1799
|
+ size_t i;
|
|
1800
|
+ if(strcmp_node->fail_actions) {
|
|
1801
|
+ const size_t max_len = MIN(strcmp_node->string_length, text_end-text);
|
|
1802
|
+ /* we don't use strncmp, because we need the exact match-fail point */
|
|
1803
|
+ for(i=0;i < max_len;i++) {
|
|
1804
|
+ if(text[i] != strcmp_node->string[i]) {
|
|
1805
|
+ return FAIL_ACTION( strcmp_node->fail_actions[i] );
|
|
1806
|
+ }
|
|
1807
|
+ }
|
|
1808
|
+ if(max_len < strcmp_node->string_length) {
|
|
1809
|
+ /* failed, because text was shorter */
|
|
1810
|
+ return FAIL_ACTION( strcmp_node->fail_actions[max_len] );
|
|
1811
|
+ }
|
|
1812
|
+ }
|
|
1813
|
+ else {
|
|
1814
|
+ /* no fail_actions computed, some shortcuts possible on compare */
|
|
1815
|
+ if((text_end - text < strcmp_node->string_length) ||
|
|
1816
|
+ strncmp((const char*)text, (const char*)strcmp_node->string, strcmp_node->string_length)) {
|
|
1817
|
+
|
|
1818
|
+ return FAIL_ACTION( NULL );
|
|
1819
|
+ }
|
|
1820
|
+ }
|
|
1821
|
+ /* match successful */
|
|
1822
|
+ node = strcmp_node->child;
|
|
1823
|
+ text += strcmp_node->string_length;
|
1730
|
1824
|
break;
|
1731
|
1825
|
}
|
1732
|
1826
|
case OP_GROUP_START:
|
1733
|
1827
|
{
|
1734
|
1828
|
const struct trie_node_group_start* group_start_node = container_of_const(node, const struct trie_node_group_start, node);
|
|
1829
|
+ /* TODO: implement */
|
1735
|
1830
|
break;
|
1736
|
1831
|
}
|
1737
|
1832
|
case OP_GROUP_END:
|
1738
|
|
- {
|
|
1833
|
+ {
|
1739
|
1834
|
const struct trie_node_group_end* group_end_node = container_of_const(node, const struct trie_node_group_end, node);
|
|
1835
|
+ /* TODO: implement */
|
1740
|
1836
|
break;
|
1741
|
1837
|
}
|
1742
|
|
- default:
|
|
1838
|
+ case OP_MATCH_OK:
|
1743
|
1839
|
{
|
1744
|
|
- cli_warnmsg("Unimplemented node type:%d", node->type);
|
1745
|
|
- return 0;
|
1746
|
|
- break;
|
|
1840
|
+ return MATCH_OK;
|
1747
|
1841
|
}
|
1748
|
1842
|
}
|
1749
|
1843
|
}
|
1750
|
|
- return 1;/* match */
|
|
1844
|
+ /* if fail_action was NULL, or text ended*/
|
|
1845
|
+ return MATCH_FAILED;
|
1751
|
1846
|
}
|
1752
|
1847
|
|
1753
|
1848
|
#endif
|