Browse code

ac_findmatch branching variant for alternating strings

Kevin Lin authored on 2015/05/07 02:34:15
Showing 1 changed file
... ...
@@ -832,117 +832,128 @@ int cli_ac_chklsig(const char *expr, const char *end, uint32_t *lsigcnt, unsigne
832 832
     }
833 833
 }
834 834
 
835
-/* 
836
- * FIXME: the current support for string alternatives uses a brute-force
837
- *        approach and doesn't perform any kind of verification and
838
- *        backtracking. This may easily lead to false negatives, eg. when
839
- *        an alternative contains strings of different lengths and 
840
- *        more than one of them can match at the current position.
841
- */
835
+static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t pattoffset, uint16_t specialcnt, uint32_t *end); /* forward declaration: ac_findmatch_special calls back into it; parameter order matches the definition (fileoffset, then length) */
836
+
837
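+/* special handler: tests the special at pattern->special_table[specialcnt] against buffer[offset]; returns >0 = bytes to advance, 0 = no match, -1 = alternate-string branch matched */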
838
+inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t offset, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t pattoffset, uint16_t specialcnt, uint32_t *end)
839
+{
840
+    int match;
841
+    uint16_t j, b = buffer[offset];
842
+    struct cli_ac_special *special = pattern->special_table[specialcnt];
843
+
844
+    match = special->negative;
845
+
846
+    switch(special->type) {
847
+    case AC_SPECIAL_ALT_CHAR:
848
+        for(j = 0; j < special->num; j++) {
849
+			if(special->str[j] == b) {
850
+			    match = !special->negative;
851
+			    break;
852
+			} else if(special->str[j] > b)
853
+			    break;
854
+        }
855
+        break;
856
+
857
+    case AC_SPECIAL_ALT_STR:
858
+        /* branch for backtracking */
859
+        while(special) {
860
+            if(offset + special->len <= length) {
861
+                if((!special->negative && !memcmp(&buffer[offset], special->str, special->len)) || special->negative) {
862
+                    match = ac_findmatch_branch(buffer, offset+special->len, fileoffset, length, pattern, pattoffset+1, specialcnt+1, end);
863
+                    if (match)
864
+                        return -1; /* contrary to popular belief, this is good */
865
+                }
866
+            }
867
+            special = special->next;
868
+        }
869
+        break;
870
+
871
+    case AC_SPECIAL_LINE_MARKER:
872
+        if(b == '\n')
873
+            match = !special->negative;
874
+        else if(b == '\r' && (offset + 1 < length && buffer[offset + 1] == '\n'))
875
+            match = 2 * (!special->negative);
876
+        break;
877
+
878
+    case AC_SPECIAL_BOUNDARY:
879
+        if(boundary[b])
880
+            match = !special->negative;
881
+        break;
882
+
883
+    case AC_SPECIAL_WORD_MARKER:
884
+        if(!isalnum(b))
885
+            match = !special->negative;
886
+        break;
887
+
888
+    default:
889
+        cli_errmsg("ac_findmatch: Unknown special\n");
890
+        match = 0;
891
+    }
892
+
893
+    return match;
894
+}
895
+
896
+/* use only inside ac_findmatch_branch's loop: expands to code that uses its locals and may return or continue */
842 897
 #define AC_MATCH_CHAR(p,b)								\
843 898
     switch(wc = p & CLI_MATCH_METADATA) {						\
844
-	case CLI_MATCH_CHAR:								\
845
-	    if((unsigned char) p != b)							\
846
-		match = 0;								\
847
-	    break;									\
848
-											\
849
-	case CLI_MATCH_NOCASE:								\
850
-	    if((unsigned char)(p & 0xff) != cli_nocase(b))				\
851
-		match = 0;								\
852
-	    break;									\
853
-											\
854
-	case CLI_MATCH_IGNORE:								\
855
-	    break;									\
856
-											\
857
-	case CLI_MATCH_SPECIAL:								\
858
-	    special = pattern->special_table[specialcnt];				\
859
-	    match = special->negative;							\
860
-	    switch(special->type) {							\
861
-		case AC_SPECIAL_ALT_CHAR:						\
862
-		    for(j = 0; j < special->num; j++) {					\
863
-			if(special->str[j] == b) {					\
864
-			    match = !special->negative;					\
865
-			    break;							\
866
-			} else if(special->str[j] > b)					\
867
-			    break;							\
868
-		    }									\
869
-		    break;								\
870
-											\
871
-		case AC_SPECIAL_ALT_STR:						\
872
-		    while(special) {							\
873
-			if(bp + special->len <= length) {				\
874
-			    if(!memcmp(&buffer[bp], special->str, special->len)) {	\
875
-				match = !special->negative;				\
876
-				bp += special->len - 1;					\
877
-				break;							\
878
-			    }								\
879
-			}								\
880
-			special = special->next;					\
881
-		    }									\
882
-		    break;								\
883
-											\
884
-		case AC_SPECIAL_LINE_MARKER:						\
885
-		    if(b == '\n') {							\
886
-			match = !special->negative;					\
887
-		    } else if(b == '\r' && (bp + 1 < length && buffer[bp + 1] == '\n')) {   \
888
-			bp++;								\
889
-			match = !special->negative;					\
890
-		    }									\
891
-		    break;								\
892
-											\
893
-		case AC_SPECIAL_BOUNDARY:						\
894
-		    if(boundary[b])							\
895
-			match = !special->negative;					\
896
-		    break;								\
899
+    case CLI_MATCH_CHAR:								\
900
+	if((unsigned char) p != b)							\
901
+	    match = 0;									\
902
+	break;										\
897 903
 											\
898
-		case AC_SPECIAL_WORD_MARKER:						\
899
-		    if(!isalnum(b))							\
900
-			match = !special->negative;					\
901
-		    break;								\
904
+    case CLI_MATCH_NOCASE:								\
905
+	if((unsigned char)(p & 0xff) != cli_nocase(b))					\
906
+	    match = 0;									\
907
+	break;										\
902 908
 											\
903
-		default:								\
904
-		    cli_errmsg("ac_findmatch: Unknown special\n");			\
905
-		    match = 0;								\
906
-	    }										\
907
-	    specialcnt++;								\
908
-	    break;									\
909
+    case CLI_MATCH_IGNORE:								\
910
+	break;										\
909 911
 											\
910
-	case CLI_MATCH_NIBBLE_HIGH:							\
911
-	    if((unsigned char) (p & 0x00f0) != (b & 0xf0))				\
912
-		match = 0;								\
913
-	    break;									\
912
+    case CLI_MATCH_SPECIAL:								\
913
+	/* >0 = movement, 0 = fail, <0 = resolved in branch */				\
914
+	if ((match = ac_findmatch_special(buffer, bp, fileoffset, length, pattern, i,	\
915
+	    specialcnt, end)) <= 0)							\
916
+	    return match;								\
917
+	bp += match;									\
918
+	specialcnt++;									\
919
+	continue; /* match value includes bp++ */					\
914 920
 											\
915
-	case CLI_MATCH_NIBBLE_LOW:							\
916
-	    if((unsigned char) (p & 0x000f) != (b & 0x0f))				\
917
-		match = 0;								\
918
-	    break;									\
921
+    case CLI_MATCH_NIBBLE_HIGH:								\
922
+	if((unsigned char) (p & 0x00f0) != (b & 0xf0))					\
923
+	    match = 0;									\
924
+	break;										\
919 925
 											\
920
-	default:									\
921
-	    cli_errmsg("ac_findmatch: Unknown wildcard 0x%x\n", wc);			\
926
+    case CLI_MATCH_NIBBLE_LOW:								\
927
+	if((unsigned char) (p & 0x000f) != (b & 0x0f))					\
922 928
 	    match = 0;									\
929
+	break;										\
930
+											\
931
+    default:										\
932
+	cli_errmsg("ac_findmatch: Unknown wildcard 0x%x\n", wc);			\
933
+	match = 0;									\
923 934
     }
924 935
 
925
-inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t *end)
926
-{
927
-    uint32_t bp, pstart, match;
928
-    uint16_t wc, i, j, specialcnt = pattern->special_pattern;
929
-    struct cli_ac_special *special;
930 936
 
931
-    if((offset + pattern->length > length) || (pattern->prefix_length > offset))
932
-        return 0;
937
+/* state should reset on call, recursion depth = number of alternate specials */
938
+static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t pattoffset, uint16_t specialcnt, uint32_t *end)
939
+{
940
+    int match;
941
+    uint32_t bp;
942
+    uint16_t wc, i;
933 943
 
934
-    bp = offset + pattern->depth;
944
+    bp = offset;
935 945
 
936 946
     match = 1;
937
-    for(i = pattern->depth; i < pattern->length && bp < length; i++) {
938
-        AC_MATCH_CHAR(pattern->pattern[i],buffer[bp]);
939
-        if(!match)
940
-            return 0;
947
+    for(i = pattoffset; i < pattern->length && bp < length; i++) {
948
+	AC_MATCH_CHAR(pattern->pattern[i],buffer[bp]);
949
+	if (!match)
950
+	    return 0;
941 951
 
942 952
         bp++;
943 953
     }
944 954
     *end = bp;
945 955
 
956
+    /* special boundary checks */
946 957
     if(pattern->boundary & AC_BOUNDARY_LEFT) {
947 958
         match = !!(pattern->boundary & AC_BOUNDARY_LEFT_NEGATIVE);
948 959
         if(!fileoffset || (offset && (boundary[buffer[offset - 1]] == 1 || boundary[buffer[offset - 1]] == 3)))
... ...
@@ -1020,6 +1031,7 @@ inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uin
1020 1020
             return 0;
1021 1021
     }
1022 1022
 
1023
+    /* single-byte anchors */
1023 1024
     if(!(pattern->ch[1] & CLI_MATCH_IGNORE)) {
1024 1025
         bp += pattern->ch_mindist[1];
1025 1026
 
... ...
@@ -1077,6 +1089,20 @@ inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uin
1077 1077
     return 1;
1078 1078
 }
1079 1079
 
1080
+inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t *end)
1081
+{
1082
+    int match;
1083
+    uint16_t specialcnt = pattern->special_pattern;
1084
+
1085
+    if((offset + pattern->length > length) || (pattern->prefix_length > offset))
1086
+        return 0;
1087
+
1088
+    match = ac_findmatch_branch(buffer, offset+pattern->depth, fileoffset, length, pattern, pattern->depth, specialcnt, end);
1089
+    if(match)
1090
+	return 1;
1091
+    return 0;
1092
+}
1093
+
1080 1094
 int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint32_t reloffsigs, uint8_t tracklen)
1081 1095
 {
1082 1096
     unsigned int i, j;