Browse code

matcher-ac: restructured ac verification

Kevin Lin authored on 2015/07/07 08:05:36
Showing 1 changed file
... ...
@@ -860,8 +860,12 @@ int cli_ac_chklsig(const char *expr, const char *end, uint32_t *lsigcnt, unsigne
860 860
     }
861 861
 }
862 862
 
863
-inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t offset, uint32_t bp, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t pattoffset, uint16_t specialcnt, uint32_t *end);
864
-static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uint32_t bp, uint32_t length, uint32_t fileoffset, const struct cli_ac_patt *pattern, uint32_t pattoffset, uint16_t specialcnt, uint32_t *end);
863
+inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t offset, uint32_t bp, uint32_t fileoffset, uint32_t length,
864
+                                       const struct cli_ac_patt *pattern, uint32_t pp, uint16_t specialcnt, uint32_t *start, uint32_t *end, int rev);
865
+static int ac_backward_match_branch(const unsigned char *buffer, uint32_t bp, uint32_t offset, uint32_t length, uint32_t fileoffset,
866
+                                    const struct cli_ac_patt *pattern, uint32_t pp, uint16_t specialcnt, uint32_t *start, uint32_t *end);
867
+static int ac_forward_match_branch(const unsigned char *buffer, uint32_t bp, uint32_t offset, uint32_t length, uint32_t fileoffset,
868
+                                   const struct cli_ac_patt *pattern, uint32_t pp, uint16_t specialcnt, uint32_t *start, uint32_t *end);
865 869
 
866 870
 /* call only by ac_findmatch_special! Does not handle recursive specials */
867 871
 #define AC_MATCH_CHAR2(p,b)                                                             \
... ...
@@ -894,8 +898,8 @@ static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uin
894 894
         match = 0;                                                                      \
895 895
     }
896 896
 
897
-/* call only by ac_findmatch_branch! */
898
-#define AC_MATCH_CHAR(p,b)                                                              \
897
+/* call only by ac_XX_match_branch! */
898
+#define AC_MATCH_CHAR(p,b,rev)                                                          \
899 899
     switch(wc = p & CLI_MATCH_METADATA) {                                               \
900 900
     case CLI_MATCH_CHAR:                                                                \
901 901
         if((unsigned char) p != b)                                                      \
... ...
@@ -913,10 +917,17 @@ static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uin
913 913
     case CLI_MATCH_SPECIAL:                                                             \
914 914
         /* >1 = movement, 0 = fail, <1 = resolved in branch */                          \
915 915
         if((match = ac_findmatch_special(buffer, offset, bp, fileoffset, length,        \
916
-                                         pattern, i, specialcnt, end)) <= 0)            \
916
+                                        pattern, i, specialcnt, start, end, rev)) <= 0) \
917 917
             return match;                                                               \
918
-        bp += match - 1; /* -1 is for bp++ in parent loop */                            \
919
-        specialcnt++;                                                                   \
918
+                                                                                        \
919
+        if (!rev) {                                                                     \
920
+            bp += (match - 1); /* -1 is for bp++ in parent loop */                      \
921
+            specialcnt++;                                                               \
922
+        } else {                                                                        \
923
+            bp = bp + 1 - match; /* +1 is for bp-- in parent loop */                    \
924
+            specialcnt--;                                                               \
925
+        }                                                                               \
926
+                                                                                        \
920 927
         break;                                                                          \
921 928
                                                                                         \
922 929
     case CLI_MATCH_NIBBLE_HIGH:                                                         \
... ...
@@ -936,11 +947,13 @@ static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uin
936 936
 
937 937
 
938 938
 /* special handler */
939
-inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t offset, uint32_t bp, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t pattoffset, uint16_t specialcnt, uint32_t *end)
939
+inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t offset, uint32_t bp, uint32_t fileoffset, uint32_t length,
940
+                                       const struct cli_ac_patt *pattern, uint32_t pp, uint16_t specialcnt, uint32_t *start, uint32_t *end, int rev)
940 941
 {
941 942
     int match, cmp;
942 943
     uint16_t j, b = buffer[bp];
943 944
     uint16_t wc;
945
+    uint32_t subbp;
944 946
     struct cli_ac_special *special = pattern->special_table[specialcnt];
945 947
     struct cli_alt_node *alt = NULL;
946 948
 
... ...
@@ -959,12 +972,19 @@ inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t off
959 959
         break;
960 960
 
961 961
     case AC_SPECIAL_ALT_STR_FIXED: /* fixed length multi-byte */
962
-        if (bp + special->len[0] > length)
963
-            break;
962
+        if (!rev) {
963
+            if (bp + special->len[0] > length)
964
+                break;
965
+            subbp = bp;
966
+        } else {
967
+            if (bp < (special->len[0] - 1))
968
+                break;
969
+            subbp = bp - (special->len[0] - 1);
970
+        }
964 971
 
965 972
         match *= special->len[0];
966 973
         for (j = 0; j < special->num; j++) {
967
-            cmp = memcmp(&buffer[bp], (special->alt).f_str[j], special->len[0]);
974
+            cmp = memcmp(&buffer[subbp], (special->alt).f_str[j], special->len[0]);
968 975
             if (cmp == 0) {
969 976
                 match = (!special->negative) * special->len[0];
970 977
                 break;
... ...
@@ -976,15 +996,24 @@ inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t off
976 976
     case AC_SPECIAL_ALT_STR: /* generic */
977 977
         alt = (special->alt).v_str;
978 978
         while (alt) {
979
-            if (bp + alt->len > length) {
980
-                alt = alt->next;
981
-                continue;
979
+            if (!rev) {
980
+                if (bp + alt->len > length) {
981
+                    alt = alt->next;
982
+                    continue;
983
+                }
984
+                subbp = bp;
985
+            } else {
986
+                if (bp < (alt->len - 1)) {
987
+                    alt = alt->next;
988
+                    continue;
989
+                }
990
+                subbp = bp - (alt->len - 1);
982 991
             }
983 992
 
984 993
             /* note that generic alternates CANNOT be negated */
985 994
             match = 1;
986 995
             for (j = 0; j < alt->len; j++) {
987
-                AC_MATCH_CHAR2(alt->str[j],buffer[bp+j]);
996
+                AC_MATCH_CHAR2(alt->str[j],buffer[subbp+j]);
988 997
                 if (!match)
989 998
                     break;
990 999
             }
... ...
@@ -995,7 +1024,10 @@ inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t off
995 995
                     break;
996 996
                 }
997 997
                 /* branch for backtracking */
998
-                match = ac_findmatch_branch(buffer, offset, bp+alt->len, fileoffset, length, pattern, pattoffset+1, specialcnt+1, end);
998
+                if (!rev)
999
+                    match = ac_forward_match_branch(buffer, subbp+alt->len, offset, fileoffset, length, pattern, pp+1, specialcnt+1, start, end);
1000
+                else
1001
+                    match = ac_backward_match_branch(buffer, subbp-1, offset, fileoffset, length, pattern, pp-1, specialcnt-1, start, end);
999 1002
                 if (match)
1000 1003
                     return -1; /* alerts caller that match has been resolved in child callee */
1001 1004
             }
... ...
@@ -1030,73 +1062,132 @@ inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t off
1030 1030
 }
1031 1031
 
1032 1032
 /* state should reset on call, recursion depth = number of alternate specials */
1033
-static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uint32_t bp, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t pattoffset, uint16_t specialcnt, uint32_t *end)
1033
+/* each loop iteration starts on the NEXT sequence to be validated */
1034
+static int ac_backward_match_branch(const unsigned char *buffer, uint32_t bp, uint32_t offset, uint32_t fileoffset, uint32_t length,
1035
+                                    const struct cli_ac_patt *pattern, uint32_t pp, uint16_t specialcnt, uint32_t *start, uint32_t *end)
1034 1036
 {
1035 1037
     int match;
1036 1038
     uint16_t wc, i;
1039
+    uint32_t filestart;
1037 1040
 
1038
-    match = 1;
1039
-    for(i = pattoffset; i < pattern->length[0] && bp < length; i++) {
1040
-        AC_MATCH_CHAR(pattern->pattern[i],buffer[bp]);
1041
-        if (!match)
1042
-            return 0;
1041
+    /* backwards (prefix) validation, determines start */
1042
+    if(pattern->prefix) {
1043
+        match = 1;
1043 1044
 
1044
-        bp++;
1045
+        for (i = pp; 1; i--) {
1046
+            AC_MATCH_CHAR(pattern->prefix[i],buffer[bp],1);
1047
+            if(!match)
1048
+                return 0;
1049
+
1050
+            /* needs to perform check before decrement due to unsignedness */
1051
+            if (i == 0 || bp == 0)
1052
+                break;
1053
+
1054
+            bp--;
1055
+        }
1056
+
1057
+        *start = bp;
1058
+        filestart = fileoffset - offset + bp;
1059
+    } else {
1060
+        /* bp is set to buffer offset */
1061
+        *start = bp = offset;
1062
+        filestart = fileoffset;
1045 1063
     }
1046
-    *end = bp;
1047 1064
 
1048
-    /* special boundary checks */
1065
+    /* left-side special checks, bp = start */
1049 1066
     if(pattern->boundary & AC_BOUNDARY_LEFT) {
1050 1067
         match = !!(pattern->boundary & AC_BOUNDARY_LEFT_NEGATIVE);
1051
-        if(!fileoffset || (offset && (boundary[buffer[offset - 1]] == 1 || boundary[buffer[offset - 1]] == 3)))
1068
+        if(!filestart || (bp && (boundary[buffer[bp - 1]] == 1 || boundary[buffer[bp - 1]] == 3)))
1052 1069
             match = !match;
1053 1070
 
1054 1071
         if(!match)
1055 1072
             return 0;
1056 1073
     }
1057 1074
 
1058
-    if(pattern->boundary & AC_BOUNDARY_RIGHT) {
1059
-        match = !!(pattern->boundary & AC_BOUNDARY_RIGHT_NEGATIVE);
1060
-        if((length <= SCANBUFF) && (bp == length || boundary[buffer[bp]] >= 2))
1075
+    if(pattern->boundary & AC_LINE_MARKER_LEFT) {
1076
+        match = !!(pattern->boundary & AC_LINE_MARKER_LEFT_NEGATIVE);
1077
+        if(!filestart || (bp && (buffer[bp - 1] == '\n')))
1061 1078
             match = !match;
1062 1079
 
1063 1080
         if(!match)
1064 1081
             return 0;
1065 1082
     }
1066 1083
 
1067
-    if(pattern->boundary & AC_LINE_MARKER_LEFT) {
1068
-        match = !!(pattern->boundary & AC_LINE_MARKER_LEFT_NEGATIVE);
1069
-        if(!fileoffset || (offset && (buffer[offset - 1] == '\n')))
1084
+
1085
+    if(pattern->boundary & AC_WORD_MARKER_LEFT) {
1086
+        match = !!(pattern->boundary & AC_WORD_MARKER_LEFT_NEGATIVE);
1087
+        if(!filestart)
1088
+            match = !match;
1089
+        else if(pattern->sigopts & ACPATT_OPTION_WIDE) {
1090
+            if(filestart-1 == 0)
1091
+                match = !match;
1092
+            if(bp - 1 && bp && !(isalnum(buffer[bp - 2]) && buffer[bp - 1] == '\0'))
1093
+                match = !match;
1094
+        }
1095
+        else if(bp && !isalnum(buffer[bp - 1]))
1070 1096
             match = !match;
1071 1097
 
1072 1098
         if(!match)
1073 1099
             return 0;
1074 1100
     }
1075 1101
 
1076
-    if(pattern->boundary & AC_LINE_MARKER_RIGHT) {
1077
-        match = !!(pattern->boundary & AC_LINE_MARKER_RIGHT_NEGATIVE);
1078
-        if((length <= SCANBUFF) && (bp == length || buffer[bp] == '\n' || (buffer[bp] == '\r' && bp + 1 < length && buffer[bp + 1] == '\n')))
1079
-            match = !match;
1102
+    /* bp is shifted for left anchor check, thus invalidated as pattern start */
1103
+    if(!(pattern->ch[0] & CLI_MATCH_IGNORE)) {
1104
+        if(pattern->ch_mindist[0] + (uint32_t) 1 > bp)
1105
+            return 0;
1106
+
1107
+        bp -= pattern->ch_mindist[0] + 1;
1108
+        for(i = pattern->ch_mindist[0]; i <= pattern->ch_maxdist[0]; i++) {
1109
+            match = 1;
1110
+            AC_MATCH_CHAR(pattern->ch[0],buffer[bp],1);
1111
+            if(match)
1112
+                break;
1080 1113
 
1114
+            if(!bp)
1115
+                return 0;
1116
+            else
1117
+                bp--;
1118
+        }
1081 1119
         if(!match)
1082 1120
             return 0;
1083 1121
     }
1084 1122
 
1085
-    if(pattern->boundary & AC_WORD_MARKER_LEFT) {
1086
-        match = !!(pattern->boundary & AC_WORD_MARKER_LEFT_NEGATIVE);
1087
-        /* absolute beginning of file */
1088
-        if(!fileoffset)
1123
+    return 1;
1124
+}
1125
+
1126
+/* state should reset on call, recursion depth = number of alternate specials */
1127
+/* each loop iteration starts on the NEXT sequence to validate */
1128
+static int ac_forward_match_branch(const unsigned char *buffer, uint32_t bp, uint32_t offset, uint32_t fileoffset, uint32_t length,
1129
+                                   const struct cli_ac_patt *pattern, uint32_t pp, uint16_t specialcnt, uint32_t *start, uint32_t *end)
1130
+{
1131
+    int match;
1132
+    uint16_t wc, i;
1133
+
1134
+    match = 1;
1135
+
1136
+    /* forward (pattern) validation; determines end */
1137
+    for(i = pp; i < pattern->length[0] && bp < length; i++) {
1138
+        AC_MATCH_CHAR(pattern->pattern[i],buffer[bp],0);
1139
+        if (!match)
1140
+            return 0;
1141
+
1142
+        bp++;
1143
+    }
1144
+    *end = bp;
1145
+
1146
+    /* right-side special checks, bp = end */
1147
+    if(pattern->boundary & AC_BOUNDARY_RIGHT) {
1148
+        match = !!(pattern->boundary & AC_BOUNDARY_RIGHT_NEGATIVE);
1149
+        if((length <= SCANBUFF) && (bp == length || boundary[buffer[bp]] >= 2))
1089 1150
             match = !match;
1090
-        /* 'wide' characters need a 'wider' check */
1091
-        else if(pattern->sigopts & ACPATT_OPTION_WIDE) {
1092
-            /* beginning of file has only one preceding character */
1093
-            if(fileoffset-1 == 0)
1094
-                match = !match;
1095
-            if(offset - 1 && offset && !(isalnum(buffer[offset - 2]) && buffer[offset - 1] == '\0'))
1096
-                match = !match;
1097
-        }
1098
-        /* 'normal' characters */
1099
-        else if(offset && !isalnum(buffer[offset - 1]))
1151
+
1152
+        if(!match)
1153
+            return 0;
1154
+    }
1155
+
1156
+    if(pattern->boundary & AC_LINE_MARKER_RIGHT) {
1157
+        match = !!(pattern->boundary & AC_LINE_MARKER_RIGHT_NEGATIVE);
1158
+        if((length <= SCANBUFF) && (bp == length || buffer[bp] == '\n' || (buffer[bp] == '\r' && bp + 1 < length && buffer[bp + 1] == '\n')))
1100 1159
             match = !match;
1101 1160
 
1102 1161
         if(!match)
... ...
@@ -1106,15 +1197,12 @@ static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uin
1106 1106
     if(pattern->boundary & AC_WORD_MARKER_RIGHT) {
1107 1107
         match = !!(pattern->boundary & AC_WORD_MARKER_RIGHT_NEGATIVE);
1108 1108
         if(length <= SCANBUFF) {
1109
-            /* absolute end of file */
1110 1109
             if(bp == length)
1111 1110
                 match = !match;
1112
-            /* 'wide' characters need a 'wider' check */
1113 1111
             else if((pattern->sigopts & ACPATT_OPTION_WIDE) && (bp+1 < length)) {
1114 1112
                 if(!(isalnum(buffer[bp]) && buffer[bp + 1] == '\0'))
1115 1113
                     match = !match;
1116 1114
             }
1117
-            /* 'normal' characters */
1118 1115
             else if(!isalnum(buffer[bp]))
1119 1116
                 match = !match;
1120 1117
         }
... ...
@@ -1123,7 +1211,7 @@ static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uin
1123 1123
             return 0;
1124 1124
     }
1125 1125
 
1126
-    /* single-byte anchors */
1126
+    /* bp is shifted for right anchor check, thus invalidated as pattern right-side */
1127 1127
     if(!(pattern->ch[1] & CLI_MATCH_IGNORE)) {
1128 1128
         bp += pattern->ch_mindist[1];
1129 1129
 
... ...
@@ -1132,7 +1220,7 @@ static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uin
1132 1132
                 return 0;
1133 1133
 
1134 1134
             match = 1;
1135
-            AC_MATCH_CHAR(pattern->ch[1],buffer[bp]);
1135
+            AC_MATCH_CHAR(pattern->ch[1],buffer[bp],0);
1136 1136
             if(match)
1137 1137
                 break;
1138 1138
 
... ...
@@ -1143,53 +1231,19 @@ static int ac_findmatch_branch(const unsigned char *buffer, uint32_t offset, uin
1143 1143
             return 0;
1144 1144
     }
1145 1145
 
1146
-    if(pattern->prefix) {
1147
-        specialcnt = 0;
1148
-        bp = offset - pattern->prefix_length[1];
1149
-        match = 1;
1150
-
1151
-        for(i = 0; i < pattern->prefix_length[0]; i++) {
1152
-            AC_MATCH_CHAR(pattern->prefix[i],buffer[bp]);
1153
-            if(!match)
1154
-                return 0;
1155
-
1156
-            bp++;
1157
-        }
1158
-    }
1159
-
1160
-    if(!(pattern->ch[0] & CLI_MATCH_IGNORE)) {
1161
-        bp = offset - pattern->prefix_length[1];
1162
-        if(pattern->ch_mindist[0] + (uint32_t) 1 > bp)
1163
-            return 0;
1164
-
1165
-        bp -= pattern->ch_mindist[0] + 1;
1166
-        for(i = pattern->ch_mindist[0]; i <= pattern->ch_maxdist[0]; i++) {
1167
-            match = 1;
1168
-            AC_MATCH_CHAR(pattern->ch[0],buffer[bp]);
1169
-            if(match)
1170
-                break;
1171
-
1172
-            if(!bp)
1173
-                return 0;
1174
-            else
1175
-                bp--;
1176
-        }
1177
-        if(!match)
1178
-            return 0;
1179
-    }
1180
-
1181
-    return 1;
1146
+    return ac_backward_match_branch(buffer, offset-1, offset, fileoffset, length, pattern, pattern->prefix_length[0]-1, pattern->special_pattern-1, start, end);
1182 1147
 }
1183 1148
 
1184
-inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t *end)
1149
+inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uint32_t fileoffset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t *start, uint32_t *end)
1185 1150
 {
1186 1151
     int match;
1187 1152
     uint16_t specialcnt = pattern->special_pattern;
1188 1153
 
1154
+    /* minimal check as the maximum variable length may exceed the buffer */
1189 1155
     if((offset + pattern->length[1] > length) || (pattern->prefix_length[1] > offset))
1190 1156
         return 0;
1191 1157
 
1192
-    match = ac_findmatch_branch(buffer, offset, offset+pattern->depth, fileoffset, length, pattern, pattern->depth, specialcnt, end);
1158
+    match = ac_forward_match_branch(buffer, offset+pattern->depth, offset, fileoffset, length, pattern, pattern->depth, specialcnt, start, end);
1193 1159
     if(match)
1194 1160
         return 1;
1195 1161
     return 0;
... ...
@@ -1574,7 +1628,7 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
1574 1574
     struct cli_ac_node *current;
1575 1575
     struct cli_ac_list *pattN, *ptN;
1576 1576
     struct cli_ac_patt *patt, *pt;
1577
-    uint32_t i, bp, realoff, matchend;
1577
+    uint32_t i, bp, exptoff[2], realoff, matchstart, matchend;
1578 1578
     uint16_t j;
1579 1579
     uint8_t found, viruses_found = 0;
1580 1580
     int32_t **offmatrix, swp;
... ...
@@ -1611,14 +1665,15 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
1611 1611
                         pattN = pattN->next;
1612 1612
                         continue;
1613 1613
                     }
1614
-                    realoff = offset + bp - patt->prefix_length[1];
1614
+                    exptoff[0] = offset + bp - patt->prefix_length[2]; /* lower offset end */
1615
+                    exptoff[1] = offset + bp - patt->prefix_length[1]; /* higher offset end */
1615 1616
                     if(patt->offdata[0] == CLI_OFF_ABSOLUTE) {
1616
-                        if(patt->offset_max < realoff || patt->offset_min > realoff) {
1617
+                        if(patt->offset_max < exptoff[0] || patt->offset_min > exptoff[1]) {
1617 1618
                             pattN = pattN->next;
1618 1619
                             continue;
1619 1620
                         }
1620 1621
                     } else {
1621
-                        if(mdata->offset[patt->offset_min] == CLI_OFF_NONE || mdata->offset[patt->offset_max] < realoff || mdata->offset[patt->offset_min] > realoff) {
1622
+                        if(mdata->offset[patt->offset_min] == CLI_OFF_NONE || mdata->offset[patt->offset_max] < exptoff[0] || mdata->offset[patt->offset_min] > exptoff[1]) {
1622 1623
                             pattN = pattN->next;
1623 1624
                             continue;
1624 1625
                         }
... ...
@@ -1626,7 +1681,7 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
1626 1626
                 }
1627 1627
 
1628 1628
                 ptN = pattN;
1629
-                if(ac_findmatch(buffer, bp, offset + bp - patt->prefix_length[1], length, patt, &matchend)) {
1629
+                if(ac_findmatch(buffer, bp, offset + bp, length, patt, &matchstart, &matchend)) {
1630 1630
                     while(ptN) {
1631 1631
                         pt = ptN->me;
1632 1632
                         if(pt->partno > mdata->min_partno)
... ...
@@ -1637,7 +1692,7 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
1637 1637
                             continue;
1638 1638
                         }
1639 1639
 
1640
-                        realoff = offset + bp - pt->prefix_length[1];
1640
+                        realoff = offset + matchstart;
1641 1641
                         if(pt->offdata[0] == CLI_OFF_VERSION) {
1642 1642
                             if(!cli_hashset_contains_maybe_noalloc(mdata->vinfo, realoff)) {
1643 1643
                                 ptN = ptN->next_same;