Browse code

Integrated sigopts (modifiers) into pcre subsigs using '+'; refactored sigopts_handler() code. TODO: wide support for regex strings.

Kevin Lin authored on 2015/03/10 06:17:15
Showing 1 changed file
... ...
@@ -117,122 +117,171 @@ char *cli_virname(const char *virname, unsigned int official)
117 117
 
118 118
 static int sigopts_handler(struct cli_matcher *root, const char *virname, const char *hexsig, uint8_t sigopts, uint16_t rtype, uint16_t type, const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options)
119 119
 {
120
-    char *hexcpy;
120
+    char *hexcpy, *start, *end;
121 121
     int i, ret = CL_SUCCESS;
122 122
 
123 123
     /* prevent cyclic loops with cli_parse_add on same hexsig
124 124
      * cyclic loops should be impossible though
125 125
      */
126
-    if (!(sigopts & ACPATT_OPTION_ONCE)) {
127
-        hexcpy = cli_strdup(hexsig);
128
-        if (!hexcpy)
129
-            return CL_EMEM;
126
+    if (sigopts & ACPATT_OPTION_ONCE) {
127
+        cli_errmsg("sigopts_handler: invalidly called multiple times!\n");
128
+        return CL_EPARSE;
129
+    }
130
+
131
+    hexcpy = cli_strdup(hexsig);
132
+    if (!hexcpy)
133
+        return CL_EMEM;
134
+
135
+    sigopts |= ACPATT_OPTION_ONCE;
130 136
 
131
-        sigopts |= ACPATT_OPTION_ONCE;
137
+    /* REGEX testing and sigopt handling */
138
+    start = strchr(hexcpy, '/');
139
+    end = strrchr(hexcpy, '/');
132 140
 
133
-        /* FULLWORD sigopt handling - only happens once */
141
+    if (start != end) {
142
+        /* FULLWORD regex sigopt handling */
134 143
         if (sigopts & ACPATT_OPTION_FULLWORD) {
135
-            char *rechar;
136
-            char *hexovr = cli_calloc(strlen(hexcpy)+7, sizeof(char));
144
+            size_t ovrlen = strlen(hexcpy)+5;
145
+            char *hexovr = cli_calloc(ovrlen, sizeof(char));
137 146
             if (!hexovr) {
138 147
                 free(hexcpy);
139 148
                 return CL_EMEM;
140 149
             }
141 150
 
142
-            snprintf(hexovr, strlen(hexcpy)+7, "(W)%s(W)", hexcpy);
143
-
144
-            /* change the '[' and ']' to '{' and '}' since there are now two bytes */
145
-            rechar = hexovr;
146
-            while ((rechar = strchr(rechar, '['))) { //TEST TODO
147
-                *rechar = '{';
151
+            *start++ = '\0';
152
+            *end++ = '\0';
148 153
 
149
-                if (!(rechar = strchr(rechar, ']'))) {
150
-                    cli_errmsg("cli_parse_add: unmatched '[' in signature %s\n", virname);
151
-                    free(hexcpy);
152
-                    free(hexovr);
153
-                    return CL_EMALFDB;
154
-                }
155
-                *rechar = '}';
156
-            }
154
+            snprintf(hexovr, ovrlen, "%s/\\W%s\\W/%s", hexcpy, start, end);
157 155
 
158 156
             free(hexcpy);
159 157
             hexcpy = hexovr;
160 158
         }
161
-
162
-        /* WIDE sigopt handling - only happens once (after fullword)
163
-         * TODO - consider handling in cli_ac_addpatt? (two pattern possibility)
164
-         */
165
-        if (sigopts & ACPATT_OPTION_WIDE) {
166
-            size_t ovrlen = 2*strlen(hexcpy)+1;
159
+        /* NOCASE sigopt is passed onto the regex-opt handler */
160
+        if (sigopts & ACPATT_OPTION_NOCASE) {
161
+            size_t ovrlen = strlen(hexcpy)+2;
167 162
             char *hexovr = cli_calloc(ovrlen, sizeof(char));
168 163
             if (!hexovr) {
169 164
                 free(hexcpy);
170 165
                 return CL_EMEM;
171 166
             }
172 167
 
173
-            /* clamav-specific wildcards need to be handled here! */
174
-            for (i = 0; i < strlen(hexcpy); ++i) {
175
-                size_t len = strlen(hexovr);
168
+            snprintf(hexovr, ovrlen, "%si", hexcpy);
176 169
 
177
-                if (hexcpy[i] == '*' || hexcpy[i] == '|' || hexcpy[i] == ')') {
178
-                    hexovr[len] = hexcpy[i];
179
-                } else if (hexcpy[i] == '[') {
180
-                    /* change the '[' and ']' to '{' and '}' since there are now two bytes */
181
-                    hexovr[len++] = '{';
182
-                    ++i;
183
-                    while (i < strlen(hexcpy) && hexcpy[i] != ']')
184
-                        hexovr[len++] = hexcpy[i++];
185
-
186
-                    hexovr[len] = '}';
187
-                } else if (hexcpy[i] == '{') {
188
-                    while (i < strlen(hexcpy) && hexcpy[i] != '}')
189
-                        hexovr[len++] = hexcpy[i++];
190
-
191
-                    hexovr[len] = '}';
192
-                } else if (hexcpy[i] == '!' || hexcpy[i] == '(') {
193
-                    if (hexcpy[i] == '!')
194
-                        hexovr[len++] = hexcpy[i++];
195
-
196
-                    /* copies '(' */
197
-                    hexovr[len] = hexcpy[i];
170
+            free(hexcpy);
171
+            hexcpy = hexovr;
172
+        }
173
+        /* WIDE sigopt is unsupported */
174
+        if (sigopts & ACPATT_OPTION_WIDE) {
175
+            cli_errmsg("cli_parse_add: wide modifier [w] is not supported for regex subsigs\n");
176
+            return CL_EMALFDB;
177
+        }
198 178
 
199
-                    if (hexcpy[i+1] == 'B' || hexcpy[i+1] == 'L' || hexcpy[i+1] == 'W') {
200
-                        ++len; ++i;
201
-                        hexovr[len++] = hexcpy[i++];
202
-                        if (hexcpy[i] != ')') {
203
-                            free(hexcpy);
204
-                            free(hexovr);
205
-                            return CL_EMALFDB;
206
-                        }
207
-                        hexovr[len] = hexcpy[i];
208
-                    }
209
-                } else {
210
-                    //snprintf(hexovr+len, ovrlen-len, "%02x%c%c", 0, hexcpy[i], hexcpy[i+1]);
211
-                    snprintf(hexovr+len, ovrlen-len, "%c%c%02x", hexcpy[i], hexcpy[i+1], 0);
212
-                    ++i;
213
-                }
214
-            }
179
+        ret = cli_parse_add(root, virname, hexcpy, sigopts, rtype, type, offset, target, lsigid, options);
180
+        free(hexcpy);
181
+        return ret;
182
+    }
183
+
184
+    /* NORMAL HEXSIG sigopt handling */
185
+    /* FULLWORD sigopt handling - only happens once */
186
+    if (sigopts & ACPATT_OPTION_FULLWORD) {
187
+        char *rechar;
188
+        size_t ovrlen = strlen(hexcpy)+7;
189
+        char *hexovr = cli_calloc(ovrlen, sizeof(char));
190
+        if (!hexovr) {
191
+            free(hexcpy);
192
+            return CL_EMEM;
193
+        }
194
+
195
+        snprintf(hexovr, ovrlen, "(W)%s(W)", hexcpy);
215 196
 
216
-            /* NOCASE sigopt is handled in cli_ac_addsig */
217
-            ret = cli_parse_add(root, virname, hexovr, sigopts, rtype, type, offset, target, lsigid, options);
218
-            free(hexovr);
219
-            if (ret != CL_SUCCESS || !(sigopts & ACPATT_OPTION_ASCII)) {
197
+        /* change the '[' and ']' to '{' and '}' since there are now two bytes */
198
+        rechar = hexovr;
199
+        while ((rechar = strchr(rechar, '['))) { //TEST TODO
200
+            *rechar = '{';
201
+
202
+            if (!(rechar = strchr(rechar, ']'))) {
203
+                cli_errmsg("cli_parse_add: unmatched '[' in signature %s\n", virname);
220 204
                 free(hexcpy);
221
-                return ret;
222
-            } else {
223
-                /* disable wide sigopt for ascii variant */
224
-                sigopts &= ~ACPATT_OPTION_WIDE;
205
+                free(hexovr);
206
+                return CL_EMALFDB;
225 207
             }
208
+            *rechar = '}';
226 209
         }
227 210
 
228
-        /* ASCII sigopt; NOCASE sigopt is handled in cli_ac_addsig */
229
-        ret = cli_parse_add(root, virname, hexcpy, sigopts, rtype, type, offset, target, lsigid, options);
230 211
         free(hexcpy);
231
-        return ret;
212
+        hexcpy = hexovr;
232 213
     }
233 214
 
234
-    cli_errmsg("sigopts_handler: invalidly called multiple times!\n");
235
-    return CL_EPARSE;
215
+    /* WIDE sigopt handling - only happens once (after fullword)
216
+     * TODO - consider handling in cli_ac_addpatt? (two pattern possibility)
217
+     */
218
+    if (sigopts & ACPATT_OPTION_WIDE) {
219
+        size_t ovrlen = 2*strlen(hexcpy)+1;
220
+        char *hexovr = cli_calloc(ovrlen, sizeof(char));
221
+        if (!hexovr) {
222
+            free(hexcpy);
223
+            return CL_EMEM;
224
+        }
225
+
226
+        /* clamav-specific wildcards need to be handled here! */
227
+        for (i = 0; i < strlen(hexcpy); ++i) {
228
+            size_t len = strlen(hexovr);
229
+
230
+            if (hexcpy[i] == '*' || hexcpy[i] == '|' || hexcpy[i] == ')') {
231
+                hexovr[len] = hexcpy[i];
232
+            } else if (hexcpy[i] == '[') {
233
+                /* change the '[' and ']' to '{' and '}' since there are now two bytes */
234
+                hexovr[len++] = '{';
235
+                ++i;
236
+                while (i < strlen(hexcpy) && hexcpy[i] != ']')
237
+                    hexovr[len++] = hexcpy[i++];
238
+
239
+                hexovr[len] = '}';
240
+            } else if (hexcpy[i] == '{') {
241
+                while (i < strlen(hexcpy) && hexcpy[i] != '}')
242
+                    hexovr[len++] = hexcpy[i++];
243
+
244
+                hexovr[len] = '}';
245
+            } else if (hexcpy[i] == '!' || hexcpy[i] == '(') {
246
+                if (hexcpy[i] == '!')
247
+                    hexovr[len++] = hexcpy[i++];
248
+
249
+                /* copies '(' */
250
+                hexovr[len] = hexcpy[i];
251
+
252
+                if (hexcpy[i+1] == 'B' || hexcpy[i+1] == 'L' || hexcpy[i+1] == 'W') {
253
+                    ++len; ++i;
254
+                    hexovr[len++] = hexcpy[i++];
255
+                    if (hexcpy[i] != ')') {
256
+                        free(hexcpy);
257
+                        free(hexovr);
258
+                        return CL_EMALFDB;
259
+                    }
260
+                    hexovr[len] = hexcpy[i];
261
+                }
262
+            } else {
263
+                //snprintf(hexovr+len, ovrlen-len, "%02x%c%c", 0, hexcpy[i], hexcpy[i+1]);
264
+                snprintf(hexovr+len, ovrlen-len, "%c%c%02x", hexcpy[i], hexcpy[i+1], 0);
265
+                ++i;
266
+            }
267
+        }
268
+
269
+        /* NOCASE sigopt is handled in cli_ac_addsig */
270
+        ret = cli_parse_add(root, virname, hexovr, sigopts, rtype, type, offset, target, lsigid, options);
271
+        free(hexovr);
272
+        if (ret != CL_SUCCESS || !(sigopts & ACPATT_OPTION_ASCII)) {
273
+            free(hexcpy);
274
+            return ret;
275
+        } else {
276
+            /* disable wide sigopt for ascii variant */
277
+            sigopts &= ~ACPATT_OPTION_WIDE;
278
+        }
279
+    }
280
+
281
+    /* ASCII sigopt; NOCASE sigopt is handled in cli_ac_addsig */
282
+    ret = cli_parse_add(root, virname, hexcpy, sigopts, rtype, type, offset, target, lsigid, options);
283
+    free(hexcpy);
284
+    return ret;
236 285
 }
237 286
 
238 287
 #define PCRE_TOKENS 4
... ...
@@ -299,7 +348,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
299 299
         return CL_SUCCESS;
300 300
     }
301 301
     if (strrchr(hexsig, '/')) {
302
-        char *start, *end;
302
+        char *start, *end, *sub;
303 303
 
304 304
         /* get copied */
305 305
         hexcpy = cli_strdup(hexsig);
... ...
@@ -310,8 +359,13 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
310 310
         start = strchr(hexcpy, '/');
311 311
         end = strrchr(hexcpy, '/');
312 312
 
313
+        /* get plus-ed (modifiers) */
314
+        sub = strchr(end, '+');
315
+        if (sub && start == end)
316
+            *end = '\0';
317
+
313 318
         /* get pcre-ed */
314
-        if (start != end) {
319
+        if (!sub && start != end) {
315 320
 #if HAVE_PCRE
316 321
             /* expected format => ^offset:trigger/regex/[cflags]$ */
317 322
             const char *trigger, *pattern, *cflags;
... ...
@@ -344,11 +398,10 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
344 344
 #endif
345 345
         } else { /* get option-ed */
346 346
             /* get NULL-ed */
347
-            char *opt = end+1;
347
+            char *opt = sub ? sub : end;
348 348
             uint8_t sigopts = 0;
349 349
 
350
-            *end = '\0';
351
-
350
+            *opt++ = '\0';
352 351
             while (*opt != '\0') {
353 352
                 switch (*opt) {
354 353
                 case 'i':