Browse code

Dramatically reduce complexity of parsing Yara hex signatures.

Shawn Webb authored on 2014/10/28 08:51:05
Showing 1 changed file
... ...
@@ -2647,15 +2647,12 @@ static int cli_loadopenioc(FILE *fs, const char *dbname, struct cl_engine *engin
2647 2647
 
2648 2648
 #ifndef _WIN32
2649 2649
 #define YARATARGET "Target:0;"
2650
-static char **parse_yara_hex_string(YR_STRING *string);
2650
+static char *parse_yara_hex_string(YR_STRING *string);
2651 2651
 
2652
-static char **parse_yara_hex_string(YR_STRING *string)
2652
+static char *parse_yara_hex_string(YR_STRING *string)
2653 2653
 {
2654
-    char **res=NULL, **tres;
2655
-    size_t nres=1, tnres, slen, *ssizes=NULL, *tssizes;
2656
-    char *str, *p1, *p2, *p3;
2657
-    size_t i, j;
2658
-    unsigned long m, n;
2654
+    char *res, *str;
2655
+    size_t slen, reslen=0, i, j;
2659 2656
 
2660 2657
     if (!(string) || !(string->string))
2661 2658
         return NULL;
... ...
@@ -2664,184 +2661,59 @@ static char **parse_yara_hex_string(YR_STRING *string)
2664 2664
         return NULL;
2665 2665
 
2666 2666
     str = (char *)(string->string);
2667
-    slen = strlen(str);
2668 2667
 
2669
-    /* First calculate how many strings we need and how long each string needs to be */
2670
-    p1 = strchr(str, '{')+1;
2671
-    while ((size_t)(p1-str) < slen-1) {
2672
-        switch (*p1) {
2668
+    if ((slen = strlen(str)) == 0)
2669
+        return NULL;
2670
+
2671
+    str = strchr(str, '{')+1;
2672
+
2673
+    for (i=0; i < slen-1; i++) {
2674
+        switch (str[i]) {
2673 2675
         case ' ':
2674
-        case '\n':
2675
-        case '\r':
2676 2676
         case '\t':
2677
-            break;
2678
-        case '?':
2677
+        case '\r':
2678
+        case '\n':
2679 2679
             break;
2680 2680
         case '[':
2681
-            /*
2682
-             * Jump instruction
2683
-             * Format: [m-n] where 0 <= m <= n
2684
-             * There can be arbitrary whitespace between each token
2685
-             * e.g.:
2686
-             *     [5-10]
2687
-             *     [ 5-10]
2688
-             *     [ 5 -10]
2689
-             *     [ 5 - 10]
2690
-             *     [ 5 - 10 ]
2691
-             *
2692
-             * Most of the following code is just sanity checking
2693
-             */
2694
-            p2 = p1+1;
2695
-            for (p2 = p1+1; (size_t)(p2 - str) < slen; p2++)
2696
-                if (*p2 == ']')
2697
-                    break;
2698
-
2699
-            if ((size_t)(p2 - str) == slen)
2700
-                goto err;
2701
-            if (*p2 != ']')
2702
-                break;
2703
-
2704
-            for (p3 = p1+1; p3 < p2; p3++)
2705
-                if (*p3 == '-')
2706
-                    break;
2707
-
2708
-            if (p3 >= p2-1) {
2709
-                /* We need at least a single digit between the - and the ] */
2710
-                goto err;
2711
-            }
2712
-
2713
-            m = strtoul(p1+1, &p3, 10);
2714
-            if (m == 0 && errno == ERANGE)
2715
-                goto err;
2716
-
2717
-            n = strtoul(p3+1, &p3, 10);
2718
-
2719
-            if (m > n)
2720
-                goto err;
2721
-
2722
-            if (n - m == 0) {
2723
-                for (i=0; i < nres; i++)
2724
-                    ssizes[nres]++; /* [n-n] behaves as a match-all wildcard (*) */
2725
-                p1 = p3;
2726
-                break;
2727
-            }
2728
-
2729
-            /* Now reallocate for the number of strings we need */
2730
-
2731
-            tnres = nres;
2732
-            nres += n - m;
2733
-
2734
-            tssizes = cli_realloc(ssizes, sizeof(size_t) * nres);
2735
-            if (!(tssizes))
2736
-                goto err;
2737
-
2738
-            ssizes = tssizes;
2739
-
2740
-            /* Bump up the sizes */
2741
-
2742
-            for (i=0; i < (n - m); i++)
2743
-                ssizes[tnres + i] = ssizes[i];
2744
-
2745
-            for (i=0; i <= (n - m); i++)
2746
-                ssizes[(tnres + i) - 1] += m + (i*2);
2747
-
2748
-            p1 = p3;
2681
+            /* ClamAV's Aho-Corasick algorithm requires at least two known bytes before a {n-m} wildcard */
2682
+            if (reslen < 4)
2683
+                return NULL;
2684
+            reslen += 2;
2749 2685
             break;
2750 2686
         default:
2751
-            if ((*p1 >= 'a' && *p1 <= 'f') || (*p1 >= 'A' && *p1 <= 'F') || (*p1 >= '0' && *p1 <= '9')) {
2752
-                if (!(ssizes)) {
2753
-                    ssizes = cli_calloc(nres, sizeof(size_t));
2754
-                    if (!(ssizes))
2755
-                        goto err;
2756
-                }
2757
-
2758
-                for (i=0; i < nres; i++)
2759
-                    ssizes[i]++;
2760
-
2761
-                break;
2762
-            }
2763
-
2764
-            cli_errmsg("Incorrect character ('%c') in Yara hex string \"%s\"\n", *p1, str);
2765
-            goto err;
2687
+            reslen++;
2688
+            break;
2766 2689
         }
2767
-
2768
-        p1++;
2769 2690
     }
2770 2691
 
2771
-    /* Allocate the space needed for the strings */
2772
-    res = cli_calloc(nres+1, sizeof(char **)); /* +1 for terminating NULL */
2692
+    reslen++;
2693
+    res = cli_calloc(reslen, 1);
2773 2694
     if (!(res))
2774
-        goto err;
2775
-
2776
-    for (i=0; i<nres; i++) {
2777
-        res[i] = cli_calloc(ssizes[i]+1, 1);
2778
-        if (!(res[i]))
2779
-            goto err;
2780
-    }
2695
+        return NULL;
2781 2696
 
2782
-    /* Copy over the strings */
2783
-    tnres=1;
2784
-    p1 = strchr(str, '{')+1;
2785
-    while ((size_t)(p1 - str) < slen-1) {
2786
-        switch (*p1) {
2697
+    for (i=0, j=0; i < slen-1 && j < reslen; i++) {
2698
+        switch (str[i]) {
2787 2699
         case ' ':
2788
-        case '\n':
2789
-        case '\r':
2790 2700
         case '\t':
2701
+        case '\r':
2702
+        case '\n':
2703
+        case '}':
2791 2704
             break;
2792 2705
         case '[':
2793
-            p2 = p1+1;
2794
-            for (p2 = p1+1; (size_t)(p2 - str) < slen; p2++)
2795
-                if (*p2 == ']')
2796
-                    break;
2797
-
2798
-            for (p3 = p1+1; p3 < p2; p3++)
2799
-                if (*p3 == '-')
2800
-                    break;
2801
-
2802
-            m = strtoul(p1+1, &p3, 10);
2803
-            if (m == 0 && errno == ERANGE)
2804
-                goto err;
2805
-
2806
-            n = strtoul(p3+1, &p3, 10);
2807
-
2808
-            if (m > n)
2809
-                goto err;
2810
-
2811
-            if (n - m == 0) {
2812
-                for (i=0; i < nres; i++)
2813
-                    res[i][strlen(res[i])-1] = '*';
2814
-                p1 = p3;
2815
-                break;
2816
-            }
2817
-
2818
-            tnres += n - m;
2819
-            for (i=1; i <= tnres; i++) {
2820
-                for (j=0; j<i; j++)
2821
-                    sprintf(res[i-1]+strlen(res[i-1]), "??");
2822
-            }
2823
-
2824
-            p1=p3;
2706
+            res[j++] = '?';
2707
+            res[j++] = '?';
2708
+            res[j++] = '{';
2709
+            break;
2710
+        case ']':
2711
+            res[j++] = '}';
2825 2712
             break;
2826 2713
         default:
2827
-            for (i=0; i < nres; i++)
2828
-                res[i][strlen(res[i])] = *p1;
2714
+            res[j++] = str[i];
2829 2715
             break;
2830 2716
         }
2831
-
2832
-        p1++;
2833
-    }
2834
-
2835
-    cli_errmsg("Yara string \"%s\" has %zu substrings\n", str, nres);
2836
-    for (i = 0; i < nres; i++) {
2837
-        cli_errmsg("    substring[%zu] (%zu:%zu): \"%s\"\n", i, ssizes[i], strlen(res[i]), res[i]);
2838 2717
     }
2839 2718
 
2840
-    return NULL;
2841
-
2842
-err:
2843
-    /* TODO: Free all the things! */
2844
-    return NULL;
2719
+    return res;
2845 2720
 }
2846 2721
 
2847 2722
 static int cli_loadyara(FILE *fs, const char *dbname, struct cl_engine *engine, unsigned int options, struct cli_dbio *dbio)
... ...
@@ -2930,16 +2802,17 @@ static int cli_loadyara(FILE *fs, const char *dbname, struct cl_engine *engine,
2930 2930
             STAILQ_REMOVE(&rule->strings, string, _yc_string, link);
2931 2931
 
2932 2932
             if (STRING_IS_HEX(string)) {
2933
-                size_t len = strlen(string->string);
2934
-                size_t rulelen = strlen(rulestr);
2935
-                size_t j;
2936
-                cli_errmsg("Yara hex string: \"%s\"\n", string->string);
2937
-                for (j=0, i=0; i < len; i++) {
2938
-                    int ch = string->string[i];
2939
-                    if (isalnum(ch))
2940
-                        rulestr[rulelen+(j++)] = string->string[i];
2933
+                char *substr = parse_yara_hex_string(string);
2934
+                size_t len = strlen(rulestr);
2935
+
2936
+                substr = parse_yara_hex_string(string);
2937
+#if 1
2938
+                cli_errmsg("Yara hex string: \"%s\"\n", substr);
2939
+#endif
2940
+                if (substr) {
2941
+                    snprintf(rulestr+len, totsize-len, "%s", substr);
2942
+                    free(substr);
2941 2943
                 }
2942
-                rulestr[rulelen + j] = '\0';
2943 2944
             } else {
2944 2945
                 for (i=0; i < strlen(string->string); i++) {
2945 2946
                     size_t len = strlen(rulestr);
... ...
@@ -2958,7 +2831,9 @@ static int cli_loadyara(FILE *fs, const char *dbname, struct cl_engine *engine,
2958 2958
         if (rulestr[strlen(rulestr)-1] == ';')
2959 2959
             rulestr[strlen(rulestr)-1] = '\0';
2960 2960
 
2961
+#if 1
2961 2962
         printf("[+] computed ldb: \"%s\"\n", rulestr);
2963
+#endif
2962 2964
         ruledup = cli_malloc(strlen(rulestr)+1);
2963 2965
         if (!ruledup) {
2964 2966
             free(rulestr);