... | ... |
@@ -2647,15 +2647,12 @@ static int cli_loadopenioc(FILE *fs, const char *dbname, struct cl_engine *engin |
2647 | 2647 |
|
2648 | 2648 |
#ifndef _WIN32 |
2649 | 2649 |
#define YARATARGET "Target:0;" |
2650 |
-static char **parse_yara_hex_string(YR_STRING *string); |
|
2650 |
+static char *parse_yara_hex_string(YR_STRING *string); |
|
2651 | 2651 |
|
2652 |
-static char **parse_yara_hex_string(YR_STRING *string) |
|
2652 |
+static char *parse_yara_hex_string(YR_STRING *string) |
|
2653 | 2653 |
{ |
2654 |
- char **res=NULL, **tres; |
|
2655 |
- size_t nres=1, tnres, slen, *ssizes=NULL, *tssizes; |
|
2656 |
- char *str, *p1, *p2, *p3; |
|
2657 |
- size_t i, j; |
|
2658 |
- unsigned long m, n; |
|
2654 |
+ char *res, *str; |
|
2655 |
+ size_t slen, reslen=0, i, j; |
|
2659 | 2656 |
|
2660 | 2657 |
if (!(string) || !(string->string)) |
2661 | 2658 |
return NULL; |
... | ... |
@@ -2664,184 +2661,59 @@ static char **parse_yara_hex_string(YR_STRING *string) |
2664 | 2664 |
return NULL; |
2665 | 2665 |
|
2666 | 2666 |
str = (char *)(string->string); |
2667 |
- slen = strlen(str); |
|
2668 | 2667 |
|
2669 |
- /* First calculate how many strings we need and how long each string needs to be */ |
|
2670 |
- p1 = strchr(str, '{')+1; |
|
2671 |
- while ((size_t)(p1-str) < slen-1) { |
|
2672 |
- switch (*p1) { |
|
2668 |
+ if ((slen = strlen(str)) == 0) |
|
2669 |
+ return NULL; |
|
2670 |
+ |
|
2671 |
+ str = strchr(str, '{')+1; |
|
2672 |
+ |
|
2673 |
+ for (i=0; i < slen-1; i++) { |
|
2674 |
+ switch (str[i]) { |
|
2673 | 2675 |
case ' ': |
2674 |
- case '\n': |
|
2675 |
- case '\r': |
|
2676 | 2676 |
case '\t': |
2677 |
- break; |
|
2678 |
- case '?': |
|
2677 |
+ case '\r': |
|
2678 |
+ case '\n': |
|
2679 | 2679 |
break; |
2680 | 2680 |
case '[': |
2681 |
- /* |
|
2682 |
- * Jump instruction |
|
2683 |
- * Format: [m-n] where 0 <= m <= n |
|
2684 |
- * There can be arbitrary whitespace between each token |
|
2685 |
- * e.g.: |
|
2686 |
- * [5-10] |
|
2687 |
- * [ 5-10] |
|
2688 |
- * [ 5 -10] |
|
2689 |
- * [ 5 - 10] |
|
2690 |
- * [ 5 - 10 ] |
|
2691 |
- * |
|
2692 |
- * Most of the following code is just sanity checking |
|
2693 |
- */ |
|
2694 |
- p2 = p1+1; |
|
2695 |
- for (p2 = p1+1; (size_t)(p2 - str) < slen; p2++) |
|
2696 |
- if (*p2 == ']') |
|
2697 |
- break; |
|
2698 |
- |
|
2699 |
- if ((size_t)(p2 - str) == slen) |
|
2700 |
- goto err; |
|
2701 |
- if (*p2 != ']') |
|
2702 |
- break; |
|
2703 |
- |
|
2704 |
- for (p3 = p1+1; p3 < p2; p3++) |
|
2705 |
- if (*p3 == '-') |
|
2706 |
- break; |
|
2707 |
- |
|
2708 |
- if (p3 >= p2-1) { |
|
2709 |
- /* We need at least a single digit between the - and the ] */ |
|
2710 |
- goto err; |
|
2711 |
- } |
|
2712 |
- |
|
2713 |
- m = strtoul(p1+1, &p3, 10); |
|
2714 |
- if (m == 0 && errno == ERANGE) |
|
2715 |
- goto err; |
|
2716 |
- |
|
2717 |
- n = strtoul(p3+1, &p3, 10); |
|
2718 |
- |
|
2719 |
- if (m > n) |
|
2720 |
- goto err; |
|
2721 |
- |
|
2722 |
- if (n - m == 0) { |
|
2723 |
- for (i=0; i < nres; i++) |
|
2724 |
- ssizes[nres]++; /* [n-n] behaves as a match-all wildcard (*) */ |
|
2725 |
- p1 = p3; |
|
2726 |
- break; |
|
2727 |
- } |
|
2728 |
- |
|
2729 |
- /* Now reallocate for the number of strings we need */ |
|
2730 |
- |
|
2731 |
- tnres = nres; |
|
2732 |
- nres += n - m; |
|
2733 |
- |
|
2734 |
- tssizes = cli_realloc(ssizes, sizeof(size_t) * nres); |
|
2735 |
- if (!(tssizes)) |
|
2736 |
- goto err; |
|
2737 |
- |
|
2738 |
- ssizes = tssizes; |
|
2739 |
- |
|
2740 |
- /* Bump up the sizes */ |
|
2741 |
- |
|
2742 |
- for (i=0; i < (n - m); i++) |
|
2743 |
- ssizes[tnres + i] = ssizes[i]; |
|
2744 |
- |
|
2745 |
- for (i=0; i <= (n - m); i++) |
|
2746 |
- ssizes[(tnres + i) - 1] += m + (i*2); |
|
2747 |
- |
|
2748 |
- p1 = p3; |
|
2681 |
+ /* ClamAV's Aho-Corasick algorithm requires at least two known bytes before {n,m} wildcard */ |
|
2682 |
+ if (reslen < 4) |
|
2683 |
+ return NULL; |
|
2684 |
+ reslen += 2; |
|
2749 | 2685 |
break; |
2750 | 2686 |
default: |
2751 |
- if ((*p1 >= 'a' && *p1 <= 'f') || (*p1 >= 'A' && *p1 <= 'F') || (*p1 >= '0' && *p1 <= '9')) { |
|
2752 |
- if (!(ssizes)) { |
|
2753 |
- ssizes = cli_calloc(nres, sizeof(size_t)); |
|
2754 |
- if (!(ssizes)) |
|
2755 |
- goto err; |
|
2756 |
- } |
|
2757 |
- |
|
2758 |
- for (i=0; i < nres; i++) |
|
2759 |
- ssizes[i]++; |
|
2760 |
- |
|
2761 |
- break; |
|
2762 |
- } |
|
2763 |
- |
|
2764 |
- cli_errmsg("Incorrect character ('%c') in Yara hex string \"%s\"\n", *p1, str); |
|
2765 |
- goto err; |
|
2687 |
+ reslen++; |
|
2688 |
+ break; |
|
2766 | 2689 |
} |
2767 |
- |
|
2768 |
- p1++; |
|
2769 | 2690 |
} |
2770 | 2691 |
|
2771 |
- /* Allocate the space needed for the strings */ |
|
2772 |
- res = cli_calloc(nres+1, sizeof(char **)); /* +1 for terminating NULL */ |
|
2692 |
+ reslen++; |
|
2693 |
+ res = cli_calloc(reslen, 1); |
|
2773 | 2694 |
if (!(res)) |
2774 |
- goto err; |
|
2775 |
- |
|
2776 |
- for (i=0; i<nres; i++) { |
|
2777 |
- res[i] = cli_calloc(ssizes[i]+1, 1); |
|
2778 |
- if (!(res[i])) |
|
2779 |
- goto err; |
|
2780 |
- } |
|
2695 |
+ return NULL; |
|
2781 | 2696 |
|
2782 |
- /* Copy over the strings */ |
|
2783 |
- tnres=1; |
|
2784 |
- p1 = strchr(str, '{')+1; |
|
2785 |
- while ((size_t)(p1 - str) < slen-1) { |
|
2786 |
- switch (*p1) { |
|
2697 |
+ for (i=0, j=0; i < slen-1 && j < reslen; i++) { |
|
2698 |
+ switch (str[i]) { |
|
2787 | 2699 |
case ' ': |
2788 |
- case '\n': |
|
2789 |
- case '\r': |
|
2790 | 2700 |
case '\t': |
2701 |
+ case '\r': |
|
2702 |
+ case '\n': |
|
2703 |
+ case '}': |
|
2791 | 2704 |
break; |
2792 | 2705 |
case '[': |
2793 |
- p2 = p1+1; |
|
2794 |
- for (p2 = p1+1; (size_t)(p2 - str) < slen; p2++) |
|
2795 |
- if (*p2 == ']') |
|
2796 |
- break; |
|
2797 |
- |
|
2798 |
- for (p3 = p1+1; p3 < p2; p3++) |
|
2799 |
- if (*p3 == '-') |
|
2800 |
- break; |
|
2801 |
- |
|
2802 |
- m = strtoul(p1+1, &p3, 10); |
|
2803 |
- if (m == 0 && errno == ERANGE) |
|
2804 |
- goto err; |
|
2805 |
- |
|
2806 |
- n = strtoul(p3+1, &p3, 10); |
|
2807 |
- |
|
2808 |
- if (m > n) |
|
2809 |
- goto err; |
|
2810 |
- |
|
2811 |
- if (n - m == 0) { |
|
2812 |
- for (i=0; i < nres; i++) |
|
2813 |
- res[i][strlen(res[i])-1] = '*'; |
|
2814 |
- p1 = p3; |
|
2815 |
- break; |
|
2816 |
- } |
|
2817 |
- |
|
2818 |
- tnres += n - m; |
|
2819 |
- for (i=1; i <= tnres; i++) { |
|
2820 |
- for (j=0; j<i; j++) |
|
2821 |
- sprintf(res[i-1]+strlen(res[i-1]), "??"); |
|
2822 |
- } |
|
2823 |
- |
|
2824 |
- p1=p3; |
|
2706 |
+ res[j++] = '?'; |
|
2707 |
+ res[j++] = '?'; |
|
2708 |
+ res[j++] = '{'; |
|
2709 |
+ break; |
|
2710 |
+ case ']': |
|
2711 |
+ res[j++] = '}'; |
|
2825 | 2712 |
break; |
2826 | 2713 |
default: |
2827 |
- for (i=0; i < nres; i++) |
|
2828 |
- res[i][strlen(res[i])] = *p1; |
|
2714 |
+ res[j++] = str[i]; |
|
2829 | 2715 |
break; |
2830 | 2716 |
} |
2831 |
- |
|
2832 |
- p1++; |
|
2833 |
- } |
|
2834 |
- |
|
2835 |
- cli_errmsg("Yara string \"%s\" has %zu substrings\n", str, nres); |
|
2836 |
- for (i = 0; i < nres; i++) { |
|
2837 |
- cli_errmsg(" substring[%zu] (%zu:%zu): \"%s\"\n", i, ssizes[i], strlen(res[i]), res[i]); |
|
2838 | 2717 |
} |
2839 | 2718 |
|
2840 |
- return NULL; |
|
2841 |
- |
|
2842 |
-err: |
|
2843 |
- /* TODO: Free all the things! */ |
|
2844 |
- return NULL; |
|
2719 |
+ return res; |
|
2845 | 2720 |
} |
2846 | 2721 |
|
2847 | 2722 |
static int cli_loadyara(FILE *fs, const char *dbname, struct cl_engine *engine, unsigned int options, struct cli_dbio *dbio) |
... | ... |
@@ -2930,16 +2802,17 @@ static int cli_loadyara(FILE *fs, const char *dbname, struct cl_engine *engine, |
2930 | 2930 |
STAILQ_REMOVE(&rule->strings, string, _yc_string, link); |
2931 | 2931 |
|
2932 | 2932 |
if (STRING_IS_HEX(string)) { |
2933 |
- size_t len = strlen(string->string); |
|
2934 |
- size_t rulelen = strlen(rulestr); |
|
2935 |
- size_t j; |
|
2936 |
- cli_errmsg("Yara hex string: \"%s\"\n", string->string); |
|
2937 |
- for (j=0, i=0; i < len; i++) { |
|
2938 |
- int ch = string->string[i]; |
|
2939 |
- if (isalnum(ch)) |
|
2940 |
- rulestr[rulelen+(j++)] = string->string[i]; |
|
2933 |
+ char *substr = parse_yara_hex_string(string); |
|
2934 |
+ size_t len = strlen(rulestr); |
|
2935 |
+ |
|
2936 |
+ substr = parse_yara_hex_string(string); |
|
2937 |
+#if 1 |
|
2938 |
+ cli_errmsg("Yara hex string: \"%s\"\n", substr); |
|
2939 |
+#endif |
|
2940 |
+ if (substr) { |
|
2941 |
+ snprintf(rulestr+len, totsize-len, "%s", substr); |
|
2942 |
+ free(substr); |
|
2941 | 2943 |
} |
2942 |
- rulestr[rulelen + j] = '\0'; |
|
2943 | 2944 |
} else { |
2944 | 2945 |
for (i=0; i < strlen(string->string); i++) { |
2945 | 2946 |
size_t len = strlen(rulestr); |
... | ... |
@@ -2958,7 +2831,9 @@ static int cli_loadyara(FILE *fs, const char *dbname, struct cl_engine *engine, |
2958 | 2958 |
if (rulestr[strlen(rulestr)-1] == ';') |
2959 | 2959 |
rulestr[strlen(rulestr)-1] = '\0'; |
2960 | 2960 |
|
2961 |
+#if 1 |
|
2961 | 2962 |
printf("[+] computed ldb: \"%s\"\n", rulestr); |
2963 |
+#endif |
|
2962 | 2964 |
ruledup = cli_malloc(strlen(rulestr)+1); |
2963 | 2965 |
if (!ruledup) { |
2964 | 2966 |
free(rulestr); |