libclamav/matcher-bm.c
8000d078
 /*
33872a43
  *  Copyright (C) 2007-2009 Sourcefire, Inc.
2023340a
  *
  *  Authors: Tomasz Kojm
8000d078
  *
  *  This program is free software; you can redistribute it and/or modify
bb34cb31
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
8000d078
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
8000d078
  */
 
bedc58de
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <stdio.h>
563582a1
 #include <assert.h>
8000d078
 #include "clamav.h"
 #include "memory.h"
 #include "others.h"
 #include "cltypes.h"
b68d11d2
 #include "matcher.h"
079229d6
 #include "matcher-bm.h"
73218de2
 #include "filetypes.h"
3e975a60
 #include "filtering.h"
8000d078
 
b94e66c4
 #include "mpool.h"
 
7fd366a3
 /* Shortest signature cli_bm_addpatt() accepts. */
 #define BM_MIN_LENGTH	3
 /* Number of consecutive bytes hashed into one table index. */
 #define BM_BLOCK_SIZE	3
 /*
  * Maps three byte values to an index into the bm_shift / bm_suffix tables.
  * The largest index is HASH(255, 255, 255) == 63495, so it fits in uint16_t.
  * Every argument and the whole expansion are parenthesised so that
  * compound expressions (e.g. HASH(a + 1, b, c)) expand correctly.
  */
 #define HASH(a,b,c) (211 * (a) + 37 * (b) + (c))
8000d078
 
33872a43
 int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern, const char *offset)
8000d078
 {
ab1db3b3
 	uint16_t idx, i;
4e9ab8ed
 	const unsigned char *pt = pattern->pattern;
8000d078
 	struct cli_bm_patt *prev, *next = NULL;
33872a43
 	int ret;
8000d078
 
 
     if(pattern->length < BM_MIN_LENGTH) {
871177cd
 	cli_errmsg("cli_bm_addpatt: Signature for %s is too short\n", pattern->virname);
 	return CL_EMALFDB;
8000d078
     }
 
294558a5
     if((ret = cli_caloff(offset, NULL, root->type, pattern->offdata, &pattern->offset_min, &pattern->offset_max))) {
33872a43
 	cli_errmsg("cli_bm_addpatt: Can't calculate offset for signature %s\n", pattern->virname);
 	return ret;
     }
aca9ea82
     if(pattern->offdata[0] != CLI_OFF_ANY) {
 	if(pattern->offdata[0] == CLI_OFF_ABSOLUTE)
 	    root->bm_absoff_num++;
 	else
 	    root->bm_reloff_num++;
     }
33872a43
 
aa47fb3b
     /* bm_offmode doesn't use the prefilter for BM signatures anyway, so
      * don't add these to the filter. */
     if(root->filter && !root->bm_offmode) {
02eabc6d
 	/* the bm_suffix load balancing below can shorten the sig,
 	 * we want to see the entire signature! */
 	if (filter_add_static(root->filter, pattern->pattern, pattern->length, pattern->virname) == -1) {
 	    cli_warnmsg("cli_bm_addpatt: cannot use filter for trie\n");
 	    mpool_free(root->mempool, root->filter);
 	    root->filter = NULL;
 	}
380ae304
 	/* TODO: should this affect maxpatlen? */
02eabc6d
     }
 
ab1db3b3
 #if BM_MIN_LENGTH == BM_BLOCK_SIZE
     /* try to load balance bm_suffix (at the cost of bm_shift) */
     for(i = 0; i < pattern->length - BM_BLOCK_SIZE + 1; i++) {
242efc14
 	idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
ab1db3b3
 	if(!root->bm_suffix[idx]) {
 	    if(i) {
 		pattern->prefix = pattern->pattern;
 		pattern->prefix_length = i;
 		pattern->pattern = &pattern->pattern[i];
 		pattern->length -= i;
 		pt = pattern->pattern;
 	    }
 	    break;
 	}
8000d078
     }
ab1db3b3
 #endif
8000d078
 
ab1db3b3
     for(i = 0; i <= BM_MIN_LENGTH - BM_BLOCK_SIZE; i++) {
 	idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
 	root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i);
     }
8000d078
 
     prev = next = root->bm_suffix[idx];
     while(next) {
f70b93e1
 	if(pt[0] >= next->pattern0)
8000d078
 	    break;
 	prev = next;
 	next = next->next;
     }
 
c54133a1
     if(next == root->bm_suffix[idx]) {
8000d078
 	pattern->next = root->bm_suffix[idx];
ab1db3b3
 	if(root->bm_suffix[idx])
 	    pattern->cnt = root->bm_suffix[idx]->cnt;
8000d078
 	root->bm_suffix[idx] = pattern;
     } else {
 	pattern->next = prev->next;
 	prev->next = pattern;
     }
f70b93e1
     pattern->pattern0 = pattern->pattern[0];
ab1db3b3
     root->bm_suffix[idx]->cnt++;
8000d078
 
006f5fe6
     if(root->bm_offmode) {
 	root->bm_pattab = (struct cli_bm_patt **) mpool_realloc2(root->mempool, root->bm_pattab, (root->bm_patterns + 1) * sizeof(struct cli_bm_patt *));
 	if(!root->bm_pattab) {
 	    cli_errmsg("cli_bm_addpatt: Can't allocate memory for root->bm_pattab\n");
 	    return CL_EMEM;
 	}
 	root->bm_pattab[root->bm_patterns] = pattern;
 	if(pattern->offdata[0] != CLI_OFF_ABSOLUTE)
 	    pattern->offset_min = root->bm_patterns;
     }
 
4addba22
     root->bm_patterns++;
ab1db3b3
     return CL_SUCCESS;
8000d078
 }
 
5612732c
 int cli_bm_init(struct cli_matcher *root)
8000d078
 {
ab1db3b3
 	uint16_t i, size = HASH(255, 255, 255) + 1;
563582a1
 #ifdef USE_MPOOL
     assert (root->mempool && "mempool must be initialized");
 #endif
006f5fe6
 
47d40feb
     if(!(root->bm_shift = (uint8_t *) mpool_calloc(root->mempool, size, sizeof(uint8_t))))
8000d078
 	return CL_EMEM;
 
47d40feb
     if(!(root->bm_suffix = (struct cli_bm_patt **) mpool_calloc(root->mempool, size, sizeof(struct cli_bm_patt *)))) {
 	mpool_free(root->mempool, root->bm_shift);
8000d078
 	return CL_EMEM;
     }
 
e6e7bbee
     for(i = 0; i < size; i++)
8000d078
 	root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1;
 
ab1db3b3
     return CL_SUCCESS;
8000d078
 }
 
294558a5
 int cli_bm_initoff(const struct cli_matcher *root, struct cli_bm_off *data, const struct cli_target_info *info)
006f5fe6
 {
 	int ret;
 	unsigned int i;
 	struct cli_bm_patt *patt;
 
 
     if(!root->bm_patterns) {
6039e6d4
 	data->offtab = data->offset = NULL;
006f5fe6
 	data->cnt = data->pos = 0;
7962075a
 	return CL_SUCCESS;
006f5fe6
     }
 
     data->cnt = data->pos = 0;
e6f5ac51
     data->offtab = (uint32_t *) cli_malloc(root->bm_patterns * sizeof(uint32_t));
006f5fe6
     if(!data->offtab) {
 	cli_errmsg("cli_bm_initoff: Can't allocate memory for data->offtab\n");
 	return CL_EMEM;
     }
e6f5ac51
     data->offset = (uint32_t *) cli_malloc(root->bm_patterns * sizeof(uint32_t));
006f5fe6
     if(!data->offset) {
 	cli_errmsg("cli_bm_initoff: Can't allocate memory for data->offset\n");
e6f5ac51
 	free(data->offtab);
006f5fe6
 	return CL_EMEM;
     }
     for(i = 0; i < root->bm_patterns; i++) {
 	patt = root->bm_pattab[i];
 	if(patt->offdata[0] == CLI_OFF_ABSOLUTE) {
 	    data->offtab[data->cnt] = patt->offset_min + patt->prefix_length;
294558a5
 	    if(data->offtab[data->cnt] >= info->fsize)
fdabe265
 		continue;
006f5fe6
 	    data->cnt++;
294558a5
 	} else if((ret = cli_caloff(NULL, info, root->type, patt->offdata, &data->offset[patt->offset_min], NULL))) {
006f5fe6
 	    cli_errmsg("cli_bm_initoff: Can't calculate relative offset in signature for %s\n", patt->virname);
e6f5ac51
 	    free(data->offtab);
 	    free(data->offset);
006f5fe6
 	    return ret;
294558a5
 	} else if((data->offset[patt->offset_min] != CLI_OFF_NONE) && (data->offset[patt->offset_min] + patt->length <= info->fsize)) {
9d886862
 	    if(!data->cnt || (data->offset[patt->offset_min] + patt->prefix_length != data->offtab[data->cnt - 1])) {
006f5fe6
 		data->offtab[data->cnt] = data->offset[patt->offset_min] + patt->prefix_length;
294558a5
 		if(data->offtab[data->cnt] >= info->fsize)
fdabe265
 		    continue;
006f5fe6
 		data->cnt++;
 	    }
 	}
     }
 
424d41d3
     cli_qsort(data->offtab, data->cnt, sizeof(uint32_t), NULL);
006f5fe6
     return CL_SUCCESS;
 }
 
e6f5ac51
 void cli_bm_freeoff(struct cli_bm_off *data)
006f5fe6
 {
e6f5ac51
     free(data->offset);
     data->offset = NULL;
     free(data->offtab);
     data->offtab = NULL;
006f5fe6
 }
 
5612732c
 void cli_bm_free(struct cli_matcher *root)
8000d078
 {
ab1db3b3
 	struct cli_bm_patt *patt, *prev;
 	uint16_t i, size = HASH(255, 255, 255) + 1;
d4fb658e
 
 
8000d078
     if(root->bm_shift)
47d40feb
 	mpool_free(root->mempool, root->bm_shift);
8000d078
 
006f5fe6
     if(root->bm_pattab)
 	mpool_free(root->mempool, root->bm_pattab);
 
d4fb658e
     if(root->bm_suffix) {
e6e7bbee
 	for(i = 0; i < size; i++) {
ab1db3b3
 	    patt = root->bm_suffix[i];
 	    while(patt) {
 		prev = patt;
 		patt = patt->next;
 		if(prev->prefix)
47d40feb
 		    mpool_free(root->mempool, prev->prefix);
ab1db3b3
 		else
47d40feb
 		    mpool_free(root->mempool, prev->pattern);
ab1db3b3
 		if(prev->virname)
47d40feb
 		    mpool_free(root->mempool, prev->virname);
 		mpool_free(root->mempool, prev);
d4fb658e
 	    }
 	}
47d40feb
 	mpool_free(root->mempool, root->bm_suffix);
d4fb658e
     }
8000d078
 }
 
fb0a54dd
 int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, uint32_t *viroffset)
8000d078
 {
aca9ea82
 	uint32_t i, j, off, off_min, off_max;
ab1db3b3
 	uint8_t found, pchain, shift;
 	uint16_t idx, idxchk;
8000d078
 	struct cli_bm_patt *p;
ab1db3b3
 	const unsigned char *bp, *pt;
4e9ab8ed
 	unsigned char prefix;
33872a43
         int ret;
8000d078
 
1f2ebf0c
     if(!root || !root->bm_shift)
cdbf8c8e
 	return CL_CLEAN;
 
cd4db869
     if(length < BM_MIN_LENGTH)
 	return CL_CLEAN;
 
006f5fe6
     i = BM_MIN_LENGTH - BM_BLOCK_SIZE;
9b7f6ede
     if(offdata) {
0d926a18
 	if(!offdata->cnt)
 	    return CL_CLEAN;
a205da7f
 	if(offdata->pos == offdata->cnt)
 	    offdata->pos--;
26f558a6
 	for(; offdata->pos && offdata->offtab[offdata->pos] > offset; offdata->pos--);
 	if(offdata->offtab[offdata->pos] < offset)
 	    offdata->pos++;
 	if(offdata->pos >= offdata->cnt)
006f5fe6
 	    return CL_CLEAN;
fedd1ac5
 	i += offdata->offtab[offdata->pos] - offset;
006f5fe6
     }
     for(; i < length - BM_BLOCK_SIZE + 1; ) {
242efc14
 	idx = HASH(buffer[i], buffer[i + 1], buffer[i + 2]);
8000d078
 	shift = root->bm_shift[idx];
 
 	if(shift == 0) {
 	    prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE];
 	    p = root->bm_suffix[idx];
f28db33f
 	    if(p && p->cnt == 1 && p->pattern0 != prefix) {
9b7f6ede
 		if(offdata) {
006f5fe6
 		    off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
550dc2b3
 		    for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
006f5fe6
 		    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
 			return CL_CLEAN;
 		    i += offdata->offtab[offdata->pos] - off;
 		} else {
 		    i++;
 		}
f28db33f
 		continue;
 	    }
ab1db3b3
 	    pchain = 0;
 	    while(p) {
f70b93e1
 		if(p->pattern0 != prefix) {
ab1db3b3
 		    if(pchain)
 			break;
 		    p = p->next;
 		    continue;
 		} else pchain = 1;
8000d078
 
 		off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
 		bp = buffer + off;
242efc14
 
ab1db3b3
 		if((off + p->length > length) || (p->prefix_length > off)) {
242efc14
 		    p = p->next;
 		    continue;
 		}
 
9b7f6ede
 		if(offdata) {
006f5fe6
 		    if(p->offdata[0] == CLI_OFF_ABSOLUTE) {
 			if(p->offset_min != offset + off - p->prefix_length) {
 			    p = p->next;
 			    continue;
 			}
 		    } else if((offdata->offset[p->offset_min] == CLI_OFF_NONE) || (offdata->offset[p->offset_min] != offset + off - p->prefix_length)) {
 			p = p->next;
 			continue;
 		    }
 		}
 
ab1db3b3
 		idxchk = MIN(p->length, length - off) - 1;
 		if(idxchk) {
 		    if((bp[idxchk] != p->pattern[idxchk]) ||  (bp[idxchk / 2] != p->pattern[idxchk / 2])) {
dd7c2206
 			p = p->next;
 			continue;
 		    }
 		}
 
ab1db3b3
 		if(p->prefix_length) {
 		    off -= p->prefix_length;
 		    bp -= p->prefix_length;
 		    pt = p->prefix;
 		} else {
 		    pt = p->pattern;
 		}
 
8000d078
 		found = 1;
ab1db3b3
 		for(j = 0; j < p->length + p->prefix_length && off < length; j++, off++) {
 		    if(bp[j] != pt[j]) {
8000d078
 			found = 0;
 			break;
 		    }
 		}
5afda272
 
04133ff9
 		if(found && (p->boundary & BM_BOUNDARY_EOL)) {
 		    if(off != length) {
 			p = p->next;
 			continue;
 		    }
 		}
 
ab1db3b3
 		if(found && p->length + p->prefix_length == j) {
9b7f6ede
 		    if(!offdata && (p->offset_min != CLI_OFF_ANY)) {
 			if(p->offdata[0] != CLI_OFF_ABSOLUTE) {
e766ee04
 			    if(!info) {
 				p = p->next;
 				continue;
 			    }
294558a5
 			    ret = cli_caloff(NULL, info, root->type, p->offdata, &off_min, &off_max);
9b7f6ede
 			    if(ret != CL_SUCCESS) {
 				cli_errmsg("cli_bm_scanbuff: Can't calculate relative offset in signature for %s\n", p->virname);
 				return ret;
33872a43
 			    }
9b7f6ede
 			} else {
 			    off_min = p->offset_min;
 			    off_max = p->offset_max;
 			}
 			off = offset + i - p->prefix_length - BM_MIN_LENGTH + BM_BLOCK_SIZE;
684399dc
 			if(off_min == CLI_OFF_NONE || off_max < off || off_min > off) {
9b7f6ede
 			    p = p->next;
 			    continue;
7ec67e94
 			}
b68d11d2
 		    }
fb0a54dd
 		    if(virname) {
9b7f6ede
 			*virname = p->virname;
fb0a54dd
 			if(viroffset)
 			    *viroffset = offset + i + j - BM_MIN_LENGTH + BM_BLOCK_SIZE;
 		    }
6c26e99c
 		    if(patt)
 			*patt = p;
9b7f6ede
 		    return CL_VIRUS;
8000d078
 		}
 		p = p->next;
 	    }
 	    shift = 1;
 	}
 
6d3c5bec
 	if(offdata) {
006f5fe6
 	    off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
550dc2b3
 	    for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
006f5fe6
 	    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
 		return CL_CLEAN;
 	    i += offdata->offtab[offdata->pos] - off;
 	} else {
 	    i += shift;
 	}
 
8000d078
     }
 
841161e0
     return CL_CLEAN;
8000d078
 }