clamav-devel/libclamav/matcher-bm.c
8000d078
 /*
81df5163
  *  Copyright (C) 2004 - 2005 Tomasz Kojm <tkojm@clamav.net>
8000d078
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
8000d078
  */
 
 #include "clamav.h"
 #include "memory.h"
 #include "others.h"
 #include "cltypes.h"
b68d11d2
 #include "matcher.h"
079229d6
 #include "matcher-bm.h"
8000d078
 
7fd366a3
 /* TODO: Check prefix regularity and automatically transfer some signatures
  *	 to AC
  */
 
 #define BM_MIN_LENGTH	3
 /* #define BM_TEST_OFFSET	5 */
242efc14
 #define BM_BLOCK_SIZE	3
8000d078
 
242efc14
 #define HASH(a,b,c) 211 * (unsigned char) a + 37 * (unsigned char) b + (unsigned char) c
e6e7bbee
 #define DHASH(a,b,c) 211 * a + 37 * b + c
8000d078
 
 
5612732c
 int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern)
8000d078
 {
 	int i;
 	uint16_t idx;
 	const char *pt = pattern->pattern;
 	struct cli_bm_patt *prev, *next = NULL;
 
 
     if(pattern->length < BM_MIN_LENGTH) {
a040281a
 	cli_errmsg("Signature for %s is too short\n", pattern->virname);
 	return CL_EPATSHORT;
8000d078
     }
 
     for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i >= 0; i--) {
242efc14
 	idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
8000d078
 	root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i);
     }
 
     i = BM_MIN_LENGTH - BM_BLOCK_SIZE;
242efc14
     idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
8000d078
 
     prev = next = root->bm_suffix[idx];
 
     while(next) {
c54133a1
 	if(pt[0] >= next->pattern[0])
8000d078
 	    break;
 	prev = next;
 	next = next->next;
     }
 
c54133a1
     if(next == root->bm_suffix[idx]) {
8000d078
 	pattern->next = root->bm_suffix[idx];
 	root->bm_suffix[idx] = pattern;
     } else {
 	pattern->next = prev->next;
 	prev->next = pattern;
     }
 
     return 0;
 }
 
5612732c
 int cli_bm_init(struct cli_matcher *root)
8000d078
 {
33f89aa5
 	unsigned int i;
e6e7bbee
 	unsigned int size = DHASH(256, 256, 256);
8000d078
 
 
     cli_dbgmsg("in cli_bm_init()\n");
e6e7bbee
     cli_dbgmsg("BM: Number of indexes = %d\n", size);
8000d078
 
e6e7bbee
     if(!(root->bm_shift = (int *) cli_malloc(size * sizeof(int))))
8000d078
 	return CL_EMEM;
 
e6e7bbee
     if(!(root->bm_suffix = (struct cli_bm_patt **) cli_calloc(size, sizeof(struct cli_bm_patt *)))) {
8000d078
 	free(root->bm_shift);
 	return CL_EMEM;
     }
 
e6e7bbee
     for(i = 0; i < size; i++)
8000d078
 	root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1;
 
     return 0;
 }
 
5612732c
 void cli_bm_free(struct cli_matcher *root)
8000d078
 {
d4fb658e
 	struct cli_bm_patt *b1, *b2;
33f89aa5
 	unsigned int i;
e6e7bbee
 	unsigned int size = DHASH(256, 256, 256);
d4fb658e
 
 
8000d078
     if(root->bm_shift)
 	free(root->bm_shift);
 
d4fb658e
     if(root->bm_suffix) {
e6e7bbee
 	for(i = 0; i < size; i++) {
d4fb658e
 	    b1 = root->bm_suffix[i];
 	    while(b1) {
 		b2 = b1;
 		b1 = b1->next;
 		if(b2->virname)
 		    free(b2->virname);
b68d11d2
 		if(b2->offset)
 		    free(b2->offset);
d4fb658e
 		if(b2->pattern)
 		    free(b2->pattern);
 		free(b2);
 	    }
 	}
8000d078
 	free(root->bm_suffix);
d4fb658e
     }
8000d078
 }
 
5612732c
 int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd)
8000d078
 {
33f89aa5
 	unsigned int i, j, shift, off, found = 0;
8000d078
 	uint16_t idx;
 	struct cli_bm_patt *p;
 	const char *bp;
 	char prefix;
 
 
cdbf8c8e
     if(!root->bm_shift)
 	return CL_CLEAN;
 
cd4db869
     if(length < BM_MIN_LENGTH)
 	return CL_CLEAN;
 
621939fa
     for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i < length - BM_BLOCK_SIZE + 1; ) {
242efc14
 	idx = HASH(buffer[i], buffer[i + 1], buffer[i + 2]);
8000d078
 
 	shift = root->bm_shift[idx];
 
 	if(shift == 0) {
 
 	    prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE];
 	    p = root->bm_suffix[idx];
 
 	    while(p && p->pattern[0] != prefix)
 		p = p->next;
 
 	    while(p && p->pattern[0] == prefix) {
 		off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
 		bp = buffer + off;
242efc14
 
7fd366a3
 #ifdef BM_TEST_OFFSET
242efc14
 		if(bp[BM_TEST_OFFSET] != p->pattern[BM_TEST_OFFSET]) {
 		    p = p->next;
 		    continue;
 		}
7fd366a3
 #endif
242efc14
 
8000d078
 		found = 1;
 		for(j = 0; j < p->length && off < length; j++, off++) {
 		    if(bp[j] != p->pattern[j]) {
 			found = 0;
 			break;
 		    }
 		}
5afda272
 
8000d078
 		if(found && p->length == j) {
7ec67e94
 
 		    if(p->target || p->offset) {
30c2ea29
 			off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
 
81df5163
 			if((fd == -1 && !ftype) || !cli_validatesig(p->target, ftype, p->offset, off, fd, p->virname)) {
7ec67e94
 			    p = p->next;
 			    continue;
 			}
b68d11d2
 		    }
 
8000d078
 		    if(virname)
 			*virname = p->virname;
 
 		    return CL_VIRUS;
 		}
5afda272
 
8000d078
 		p = p->next;
 	    }
 
 	    shift = 1;
 	}
 
 	i += shift;
     }
 
     return 0;
 }