libclamav/matcher-bm.c
f91f55e0
 /*
e483f51a
  *  Copyright (C) 2004 - 2005 Tomasz Kojm <tkojm@clamav.net>
f91f55e0
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include "clamav.h"
 #include "memory.h"
 #include "others.h"
 #include "cltypes.h"
df757556
 #include "matcher.h"
f91f55e0
 
9e0b42ba
 /* TODO: Check prefix regularity and automatically transfer some signatures
  *	 to AC
  */
 
 #define BM_MIN_LENGTH	3
 /* #define BM_TEST_OFFSET	5 */
3bb47bb7
 #define BM_BLOCK_SIZE	3
f91f55e0
 
 #define MIN(a,b) (a < b) ? a : b
3bb47bb7
 #define HASH(a,b,c) 211 * (unsigned char) a + 37 * (unsigned char) b + (unsigned char) c
4cf072ae
 #define DHASH(a,b,c) 211 * a + 37 * b + c
f91f55e0
 
 
 int cli_bm_addpatt(struct cl_node *root, struct cli_bm_patt *pattern)
 {
 	int i;
 	uint16_t idx;
 	const char *pt = pattern->pattern;
 	struct cli_bm_patt *prev, *next = NULL;
 
 
     if(pattern->length < BM_MIN_LENGTH) {
028684cc
 	cli_errmsg("Signature for %s is too short\n", pattern->virname);
 	return CL_EPATSHORT;
f91f55e0
     }
 
     for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i >= 0; i--) {
3bb47bb7
 	idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
f91f55e0
 	root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i);
     }
 
     i = BM_MIN_LENGTH - BM_BLOCK_SIZE;
3bb47bb7
     idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
f91f55e0
 
     prev = next = root->bm_suffix[idx];
 
     while(next) {
c1c326a6
 	if(pt[0] >= next->pattern[0])
f91f55e0
 	    break;
 	prev = next;
 	next = next->next;
     }
 
c1c326a6
     if(next == root->bm_suffix[idx]) {
f91f55e0
 	pattern->next = root->bm_suffix[idx];
 	root->bm_suffix[idx] = pattern;
     } else {
 	pattern->next = prev->next;
 	prev->next = pattern;
     }
 
     return 0;
 }
 
 int cli_bm_init(struct cl_node *root)
 {
a8b53539
 	unsigned int i;
4cf072ae
 	unsigned int size = DHASH(256, 256, 256);
f91f55e0
 
 
     cli_dbgmsg("in cli_bm_init()\n");
4cf072ae
     cli_dbgmsg("BM: Number of indexes = %d\n", size);
f91f55e0
 
4cf072ae
     if(!(root->bm_shift = (int *) cli_malloc(size * sizeof(int))))
f91f55e0
 	return CL_EMEM;
 
4cf072ae
     if(!(root->bm_suffix = (struct cli_bm_patt **) cli_calloc(size, sizeof(struct cli_bm_patt *)))) {
f91f55e0
 	free(root->bm_shift);
 	return CL_EMEM;
     }
 
4cf072ae
     for(i = 0; i < size; i++)
f91f55e0
 	root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1;
 
     return 0;
 }
 
 void cli_bm_free(struct cl_node *root)
 {
fe1df175
 	struct cli_bm_patt *b1, *b2;
a8b53539
 	unsigned int i;
4cf072ae
 	unsigned int size = DHASH(256, 256, 256);
fe1df175
 
 
f91f55e0
     if(root->bm_shift)
 	free(root->bm_shift);
 
fe1df175
     if(root->bm_suffix) {
4cf072ae
 	for(i = 0; i < size; i++) {
fe1df175
 	    b1 = root->bm_suffix[i];
 	    while(b1) {
 		b2 = b1;
 		b1 = b1->next;
 		if(b2->virname)
 		    free(b2->virname);
df757556
 		if(b2->offset)
 		    free(b2->offset);
fe1df175
 		if(b2->pattern)
 		    free(b2->pattern);
 		free(b2);
 	    }
 	}
f91f55e0
 	free(root->bm_suffix);
fe1df175
     }
f91f55e0
 }
 
856d9c84
 int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, unsigned long int offset, unsigned short ftype, int fd)
f91f55e0
 {
a8b53539
 	unsigned int i, j, shift, off, found = 0;
f91f55e0
 	uint16_t idx;
 	struct cli_bm_patt *p;
 	const char *bp;
 	char prefix;
 
 
e263999b
     if(!root->bm_shift)
 	return CL_CLEAN;
 
6d095996
     if(length < BM_MIN_LENGTH)
 	return CL_CLEAN;
 
aef3cfea
     for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i < length - BM_BLOCK_SIZE + 1; ) {
3bb47bb7
 	idx = HASH(buffer[i], buffer[i + 1], buffer[i + 2]);
f91f55e0
 
 	shift = root->bm_shift[idx];
 
 	if(shift == 0) {
 
 	    prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE];
 	    p = root->bm_suffix[idx];
 
 	    while(p && p->pattern[0] != prefix)
 		p = p->next;
 
 	    while(p && p->pattern[0] == prefix) {
 		off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
 		bp = buffer + off;
3bb47bb7
 
9e0b42ba
 #ifdef BM_TEST_OFFSET
3bb47bb7
 		if(bp[BM_TEST_OFFSET] != p->pattern[BM_TEST_OFFSET]) {
 		    p = p->next;
 		    continue;
 		}
9e0b42ba
 #endif
3bb47bb7
 
f91f55e0
 		found = 1;
 		for(j = 0; j < p->length && off < length; j++, off++) {
 		    if(bp[j] != p->pattern[j]) {
 			found = 0;
 			break;
 		    }
 		}
e7f29ff2
 
f91f55e0
 		if(found && p->length == j) {
856d9c84
 
 		    if(p->target || p->offset) {
bc10f08f
 			off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
 
e483f51a
 			if((fd == -1 && !ftype) || !cli_validatesig(p->target, ftype, p->offset, off, fd, p->virname)) {
856d9c84
 			    p = p->next;
 			    continue;
 			}
df757556
 		    }
 
f91f55e0
 		    if(virname)
 			*virname = p->virname;
 
 		    return CL_VIRUS;
 		}
e7f29ff2
 
f91f55e0
 		p = p->next;
 	    }
 
 	    shift = 1;
 	}
 
 	i += shift;
     }
 
     return 0;
 }