libclamav/matcher.c
b151ef55
 /*
  *  C implementation of the Aho-Corasick pattern matching algorithm. It's based
  *  on ScannerDaemon's Java version by Kurt Huwig and
  *  http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
  *  Thanks to Kurt Huwig for pointing me to this page.
  *
cc938d61
  *  Copyright (C) 2002 - 2004 Tomasz Kojm <tkojm@clamav.net>
b151ef55
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
8b242bb9
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
b151ef55
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <unistd.h>
 
 #include "clamav.h"
 #include "others.h"
 #include "matcher.h"
 #include "unrarlib.h"
 #include "defaults.h"
2fe19b26
 #include "filetypes.h"
b151ef55
 
442d8407
 int cli_addpatt(struct cl_node *root, struct cli_patt *pattern)
b151ef55
 {
ff8cb48b
 	struct cli_ac_node *pos, *next;
b151ef55
 	int i;
 
     if(pattern->length < CL_MIN_LENGTH) {
 	return CL_EPATSHORT;
     }
 
ff8cb48b
     pos = root->ac_root;
b151ef55
 
     for(i = 0; i < CL_MIN_LENGTH; i++) {
 	next = pos->trans[((unsigned char) pattern->pattern[i]) & 0xff]; 
 
 	if(!next) {
ff8cb48b
 	    next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
9c1c9007
 	    if(!next) {
 		cli_dbgmsg("Unable to allocate pattern node (%d)\n", sizeof(struct cl_node));
b151ef55
 		return CL_EMEM;
9c1c9007
 	    }
b151ef55
 
 	    root->nodes++;
ff8cb48b
 	    root->nodetable = (struct cli_ac_node **) realloc(root->nodetable, (root->nodes) * sizeof(struct cli_ac_node *));
9c1c9007
 	    if (root->nodetable == NULL) {
 		cli_dbgmsg("Unable to realloc nodetable (%d)\n", (root->nodes) * sizeof(struct cl_node *));
 		return CL_EMEM;
 	    }
b151ef55
 	    root->nodetable[root->nodes - 1] = next;
 
 	    pos->trans[((unsigned char) pattern->pattern[i]) & 0xff] = next;
 	}
 
 	pos = next;
     }
 
     pos->islast = 1;
 
     pattern->next = pos->list;
     pos->list = pattern;
 
     return 0;
 }
 
ff8cb48b
 static int cli_enqueue(struct nodelist **bfs, struct cli_ac_node *n)
b151ef55
 {
 	struct nodelist *new;
 
     new = (struct nodelist *) cli_calloc(1, sizeof(struct nodelist));
9c1c9007
     if (new == NULL) {
 	cli_dbgmsg("Unable to allocate node list (%d)\n", sizeof(struct nodelist));
e8217f5a
 	return CL_EMEM;
9c1c9007
     }
b151ef55
 
     new->next = *bfs;
     new->node = n;
     *bfs = new;
e8217f5a
     return 0;
b151ef55
 }
 
ff8cb48b
 static struct cli_ac_node *cli_dequeue(struct nodelist **bfs)
b151ef55
 {
 	struct nodelist *handler, *prev = NULL;
ff8cb48b
 	struct cli_ac_node *pt;
b151ef55
 
     handler = *bfs;
 
     while(handler && handler->next) {
 	prev = handler;
 	handler = handler->next;
     }
 
     if(!handler) {
 	return NULL;
     } else {
 	pt = handler->node;
 	free(handler);
 	if(prev)
 	    prev->next = NULL;
 	else
 	    *bfs = NULL;
 
 	return pt;
     }
 }
 
cf0744c5
 static int cli_maketrans(struct cl_node *root)
b151ef55
 {
 	struct nodelist *bfs = NULL;
ff8cb48b
 	struct cli_ac_node *ac_root = root->ac_root, *child, *node;
e8217f5a
 	int i, ret;
b151ef55
 
 
ff8cb48b
     ac_root->fail = NULL;
     if((ret = cli_enqueue(&bfs, ac_root)) != 0) {
e8217f5a
 	return ret;
     }
b151ef55
 
     while((node = cli_dequeue(&bfs))) {
 	if(node->islast)
 	    continue;
 
 	for(i = 0; i < CL_NUM_CHILDS; i++) {
 	    child = node->trans[i];
 	    if(!child) {
 		if(node->fail)
 		    node->trans[i] = (node->fail)->trans[i];
 		else
ff8cb48b
 		    node->trans[i] = ac_root;
b151ef55
 	    } else {
 		if(node->fail)
 		    child->fail = (node->fail)->trans[i];
 		else
ff8cb48b
 		    child->fail = ac_root;
b151ef55
 
e8217f5a
 		if((ret = cli_enqueue(&bfs, child)) != 0) {
 		    return ret;
 		}
b151ef55
 	    }
 	}
     }
e8217f5a
     return 0;
b151ef55
 }
 
e8217f5a
 int cl_buildtrie(struct cl_node *root)
b151ef55
 {
2fe19b26
 	int ret;
 
     if(!root)
 	return CL_EMALFDB;
 
ff8cb48b
     if(!root->ac_root) {
 	cli_dbgmsg("Pattern matcher not initialised\n");
 	return 0;
     }
 
2fe19b26
     if((ret = cli_addtypesigs(root)))
 	return ret;
 
e8217f5a
     return cli_maketrans(root);
b151ef55
 }
 
cf0744c5
 static void cli_freepatt(struct cli_patt *list)
b151ef55
 {
442d8407
 	struct cli_patt *handler, *prev;
d898865b
 	int i;
b151ef55
 
 
     handler = list;
 
     while(handler) {
 	free(handler->pattern);
 	free(handler->virname);
d898865b
 	if(handler->alt) {
 	    free(handler->altn);
 	    for(i = 0; i < handler->alt; i++)
 		free(handler->altc[i]);
 	    free(handler->altc);
 	}
b151ef55
 	prev = handler;
 	handler = handler->next;
 	free(prev);
     }
 }
 
 void cl_freetrie(struct cl_node *root)
 {
cf0744c5
 	unsigned int i;
b151ef55
 
ff8cb48b
 
b151ef55
     for(i = 0; i < root->nodes; i++) {
 	cli_freepatt(root->nodetable[i]->list);
 	free(root->nodetable[i]);
     }
 
     free(root->nodetable);
ff8cb48b
     free(root->ac_root);
b151ef55
     free(root);
 }
 
1f73f3ff
 int inline cli_findpos(const char *buffer, int offset, int length, const struct cli_patt *pattern)
 {
 	int bufferpos = offset + CL_MIN_LENGTH;
 	int postfixend = offset + length;
d898865b
 	unsigned int i, j, alt = 0, found = 0;
1f73f3ff
 
d898865b
 
     if(bufferpos >= length)
1f73f3ff
 	bufferpos %= length;
 
     for(i = CL_MIN_LENGTH; i < pattern->length; i++) {
 
 	if(bufferpos == postfixend)
 	    return 0;
 
d898865b
 	if(pattern->pattern[i] == CLI_ALT) {
 	    for(j = 0; j < pattern->altn[alt]; j++) {
 		if(pattern->altc[alt][j] == buffer[bufferpos])
 		    found = 1;
 	    }
 
 	    if(!found)
 		return 0;
 	    alt++;
 
 	} else if(pattern->pattern[i] != CLI_IGN && (char) pattern->pattern[i] != buffer[bufferpos])
1f73f3ff
 	    return 0;
 
 	bufferpos++;
 
d898865b
 	if(bufferpos == length)
1f73f3ff
 	    bufferpos = 0;
     }
 
     return 1;
 }
 
d898865b
 int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff)
b151ef55
 {
ff8cb48b
 	struct cli_ac_node *current;
442d8407
 	struct cli_patt *pt;
d898865b
 	int position, type = CL_CLEAN, dist;
cf0744c5
         unsigned int i;
b151ef55
 
cc938d61
 
ff8cb48b
     if(!root->ac_root) {
 	cli_dbgmsg("cli_scanbuff: Pattern matcher not initialised\n");
 	return CL_CLEAN;
     }
b151ef55
 
d898865b
     if(!partcnt || !partoff) {
 	cli_dbgmsg("cli_scanbuff(): partcnt == NULL || partoff == NULL\n");
cc938d61
 	return CL_EMEM;
9c1c9007
     }
 
ff8cb48b
     current = root->ac_root;
 
b151ef55
     for(i = 0; i < length; i++)  {
 	current = current->trans[(unsigned char) buffer[i] & 0xff];
 
 	if(current->islast) {
 	    position = i - CL_MIN_LENGTH + 1;
 
 	    pt = current->list;
 	    while(pt) {
 		if(cli_findpos(buffer, position, length, pt)) {
 		    if(pt->sigid) { /* it's a partial signature */
 			if(partcnt[pt->sigid] + 1 == pt->partno) {
2fe19b26
 
d898865b
 			    dist = 1;
 			    if(pt->maxdist)
 				if(offset + i - partoff[pt->sigid] > pt->maxdist)
 				    dist = 0;
 
 			    if(dist && pt->mindist)
 				if(offset + i - partoff[pt->sigid] < pt->mindist)
 				    dist = 0;
 
 			    if(dist) {
 				partoff[pt->sigid] = offset + i + pt->length;
 
 				if(++partcnt[pt->sigid] == pt->parts) { /* the last one */
 				    if(pt->type) {
 					if(typerec) {
c00d0af2
 					    if(pt->type > type) {
 						cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
d898865b
 						type = pt->type;
c00d0af2
 					    }
d898865b
 					}
 				    } else {
 					if(virname)
 					    *virname = pt->virname;
 
 					return CL_VIRUS;
 				    }
2fe19b26
 				}
b151ef55
 			    }
 			}
d898865b
 
b151ef55
 		    } else { /* old type signature */
2fe19b26
 			if(pt->type) {
 			    if(typerec) {
c00d0af2
 				if(pt->type > type) {
 				    cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
 
 				    type = pt->type;
 				}
2fe19b26
 			    }
 			} else {
 			    if(virname)
 				*virname = pt->virname;
 
 			    return CL_VIRUS;
 			}
b151ef55
 		    }
 		}
 
 		pt = pt->next;
 	    }
 
 	    current = current->fail;
 	}
     }
 
2fe19b26
     return typerec ? type : CL_CLEAN;
b151ef55
 }
 
fdfb0dd7
 int cl_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root)
 
 {
cc938d61
 	int ret, *partcnt;
d898865b
 	unsigned long int *partoff;
cc938d61
 
2fe19b26
 
cc938d61
     if((partcnt = (int *) cli_calloc(root->partsigs + 1, sizeof(int))) == NULL) {
 	cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(int));
 	return CL_EMEM;
     }
 
d898865b
     if((partoff = (unsigned long int *) cli_calloc(root->partsigs + 1, sizeof(unsigned long int))) == NULL) {
 	cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(unsigned long int));
 	return CL_EMEM;
     }
 
     ret = cli_scanbuff(buffer, length, virname, root, partcnt, 0, 0, partoff);
cc938d61
 
     free(partcnt);
     return ret;
fdfb0dd7
 }