libclamav/matcher-ac.c
f91f55e0
 /*
  *  C implementation of the Aho-Corasick pattern matching algorithm. It's based
05c85779
  *  on the ScannerDaemon's version (coded in Java) by Kurt Huwig and
f91f55e0
  *  http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
  *  Thanks to Kurt Huwig for pointing me to this page.
  *
7f7736bb
  *  Copyright (C) 2002 - 2006 Tomasz Kojm <tkojm@clamav.net>
f91f55e0
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
30738099
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
f91f55e0
  */
 
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
59afa53d
 #ifdef	HAVE_UNISTD_H
f91f55e0
 #include <unistd.h>
59afa53d
 #endif
f91f55e0
 
 #include "clamav.h"
 #include "others.h"
df757556
 #include "matcher.h"
f91f55e0
 #include "matcher-ac.h"
 #include "defaults.h"
 #include "filetypes.h"
ee246f49
 #include "cltypes.h"
f91f55e0
 
 struct nodelist {
     struct cli_ac_node *node;
     struct nodelist *next;
 };
 
9f986368
 unsigned short ac_depth = AC_DEFAULT_DEPTH;
 
f477691c
 int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern)
f91f55e0
 {
 	struct cli_ac_node *pos, *next;
 	int i;
 
9f986368
     if(pattern->length < ac_depth)
f91f55e0
 	return CL_EPATSHORT;
 
     pos = root->ac_root;
 
9f986368
     for(i = 0; i < ac_depth; i++) {
f91f55e0
 	next = pos->trans[((unsigned char) pattern->pattern[i]) & 0xff]; 
 
 	if(!next) {
 	    next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
 	    if(!next) {
d4405156
 		cli_errmsg("cli_ac_addpatt(): Unable to allocate AC node (%u bytes)\n", sizeof(struct cli_ac_node));
f91f55e0
 		return CL_EMEM;
 	    }
 
 	    root->ac_nodes++;
 	    root->ac_nodetable = (struct cli_ac_node **) cli_realloc(root->ac_nodetable, (root->ac_nodes) * sizeof(struct cli_ac_node *));
 	    if(root->ac_nodetable == NULL) {
d4405156
 		cli_errmsg("cli_ac_addpatt(): Unable to realloc nodetable (%u bytes)\n", (root->ac_nodes) * sizeof(struct cli_matcher *));
f91f55e0
 		return CL_EMEM;
 	    }
 	    root->ac_nodetable[root->ac_nodes - 1] = next;
 
 	    pos->trans[((unsigned char) pattern->pattern[i]) & 0xff] = next;
 	}
 
 	pos = next;
     }
 
     pos->islast = 1;
 
     pattern->next = pos->list;
     pos->list = pattern;
 
7f7736bb
     return CL_SUCCESS;
f91f55e0
 }
 
 static int cli_enqueue(struct nodelist **bfs, struct cli_ac_node *n)
 {
 	struct nodelist *new;
 
     new = (struct nodelist *) cli_calloc(1, sizeof(struct nodelist));
     if (new == NULL) {
d4405156
 	cli_errmsg("cli_enqueue(): Unable to allocate node list (%u bytes)\n", sizeof(struct nodelist));
f91f55e0
 	return CL_EMEM;
     }
 
     new->next = *bfs;
     new->node = n;
     *bfs = new;
7f7736bb
     return CL_SUCCESS;
f91f55e0
 }
 
 static struct cli_ac_node *cli_dequeue(struct nodelist **bfs)
 {
 	struct nodelist *handler, *prev = NULL;
 	struct cli_ac_node *pt;
 
     handler = *bfs;
 
     while(handler && handler->next) {
 	prev = handler;
 	handler = handler->next;
     }
 
     if(!handler) {
 	return NULL;
     } else {
 	pt = handler->node;
 	free(handler);
 	if(prev)
 	    prev->next = NULL;
 	else
 	    *bfs = NULL;
 
 	return pt;
     }
 }
 
f477691c
 static int cli_maketrans(struct cli_matcher *root)
f91f55e0
 {
 	struct nodelist *bfs = NULL;
 	struct cli_ac_node *ac_root = root->ac_root, *child, *node;
 	int i, ret;
 
 
     ac_root->fail = NULL;
     if((ret = cli_enqueue(&bfs, ac_root)) != 0) {
 	return ret;
     }
 
     while((node = cli_dequeue(&bfs))) {
 	if(node->islast)
 	    continue;
 
 	for(i = 0; i < 256; i++) {
 	    child = node->trans[i];
 	    if(!child) {
 		if(node->fail)
 		    node->trans[i] = (node->fail)->trans[i];
 		else
 		    node->trans[i] = ac_root;
 	    } else {
 		if(node->fail)
 		    child->fail = (node->fail)->trans[i];
 		else
 		    child->fail = ac_root;
 
 		if((ret = cli_enqueue(&bfs, child)) != 0) {
 		    return ret;
 		}
 	    }
 	}
     }
7f7736bb
     return CL_SUCCESS;
f91f55e0
 }
 
f477691c
 int cli_ac_buildtrie(struct cli_matcher *root)
f91f55e0
 {
 
     if(!root)
 	return CL_EMALFDB;
 
     if(!root->ac_root) {
d4405156
 	cli_dbgmsg("cli_ac_buildtrie(): AC pattern matcher is not initialised\n");
7f7736bb
 	return CL_SUCCESS;
f91f55e0
     }
 
     return cli_maketrans(root);
 }
 
 static void cli_freepatt(struct cli_ac_patt *list)
 {
 	struct cli_ac_patt *handler, *prev;
 	int i;
 
 
     handler = list;
 
     while(handler) {
ff5d8419
 	if(handler->prefix)
 	    free(handler->prefix);
 	else
 	    free(handler->pattern);
f91f55e0
 	free(handler->virname);
856d9c84
 	if(handler->offset && (!handler->sigid || handler->partno == 1))
df757556
 	    free(handler->offset);
f91f55e0
 	if(handler->alt) {
 	    free(handler->altn);
 	    for(i = 0; i < handler->alt; i++)
 		free(handler->altc[i]);
 	    free(handler->altc);
 	}
 	prev = handler;
 	handler = handler->next;
 	free(prev);
     }
 }
 
f477691c
 void cli_ac_free(struct cli_matcher *root)
f91f55e0
 {
 	unsigned int i;
 
 
     for(i = 0; i < root->ac_nodes; i++) {
 	cli_freepatt(root->ac_nodetable[i]->list);
 	free(root->ac_nodetable[i]);
     }
 
     if(root->ac_nodetable)
 	free(root->ac_nodetable);
 
     if(root->ac_root)
 	free(root->ac_root);
 }
 
d4405156
 inline static int cli_findpos(const unsigned char *buffer, unsigned int depth, unsigned int offset, unsigned int length, const struct cli_ac_patt *pattern)
f91f55e0
 {
2989d403
 	unsigned int bufferpos = offset + depth;
 	unsigned int postfixend = offset + length;
ff5d8419
 	unsigned int i, j, alt = pattern->alt_pattern, found;
 
f91f55e0
 
ff5d8419
     if(pattern->prefix)
 	if(pattern->prefix_length > offset)
 	    return 0;
f91f55e0
 
     if(bufferpos >= length)
 	bufferpos %= length;
 
2989d403
     for(i = depth; i < pattern->length; i++) {
f91f55e0
 
 	if(bufferpos == postfixend)
 	    return 0;
 
 	if(pattern->pattern[i] == CLI_ALT) {
4e8fccfd
 	    found = 0;
f91f55e0
 	    for(j = 0; j < pattern->altn[alt]; j++) {
677e8430
 		if(pattern->altc[alt][j] == buffer[bufferpos]) {
f91f55e0
 		    found = 1;
677e8430
 		    break;
 		}
f91f55e0
 	    }
 
 	    if(!found)
 		return 0;
 	    alt++;
 
d4405156
 	} else if(pattern->pattern[i] != CLI_IGN && (unsigned char) pattern->pattern[i] != buffer[bufferpos])
f91f55e0
 	    return 0;
 
 	bufferpos++;
 
 	if(bufferpos == length)
 	    bufferpos = 0;
     }
 
ff5d8419
     if(pattern->prefix) {
 	alt = 0;
 	bufferpos = offset - pattern->prefix_length;
 
 	for(i = 0; i < pattern->prefix_length; i++) {
 
 	    if(pattern->prefix[i] == CLI_ALT) {
 		found = 0;
 		for(j = 0; j < pattern->altn[alt]; j++) {
677e8430
 		    if(pattern->altc[alt][j] == buffer[bufferpos]) {
ff5d8419
 			found = 1;
677e8430
 			break;
 		    }
ff5d8419
 		}
 
 		if(!found)
 		    return 0;
 		alt++;
 
d4405156
 	    } else if(pattern->prefix[i] != CLI_IGN && (unsigned char) pattern->prefix[i] != buffer[bufferpos])
ff5d8419
 		return 0;
 
 	    bufferpos++;
 	}
     }
 
f91f55e0
     return 1;
 }
 
d4405156
 int cli_ac_initdata(struct cli_ac_data *data, unsigned int partsigs, unsigned int tracklen)
 {
 	unsigned int i, j;
 
 
     if(!data) {
 	cli_errmsg("cli_ac_init(): data == NULL\n");
 	return CL_ENULLARG;
     }
 
     data->partsigs = partsigs;
 
     if(!partsigs)
 	return CL_SUCCESS;
 
     data->partcnt = (unsigned int *) cli_calloc(partsigs, sizeof(unsigned int));
 
     if(!data->partcnt) {
 	cli_errmsg("cli_ac_init(): unable to cli_calloc(%u, %u)\n", partsigs, sizeof(unsigned int));
 	return CL_EMEM;
     }
 
ee246f49
     data->offcnt = (uint8_t *) cli_calloc(partsigs, sizeof(uint8_t));
d4405156
 
     if(!data->offcnt) {
ee246f49
 	cli_errmsg("cli_ac_init(): unable to cli_calloc(%u, %u)\n", partsigs, sizeof(uint8_t));
d4405156
 	free(data->partcnt);
 	return CL_EMEM;
     }
 
ee246f49
     data->offidx = (uint8_t *) cli_calloc(partsigs, sizeof(uint8_t));
 
     if(!data->offidx) {
 	cli_errmsg("cli_ac_init(): unable to cli_calloc(%u, %u)\n", partsigs, sizeof(uint8_t));
 	free(data->partcnt);
 	free(data->offcnt);
 	return CL_EMEM;
     }
 
d4405156
     data->maxshift = (int *) cli_malloc(partsigs * sizeof(int));
 
     if(!data->maxshift) {
 	cli_errmsg("cli_ac_init(): unable to cli_malloc(%u)\n", partsigs * sizeof(int));
 	free(data->partcnt);
 	free(data->offcnt);
ee246f49
 	free(data->offidx);
d4405156
 	return CL_EMEM;
     }
 
     memset(data->maxshift, -1, partsigs * sizeof(int));
 
     data->partoff = (unsigned int **) cli_calloc(partsigs, sizeof(unsigned int *));
 
     if(!data->partoff) {
 	cli_errmsg("cli_ac_init(): unable to cli_calloc(%u, %u)\n", partsigs, sizeof(unsigned int));
 	free(data->partcnt);
 	free(data->offcnt);
ee246f49
 	free(data->offidx);
d4405156
 	free(data->maxshift);
 	return CL_EMEM;
     }
 
     /* The number of multipart signatures is rather small so we already
      * allocate the memory for all parts here instead of using a runtime
      * allocation in cli_ac_scanbuff()
      */
 
     for(i = 0; i < partsigs; i++) {
 	data->partoff[i] = (unsigned int *) cli_calloc(tracklen, sizeof(unsigned int));
 
 	if(!data->partoff[i]) {
 	    for(j = 0; j < i; j++)
 		free(data->partoff[j]);
 
 	    free(data->partoff);
 	    free(data->partcnt);
 	    free(data->offcnt);
ee246f49
 	    free(data->offidx);
d4405156
 	    free(data->maxshift);
 	    cli_errmsg("cli_ac_init(): unable to cli_calloc(%u, %u)\n", tracklen, sizeof(unsigned int));
 	    return CL_EMEM;
 	}
     }
 
     return CL_SUCCESS;
 }
 
 void cli_ac_freedata(struct cli_ac_data *data)
 {
 	unsigned int i;
 
 
     if(data && data->partsigs) {
 	free(data->partcnt);
 	free(data->offcnt);
ee246f49
 	free(data->offidx);
d4405156
 	free(data->maxshift);
 
 	for(i = 0; i < data->partsigs; i++)
 	    free(data->partoff[i]);
 
 	free(data->partoff);
     }
 }
 
674f4b3c
 int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset)
f91f55e0
 {
 	struct cli_ac_node *current;
 	struct cli_ac_patt *pt;
674f4b3c
 	int type = CL_CLEAN, j;
ee246f49
         unsigned int i, position, curroff;
 	uint8_t offnum, found;
ed2cd7c1
 	struct cli_matched_type *tnode;
54c0fba0
 	struct cli_target_info info;
f91f55e0
 
 
e263999b
     if(!root->ac_root)
f91f55e0
 	return CL_CLEAN;
 
d4405156
     if(!mdata) {
 	cli_errmsg("cli_ac_scanbuff(): mdata == NULL\n");
f91f55e0
 	return CL_ENULLARG;
     }
 
54c0fba0
     memset(&info, 0, sizeof(info));
f91f55e0
     current = root->ac_root;
 
     for(i = 0; i < length; i++)  {
d4405156
 	current = current->trans[buffer[i] & 0xff];
f91f55e0
 
 	if(current->islast) {
9f986368
 	    position = i - ac_depth + 1;
f91f55e0
 
 	    pt = current->list;
 	    while(pt) {
9f986368
 		if(cli_findpos(buffer, ac_depth, position, length, pt)) {
d4405156
 		    curroff = offset + position - pt->prefix_length;
 
856d9c84
 		    if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
674f4b3c
 			if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, curroff, &info, fd, pt->virname)) {
856d9c84
 			    pt = pt->next;
 			    continue;
 			}
 		    }
 
f91f55e0
 		    if(pt->sigid) { /* it's a partial signature */
 
d4405156
 			if(mdata->partcnt[pt->sigid - 1] + 1 == pt->partno) {
ee246f49
 			    offnum = mdata->offcnt[pt->sigid - 1];
 			    if(offnum < AC_DEFAULT_TRACKLEN) {
 				mdata->partoff[pt->sigid - 1][offnum] = curroff + pt->length;
d4405156
 
ee246f49
 				if(mdata->maxshift[pt->sigid - 1] == -1 || ((int) (mdata->partoff[pt->sigid - 1][offnum] - mdata->partoff[pt->sigid - 1][0]) <= mdata->maxshift[pt->sigid - 1]))
d4405156
 				    mdata->offcnt[pt->sigid - 1]++;
ee246f49
 			    } else {
 				if(mdata->maxshift[pt->sigid - 1] == -1 || ((int) (curroff + pt->length - mdata->partoff[pt->sigid - 1][0]) <= mdata->maxshift[pt->sigid - 1])) {
 				    if(!(mdata->offidx[pt->sigid - 1] %= AC_DEFAULT_TRACKLEN))
 					mdata->offidx[pt->sigid - 1]++;
 
 				    mdata->partoff[pt->sigid - 1][mdata->offidx[pt->sigid - 1]] = curroff + pt->length;
 				    mdata->offidx[pt->sigid - 1]++;
 				}
d4405156
 			    }
f91f55e0
 
d4405156
 			} else if(mdata->partcnt[pt->sigid - 1] + 2 == pt->partno) {
 			    found = 0;
 			    for(j = mdata->offcnt[pt->sigid - 1] - 1; j >= 0; j--) {
 				found = 1;
 				if(pt->maxdist)
 				    if(curroff - mdata->partoff[pt->sigid - 1][j] > pt->maxdist)
 					found = 0;
f91f55e0
 
d4405156
 				if(found && pt->mindist)
 				    if(curroff - mdata->partoff[pt->sigid - 1][j] < pt->mindist)
 					found = 0;
 
 				if(found)
 				    break;
 			    }
 
 			    if(found) {
 				mdata->maxshift[pt->sigid - 1] = mdata->partoff[pt->sigid - 1][j] + pt->maxdist - curroff;
 
 				mdata->partoff[pt->sigid - 1][0] = curroff + pt->length;
 				mdata->offcnt[pt->sigid - 1] = 1;
 
 				if(++mdata->partcnt[pt->sigid - 1] + 1 == pt->parts) {
f91f55e0
 				    if(pt->type) {
df757556
 					if(otfrec) {
ed2cd7c1
 					    if(pt->type > type || pt->type >= CL_TYPE_SFX) {
d4405156
 						cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
f91f55e0
 						type = pt->type;
d53647d5
 						if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) {
ed2cd7c1
 						    if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) {
d4405156
 							cli_errmsg("cli_ac_scanbuff(): Can't allocate memory for new type node\n");
54c0fba0
 							if(info.exeinfo.section)
 							    free(info.exeinfo.section);
ed2cd7c1
 							return CL_EMEM;
 						    }
d53647d5
 
ed2cd7c1
 						    tnode->type = type;
d4405156
 						    tnode->offset = -1; /* we don't remember the offset of the first part */
d53647d5
 
 						    if(*ftoffset)
 							tnode->cnt = (*ftoffset)->cnt + 1;
 						    else
 							tnode->cnt = 1;
 
ed2cd7c1
 						    tnode->next = *ftoffset;
 						    *ftoffset = tnode;
 						}
f91f55e0
 					    }
 					}
 				    } else {
 					if(virname)
 					    *virname = pt->virname;
 
54c0fba0
 					if(info.exeinfo.section)
 					    free(info.exeinfo.section);
f91f55e0
 					return CL_VIRUS;
 				    }
 				}
 			    }
 			}
 
 		    } else { /* old type signature */
 			if(pt->type) {
df757556
 			    if(otfrec) {
ed2cd7c1
 				if(pt->type > type || pt->type >= CL_TYPE_SFX) {
d4405156
 				    cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, curroff);
f91f55e0
 				    type = pt->type;
d53647d5
 				    if(ftoffset && (!*ftoffset ||(*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) {
ed2cd7c1
 					if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) {
d4405156
 					    cli_errmsg("cli_ac_scanbuff(): Can't allocate memory for new type node\n");
54c0fba0
 					    if(info.exeinfo.section)
 						free(info.exeinfo.section);
ed2cd7c1
 					    return CL_EMEM;
 					}
 					tnode->type = type;
d4405156
 					tnode->offset = curroff;
d53647d5
 
 					if(*ftoffset)
 					    tnode->cnt = (*ftoffset)->cnt + 1;
 					else
 					    tnode->cnt = 1;
 
ed2cd7c1
 					tnode->next = *ftoffset;
 					*ftoffset = tnode;
 				    }
f91f55e0
 				}
 			    }
 			} else {
 			    if(virname)
 				*virname = pt->virname;
 
54c0fba0
 			    if(info.exeinfo.section)
 				free(info.exeinfo.section);
f91f55e0
 			    return CL_VIRUS;
 			}
 		    }
 		}
 
 		pt = pt->next;
 	    }
 
 	    current = current->fail;
 	}
     }
 
54c0fba0
     if(info.exeinfo.section)
 	free(info.exeinfo.section);
 
df757556
     return otfrec ? type : CL_CLEAN;
f91f55e0
 }
9f986368
 
 void cli_ac_setdepth(unsigned int depth)
 {
     ac_depth = depth;
 }