b151ef55 |
/*
* C implementation of the Aho-Corasick pattern matching algorithm. It's based
* on ScannerDaemon's Java version by Kurt Huwig and
* http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
* Thanks to Kurt Huwig for pointing me to this page.
* |
cc938d61 |
* Copyright (C) 2002 - 2004 Tomasz Kojm <tkojm@clamav.net> |
b151ef55 |
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
|
8b242bb9 |
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
|
b151ef55 |
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "clamav.h"
#include "others.h"
#include "matcher.h"
#include "unrarlib.h"
#include "defaults.h" |
2fe19b26 |
#include "filetypes.h" |
b151ef55 |
|
442d8407 |
int cli_addpatt(struct cl_node *root, struct cli_patt *pattern) |
b151ef55 |
{ |
ff8cb48b |
struct cli_ac_node *pos, *next; |
b151ef55 |
int i;
if(pattern->length < CL_MIN_LENGTH) {
return CL_EPATSHORT;
}
|
ff8cb48b |
pos = root->ac_root; |
b151ef55 |
for(i = 0; i < CL_MIN_LENGTH; i++) {
next = pos->trans[((unsigned char) pattern->pattern[i]) & 0xff];
if(!next) { |
ff8cb48b |
next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node)); |
9c1c9007 |
if(!next) {
cli_dbgmsg("Unable to allocate pattern node (%d)\n", sizeof(struct cl_node)); |
b151ef55 |
return CL_EMEM; |
9c1c9007 |
} |
b151ef55 |
root->nodes++; |
ff8cb48b |
root->nodetable = (struct cli_ac_node **) realloc(root->nodetable, (root->nodes) * sizeof(struct cli_ac_node *)); |
9c1c9007 |
if (root->nodetable == NULL) {
cli_dbgmsg("Unable to realloc nodetable (%d)\n", (root->nodes) * sizeof(struct cl_node *));
return CL_EMEM;
} |
b151ef55 |
root->nodetable[root->nodes - 1] = next;
pos->trans[((unsigned char) pattern->pattern[i]) & 0xff] = next;
}
pos = next;
}
pos->islast = 1;
pattern->next = pos->list;
pos->list = pattern;
return 0;
}
|
ff8cb48b |
static int cli_enqueue(struct nodelist **bfs, struct cli_ac_node *n) |
b151ef55 |
{
struct nodelist *new;
new = (struct nodelist *) cli_calloc(1, sizeof(struct nodelist)); |
9c1c9007 |
if (new == NULL) {
cli_dbgmsg("Unable to allocate node list (%d)\n", sizeof(struct nodelist)); |
e8217f5a |
return CL_EMEM; |
9c1c9007 |
} |
b151ef55 |
new->next = *bfs;
new->node = n;
*bfs = new; |
e8217f5a |
return 0; |
b151ef55 |
}
|
ff8cb48b |
static struct cli_ac_node *cli_dequeue(struct nodelist **bfs) |
b151ef55 |
{
struct nodelist *handler, *prev = NULL; |
ff8cb48b |
struct cli_ac_node *pt; |
b151ef55 |
handler = *bfs;
while(handler && handler->next) {
prev = handler;
handler = handler->next;
}
if(!handler) {
return NULL;
} else {
pt = handler->node;
free(handler);
if(prev)
prev->next = NULL;
else
*bfs = NULL;
return pt;
}
}
|
cf0744c5 |
static int cli_maketrans(struct cl_node *root) |
b151ef55 |
{
struct nodelist *bfs = NULL; |
ff8cb48b |
struct cli_ac_node *ac_root = root->ac_root, *child, *node; |
e8217f5a |
int i, ret; |
b151ef55 |
|
ff8cb48b |
ac_root->fail = NULL;
if((ret = cli_enqueue(&bfs, ac_root)) != 0) { |
e8217f5a |
return ret;
} |
b151ef55 |
while((node = cli_dequeue(&bfs))) {
if(node->islast)
continue;
for(i = 0; i < CL_NUM_CHILDS; i++) {
child = node->trans[i];
if(!child) {
if(node->fail)
node->trans[i] = (node->fail)->trans[i];
else |
ff8cb48b |
node->trans[i] = ac_root; |
b151ef55 |
} else {
if(node->fail)
child->fail = (node->fail)->trans[i];
else |
ff8cb48b |
child->fail = ac_root; |
b151ef55 |
|
e8217f5a |
if((ret = cli_enqueue(&bfs, child)) != 0) {
return ret;
} |
b151ef55 |
}
}
} |
e8217f5a |
return 0; |
b151ef55 |
}
|
e8217f5a |
int cl_buildtrie(struct cl_node *root) |
b151ef55 |
{ |
2fe19b26 |
int ret;
if(!root)
return CL_EMALFDB;
|
ff8cb48b |
if(!root->ac_root) {
cli_dbgmsg("Pattern matcher not initialised\n");
return 0;
}
|
2fe19b26 |
if((ret = cli_addtypesigs(root)))
return ret;
|
e8217f5a |
return cli_maketrans(root); |
b151ef55 |
}
|
cf0744c5 |
static void cli_freepatt(struct cli_patt *list) |
b151ef55 |
{ |
442d8407 |
struct cli_patt *handler, *prev; |
d898865b |
int i; |
b151ef55 |
handler = list;
while(handler) {
free(handler->pattern);
free(handler->virname); |
d898865b |
if(handler->alt) {
free(handler->altn);
for(i = 0; i < handler->alt; i++)
free(handler->altc[i]);
free(handler->altc);
} |
b151ef55 |
prev = handler;
handler = handler->next;
free(prev);
}
}
void cl_freetrie(struct cl_node *root)
{ |
cf0744c5 |
unsigned int i; |
b151ef55 |
|
ff8cb48b |
|
b151ef55 |
for(i = 0; i < root->nodes; i++) {
cli_freepatt(root->nodetable[i]->list);
free(root->nodetable[i]);
}
free(root->nodetable); |
ff8cb48b |
free(root->ac_root); |
b151ef55 |
free(root);
}
|
1f73f3ff |
int inline cli_findpos(const char *buffer, int offset, int length, const struct cli_patt *pattern)
{
int bufferpos = offset + CL_MIN_LENGTH;
int postfixend = offset + length; |
d898865b |
unsigned int i, j, alt = 0, found = 0; |
1f73f3ff |
|
d898865b |
if(bufferpos >= length) |
1f73f3ff |
bufferpos %= length;
for(i = CL_MIN_LENGTH; i < pattern->length; i++) {
if(bufferpos == postfixend)
return 0;
|
d898865b |
if(pattern->pattern[i] == CLI_ALT) {
for(j = 0; j < pattern->altn[alt]; j++) {
if(pattern->altc[alt][j] == buffer[bufferpos])
found = 1;
}
if(!found)
return 0;
alt++;
} else if(pattern->pattern[i] != CLI_IGN && (char) pattern->pattern[i] != buffer[bufferpos]) |
1f73f3ff |
return 0;
bufferpos++;
|
d898865b |
if(bufferpos == length) |
1f73f3ff |
bufferpos = 0;
}
return 1;
}
|
d898865b |
int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff) |
b151ef55 |
{ |
ff8cb48b |
struct cli_ac_node *current; |
442d8407 |
struct cli_patt *pt; |
d898865b |
int position, type = CL_CLEAN, dist; |
cf0744c5 |
unsigned int i; |
b151ef55 |
|
cc938d61 |
|
ff8cb48b |
if(!root->ac_root) {
cli_dbgmsg("cli_scanbuff: Pattern matcher not initialised\n");
return CL_CLEAN;
} |
b151ef55 |
|
d898865b |
if(!partcnt || !partoff) {
cli_dbgmsg("cli_scanbuff(): partcnt == NULL || partoff == NULL\n"); |
cc938d61 |
return CL_EMEM; |
9c1c9007 |
}
|
ff8cb48b |
current = root->ac_root;
|
b151ef55 |
for(i = 0; i < length; i++) {
current = current->trans[(unsigned char) buffer[i] & 0xff];
if(current->islast) {
position = i - CL_MIN_LENGTH + 1;
pt = current->list;
while(pt) {
if(cli_findpos(buffer, position, length, pt)) {
if(pt->sigid) { /* it's a partial signature */
if(partcnt[pt->sigid] + 1 == pt->partno) { |
2fe19b26 |
|
d898865b |
dist = 1;
if(pt->maxdist)
if(offset + i - partoff[pt->sigid] > pt->maxdist)
dist = 0;
if(dist && pt->mindist)
if(offset + i - partoff[pt->sigid] < pt->mindist)
dist = 0;
if(dist) {
partoff[pt->sigid] = offset + i + pt->length;
if(++partcnt[pt->sigid] == pt->parts) { /* the last one */
if(pt->type) {
if(typerec) { |
c00d0af2 |
if(pt->type > type) {
cli_dbgmsg("Matched signature for file type: %s\n", pt->virname); |
d898865b |
type = pt->type; |
c00d0af2 |
} |
d898865b |
}
} else {
if(virname)
*virname = pt->virname;
return CL_VIRUS;
} |
2fe19b26 |
} |
b151ef55 |
}
} |
d898865b |
|
b151ef55 |
} else { /* old type signature */ |
2fe19b26 |
if(pt->type) {
if(typerec) { |
c00d0af2 |
if(pt->type > type) {
cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
type = pt->type;
} |
2fe19b26 |
}
} else {
if(virname)
*virname = pt->virname;
return CL_VIRUS;
} |
b151ef55 |
}
}
pt = pt->next;
}
current = current->fail;
}
}
|
2fe19b26 |
return typerec ? type : CL_CLEAN; |
b151ef55 |
}
|
fdfb0dd7 |
int cl_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root)
{ |
cc938d61 |
int ret, *partcnt; |
d898865b |
unsigned long int *partoff; |
cc938d61 |
|
2fe19b26 |
|
cc938d61 |
if((partcnt = (int *) cli_calloc(root->partsigs + 1, sizeof(int))) == NULL) {
cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(int));
return CL_EMEM;
}
|
d898865b |
if((partoff = (unsigned long int *) cli_calloc(root->partsigs + 1, sizeof(unsigned long int))) == NULL) {
cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", root->partsigs + 1, sizeof(unsigned long int));
return CL_EMEM;
}
ret = cli_scanbuff(buffer, length, virname, root, partcnt, 0, 0, partoff); |
cc938d61 |
free(partcnt);
return ret; |
fdfb0dd7 |
} |