git-svn: trunk@3218
Tomasz Kojm authored on 2007/09/14 03:14:20... | ... |
@@ -1,3 +1,10 @@ |
1 |
+Thu Sep 13 19:23:31 CEST 2007 (tk) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/matcher-ac.[ch]: various speed optimisations: |
|
4 |
+ - optimise node usage |
|
5 |
+ - try hard to not overload node 0x00.0x00[0x00] |
|
6 |
+ - optimise memory usage |
|
7 |
+ |
|
1 | 8 |
Thu Sep 13 17:37:31 BST 2007 (njh) |
2 | 9 |
---------------------------------- |
3 | 10 |
* libclamav: More optimisations |
... | ... |
@@ -39,6 +39,7 @@ |
39 | 39 |
int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern) |
40 | 40 |
{ |
41 | 41 |
struct cli_ac_node *pt, *next, **newtable; |
42 |
+ struct cli_ac_patt *ph; |
|
42 | 43 |
uint8_t i; |
43 | 44 |
uint16_t len = MIN(root->ac_maxdepth, pattern->length); |
44 | 45 |
|
... | ... |
@@ -114,6 +115,19 @@ int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern) |
114 | 114 |
|
115 | 115 |
pt->final = 1; |
116 | 116 |
pattern->depth = i; |
117 |
+ |
|
118 |
+ ph = pt->list; |
|
119 |
+ while(ph) { |
|
120 |
+ if((ph->length == pattern->length) && (ph->prefix_length == pattern->prefix_length)) { |
|
121 |
+ if(!memcmp(ph->pattern, pattern->pattern, ph->length * sizeof(uint16_t)) && !memcmp(ph->prefix, pattern->prefix, ph->prefix_length * sizeof(uint16_t))) { |
|
122 |
+ pattern->next_same = ph->next_same; |
|
123 |
+ ph->next_same = pattern; |
|
124 |
+ return CL_SUCCESS; |
|
125 |
+ } |
|
126 |
+ } |
|
127 |
+ ph = ph->next; |
|
128 |
+ } |
|
129 |
+ |
|
117 | 130 |
pattern->next = pt->list; |
118 | 131 |
pt->list = pattern; |
119 | 132 |
|
... | ... |
@@ -336,13 +350,9 @@ inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uin |
336 | 336 |
uint8_t found; |
337 | 337 |
|
338 | 338 |
|
339 |
- if(offset + pattern->length > length) |
|
339 |
+ if((offset + pattern->length > length) || (pattern->prefix_length > offset)) |
|
340 | 340 |
return 0; |
341 | 341 |
|
342 |
- if(pattern->prefix) |
|
343 |
- if(pattern->prefix_length > offset) |
|
344 |
- return 0; |
|
345 |
- |
|
346 | 342 |
bp = offset + pattern->depth; |
347 | 343 |
|
348 | 344 |
for(i = pattern->depth; i < pattern->length; i++) { |
... | ... |
@@ -433,7 +443,7 @@ inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, of |
433 | 433 |
int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint8_t otfrec, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset) |
434 | 434 |
{ |
435 | 435 |
struct cli_ac_node *current; |
436 |
- struct cli_ac_patt *pt; |
|
436 |
+ struct cli_ac_patt *patt, *pt; |
|
437 | 437 |
uint32_t i, bp, realoff; |
438 | 438 |
uint16_t j; |
439 | 439 |
int32_t **offmatrix; |
... | ... |
@@ -461,130 +471,138 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v |
461 | 461 |
current = current->trans[buffer[i]]; |
462 | 462 |
|
463 | 463 |
if(current->final) { |
464 |
- pt = current->list; |
|
465 |
- while(pt) { |
|
466 |
- bp = i + 1 - pt->depth; |
|
467 |
- if(ac_findmatch(buffer, bp, length, pt)) { |
|
468 |
- realoff = offset + bp - pt->prefix_length; |
|
469 |
- |
|
470 |
- if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) { |
|
471 |
- if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, realoff, &info, fd, pt->virname)) { |
|
472 |
- pt = pt->next; |
|
473 |
- continue; |
|
464 |
+ patt = current->list; |
|
465 |
+ while(patt) { |
|
466 |
+ bp = i + 1 - patt->depth; |
|
467 |
+ if(ac_findmatch(buffer, bp, length, patt)) { |
|
468 |
+ pt = patt; |
|
469 |
+ while(pt) { |
|
470 |
+ realoff = offset + bp - pt->prefix_length; |
|
471 |
+ |
|
472 |
+ if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) { |
|
473 |
+ if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, realoff, &info, fd, pt->virname)) { |
|
474 |
+ pt = pt->next_same; |
|
475 |
+ continue; |
|
476 |
+ } |
|
474 | 477 |
} |
475 |
- } |
|
476 | 478 |
|
477 |
- if(pt->sigid) { /* it's a partial signature */ |
|
479 |
+ if(pt->sigid) { /* it's a partial signature */ |
|
478 | 480 |
|
479 |
- if(!mdata->offmatrix[pt->sigid - 1]) { |
|
480 |
- mdata->offmatrix[pt->sigid - 1] = cli_malloc(pt->parts * sizeof(int32_t *)); |
|
481 |
- if(!mdata->offmatrix[pt->sigid - 1]) { |
|
482 |
- cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u]\n", pt->sigid - 1); |
|
483 |
- return CL_EMEM; |
|
481 |
+ if(pt->partno != 1 && (!mdata->offmatrix[pt->sigid - 1] || !mdata->offmatrix[pt->sigid - 1][pt->partno - 2][0])) { |
|
482 |
+ pt = pt->next_same; |
|
483 |
+ continue; |
|
484 | 484 |
} |
485 | 485 |
|
486 |
- mdata->offmatrix[pt->sigid - 1][0] = cli_malloc(pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t)); |
|
487 |
- if(!mdata->offmatrix[pt->sigid - 1][0]) { |
|
488 |
- cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u][0]\n", pt->sigid - 1); |
|
489 |
- free(mdata->offmatrix[pt->sigid - 1]); |
|
490 |
- mdata->offmatrix[pt->sigid - 1] = NULL; |
|
491 |
- return CL_EMEM; |
|
492 |
- } |
|
493 |
- memset(mdata->offmatrix[pt->sigid - 1][0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t)); |
|
494 |
- mdata->offmatrix[pt->sigid - 1][0][0] = 0; |
|
495 |
- for(j = 1; j < pt->parts; j++) { |
|
496 |
- mdata->offmatrix[pt->sigid - 1][j] = mdata->offmatrix[pt->sigid - 1][0] + j * (AC_DEFAULT_TRACKLEN + 1); |
|
497 |
- mdata->offmatrix[pt->sigid - 1][j][0] = 0; |
|
486 |
+ if(!mdata->offmatrix[pt->sigid - 1]) { |
|
487 |
+ mdata->offmatrix[pt->sigid - 1] = cli_malloc(pt->parts * sizeof(int32_t *)); |
|
488 |
+ if(!mdata->offmatrix[pt->sigid - 1]) { |
|
489 |
+ cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u]\n", pt->sigid - 1); |
|
490 |
+ return CL_EMEM; |
|
491 |
+ } |
|
492 |
+ |
|
493 |
+ mdata->offmatrix[pt->sigid - 1][0] = cli_malloc(pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t)); |
|
494 |
+ if(!mdata->offmatrix[pt->sigid - 1][0]) { |
|
495 |
+ cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u][0]\n", pt->sigid - 1); |
|
496 |
+ free(mdata->offmatrix[pt->sigid - 1]); |
|
497 |
+ mdata->offmatrix[pt->sigid - 1] = NULL; |
|
498 |
+ return CL_EMEM; |
|
499 |
+ } |
|
500 |
+ memset(mdata->offmatrix[pt->sigid - 1][0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t)); |
|
501 |
+ mdata->offmatrix[pt->sigid - 1][0][0] = 0; |
|
502 |
+ for(j = 1; j < pt->parts; j++) { |
|
503 |
+ mdata->offmatrix[pt->sigid - 1][j] = mdata->offmatrix[pt->sigid - 1][0] + j * (AC_DEFAULT_TRACKLEN + 1); |
|
504 |
+ mdata->offmatrix[pt->sigid - 1][j][0] = 0; |
|
505 |
+ } |
|
498 | 506 |
} |
499 |
- } |
|
500 |
- offmatrix = mdata->offmatrix[pt->sigid - 1]; |
|
501 |
- |
|
502 |
- if(pt->partno != 1) { |
|
503 |
- found = 0; |
|
504 |
- for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[pt->partno - 2][j] != -1; j++) { |
|
505 |
- found = 1; |
|
506 |
- if(pt->maxdist) |
|
507 |
- if(realoff - offmatrix[pt->partno - 2][j] > pt->maxdist) |
|
508 |
- found = 0; |
|
509 |
- |
|
510 |
- if(found && pt->mindist) |
|
511 |
- if(realoff - offmatrix[pt->partno - 2][j] < pt->mindist) |
|
512 |
- found = 0; |
|
513 |
- |
|
514 |
- if(found) |
|
515 |
- break; |
|
507 |
+ offmatrix = mdata->offmatrix[pt->sigid - 1]; |
|
508 |
+ |
|
509 |
+ if(pt->partno != 1) { |
|
510 |
+ found = 0; |
|
511 |
+ for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[pt->partno - 2][j] != -1; j++) { |
|
512 |
+ found = 1; |
|
513 |
+ if(pt->maxdist) |
|
514 |
+ if(realoff - offmatrix[pt->partno - 2][j] > pt->maxdist) |
|
515 |
+ found = 0; |
|
516 |
+ |
|
517 |
+ if(found && pt->mindist) |
|
518 |
+ if(realoff - offmatrix[pt->partno - 2][j] < pt->mindist) |
|
519 |
+ found = 0; |
|
520 |
+ |
|
521 |
+ if(found) |
|
522 |
+ break; |
|
523 |
+ } |
|
516 | 524 |
} |
517 |
- } |
|
518 | 525 |
|
519 |
- if(pt->partno == 1 || (found && (pt->partno != pt->parts))) { |
|
520 |
- offmatrix[pt->partno - 1][0] %= AC_DEFAULT_TRACKLEN; |
|
521 |
- offmatrix[pt->partno - 1][0]++; |
|
526 |
+ if(pt->partno == 1 || (found && (pt->partno != pt->parts))) { |
|
527 |
+ offmatrix[pt->partno - 1][0] %= AC_DEFAULT_TRACKLEN; |
|
528 |
+ offmatrix[pt->partno - 1][0]++; |
|
529 |
+ |
|
530 |
+ offmatrix[pt->partno - 1][offmatrix[pt->partno - 1][0]] = realoff + pt->length + pt->prefix_length; |
|
531 |
+ if(pt->partno == 1) /* save realoff for the first part */ |
|
532 |
+ offmatrix[pt->parts - 1][offmatrix[pt->partno - 1][0]] = realoff; |
|
533 |
+ } else if(found && pt->partno == pt->parts) { |
|
534 |
+ if(pt->type) { |
|
535 |
+ if(otfrec) { |
|
536 |
+ if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) { |
|
537 |
+ cli_dbgmsg("Matched signature for file type %s\n", pt->virname); |
|
538 |
+ type = pt->type; |
|
539 |
+ if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE))) { |
|
540 |
+ /* FIXME: we don't know which offset of the first part is the correct one */ |
|
541 |
+ for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) { |
|
542 |
+ if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) { |
|
543 |
+ if(info.exeinfo.section) |
|
544 |
+ free(info.exeinfo.section); |
|
545 |
+ return CL_EMEM; |
|
546 |
+ } |
|
547 |
+ } |
|
548 |
+ } |
|
522 | 549 |
|
523 |
- offmatrix[pt->partno - 1][offmatrix[pt->partno - 1][0]] = realoff + pt->length + pt->prefix_length; |
|
524 |
- if(pt->partno == 1) /* save realoff for the first part */ |
|
525 |
- offmatrix[pt->parts - 1][offmatrix[pt->partno - 1][0]] = realoff; |
|
526 |
- } else if(found && pt->partno == pt->parts) { |
|
550 |
+ memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t)); |
|
551 |
+ for(j = 0; j < pt->parts; j++) |
|
552 |
+ offmatrix[j][0] = 0; |
|
553 |
+ } |
|
554 |
+ } |
|
555 |
+ |
|
556 |
+ } else { /* !pt->type */ |
|
557 |
+ if(virname) |
|
558 |
+ *virname = pt->virname; |
|
559 |
+ |
|
560 |
+ if(info.exeinfo.section) |
|
561 |
+ free(info.exeinfo.section); |
|
562 |
+ |
|
563 |
+ return CL_VIRUS; |
|
564 |
+ } |
|
565 |
+ } |
|
566 |
+ |
|
567 |
+ } else { /* old type signature */ |
|
527 | 568 |
if(pt->type) { |
528 | 569 |
if(otfrec) { |
529 | 570 |
if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) { |
530 |
- cli_dbgmsg("Matched signature for file type %s\n", pt->virname); |
|
571 |
+ cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff); |
|
531 | 572 |
type = pt->type; |
532 | 573 |
if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE))) { |
533 |
- /* FIXME: we don't know which offset of the first part is the correct one */ |
|
534 |
- for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) { |
|
535 |
- if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) { |
|
536 |
- if(info.exeinfo.section) |
|
537 |
- free(info.exeinfo.section); |
|
538 |
- return CL_EMEM; |
|
539 |
- } |
|
574 |
+ |
|
575 |
+ if(ac_addtype(ftoffset, type, realoff)) { |
|
576 |
+ if(info.exeinfo.section) |
|
577 |
+ free(info.exeinfo.section); |
|
578 |
+ return CL_EMEM; |
|
540 | 579 |
} |
541 | 580 |
} |
542 |
- |
|
543 |
- memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t)); |
|
544 |
- for(j = 0; j < pt->parts; j++) |
|
545 |
- offmatrix[j][0] = 0; |
|
546 | 581 |
} |
547 | 582 |
} |
548 |
- |
|
549 |
- } else { /* !pt->type */ |
|
583 |
+ } else { |
|
550 | 584 |
if(virname) |
551 | 585 |
*virname = pt->virname; |
552 | 586 |
|
553 | 587 |
if(info.exeinfo.section) |
554 | 588 |
free(info.exeinfo.section); |
555 |
- |
|
556 | 589 |
return CL_VIRUS; |
557 | 590 |
} |
558 | 591 |
} |
559 |
- |
|
560 |
- } else { /* old type signature */ |
|
561 |
- if(pt->type) { |
|
562 |
- if(otfrec) { |
|
563 |
- if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) { |
|
564 |
- cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff); |
|
565 |
- type = pt->type; |
|
566 |
- if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE))) { |
|
567 |
- |
|
568 |
- if(ac_addtype(ftoffset, type, realoff)) { |
|
569 |
- if(info.exeinfo.section) |
|
570 |
- free(info.exeinfo.section); |
|
571 |
- return CL_EMEM; |
|
572 |
- } |
|
573 |
- } |
|
574 |
- } |
|
575 |
- } |
|
576 |
- } else { |
|
577 |
- if(virname) |
|
578 |
- *virname = pt->virname; |
|
579 |
- |
|
580 |
- if(info.exeinfo.section) |
|
581 |
- free(info.exeinfo.section); |
|
582 |
- return CL_VIRUS; |
|
583 |
- } |
|
592 |
+ pt = pt->next_same; |
|
584 | 593 |
} |
585 | 594 |
} |
586 |
- |
|
587 |
- pt = pt->next; |
|
595 |
+ patt = patt->next; |
|
588 | 596 |
} |
589 | 597 |
} |
590 | 598 |
} |
... | ... |
@@ -751,15 +769,15 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex |
751 | 751 |
if(new->pattern[j] & CLI_MATCH_WILDCARD) { |
752 | 752 |
break; |
753 | 753 |
} else { |
754 |
- if(j - i + 1 > plen) { |
|
754 |
+ if(j - i + 1 >= plen) { |
|
755 | 755 |
plen = j - i + 1; |
756 | 756 |
ppos = i; |
757 | 757 |
} |
758 | 758 |
} |
759 |
- if(plen >= root->ac_maxdepth) |
|
759 |
+ if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1])) |
|
760 | 760 |
break; |
761 | 761 |
} |
762 |
- if(plen >= root->ac_maxdepth) |
|
762 |
+ if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1])) |
|
763 | 763 |
break; |
764 | 764 |
} |
765 | 765 |
|