Browse code

various speed optimisations

git-svn: trunk@3218

Tomasz Kojm authored on 2007/09/14 03:14:20
Showing 3 changed files
... ...
@@ -1,3 +1,10 @@
1
+Thu Sep 13 19:23:31 CEST 2007 (tk)
2
+----------------------------------
3
+  * libclamav/matcher-ac.[ch]: various speed optimisations:
4
+				- optimise node usage
5
+				- try hard to not overload node 0x00.0x00[0x00]
6
+				- optimise memory usage
7
+
1 8
 Thu Sep 13 17:37:31 BST 2007 (njh)
2 9
 ----------------------------------
3 10
   * libclamav:	More optimisations
... ...
@@ -39,6 +39,7 @@
39 39
 int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern)
40 40
 {
41 41
 	struct cli_ac_node *pt, *next, **newtable;
42
+	struct cli_ac_patt *ph;
42 43
 	uint8_t i;
43 44
 	uint16_t len = MIN(root->ac_maxdepth, pattern->length);
44 45
 
... ...
@@ -114,6 +115,19 @@ int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern)
114 114
 
115 115
     pt->final = 1;
116 116
     pattern->depth = i;
117
+
118
+    ph = pt->list;
119
+    while(ph) {
120
+	if((ph->length == pattern->length) && (ph->prefix_length == pattern->prefix_length)) {
121
+	    if(!memcmp(ph->pattern, pattern->pattern, ph->length * sizeof(uint16_t)) && !memcmp(ph->prefix, pattern->prefix, ph->prefix_length * sizeof(uint16_t))) {
122
+		pattern->next_same = ph->next_same;
123
+		ph->next_same = pattern;
124
+		return CL_SUCCESS;
125
+	    }
126
+	}
127
+	ph = ph->next;
128
+    }
129
+
117 130
     pattern->next = pt->list;
118 131
     pt->list = pattern;
119 132
 
... ...
@@ -336,13 +350,9 @@ inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uin
336 336
 	uint8_t found;
337 337
 
338 338
 
339
-    if(offset + pattern->length > length)
339
+    if((offset + pattern->length > length) || (pattern->prefix_length > offset))
340 340
 	return 0;
341 341
 
342
-    if(pattern->prefix)
343
-	if(pattern->prefix_length > offset)
344
-	    return 0;
345
-
346 342
     bp = offset + pattern->depth;
347 343
 
348 344
     for(i = pattern->depth; i < pattern->length; i++) {
... ...
@@ -433,7 +443,7 @@ inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, of
433 433
 int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint8_t otfrec, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset)
434 434
 {
435 435
 	struct cli_ac_node *current;
436
-	struct cli_ac_patt *pt;
436
+	struct cli_ac_patt *patt, *pt;
437 437
         uint32_t i, bp, realoff;
438 438
 	uint16_t j;
439 439
 	int32_t **offmatrix;
... ...
@@ -461,130 +471,138 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
461 461
 	current = current->trans[buffer[i]];
462 462
 
463 463
 	if(current->final) {
464
-	    pt = current->list;
465
-	    while(pt) {
466
-		bp = i + 1 - pt->depth;
467
-		if(ac_findmatch(buffer, bp, length, pt)) {
468
-		    realoff = offset + bp - pt->prefix_length;
469
-
470
-		    if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
471
-			if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, realoff, &info, fd, pt->virname)) {
472
-			    pt = pt->next;
473
-			    continue;
464
+	    patt = current->list;
465
+	    while(patt) {
466
+		bp = i + 1 - patt->depth;
467
+		if(ac_findmatch(buffer, bp, length, patt)) {
468
+		    pt = patt;
469
+		    while(pt) {
470
+			realoff = offset + bp - pt->prefix_length;
471
+
472
+			if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
473
+			    if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, realoff, &info, fd, pt->virname)) {
474
+				pt = pt->next_same;
475
+				continue;
476
+			    }
474 477
 			}
475
-		    }
476 478
 
477
-		    if(pt->sigid) { /* it's a partial signature */
479
+			if(pt->sigid) { /* it's a partial signature */
478 480
 
479
-			if(!mdata->offmatrix[pt->sigid - 1]) {
480
-			    mdata->offmatrix[pt->sigid - 1] = cli_malloc(pt->parts * sizeof(int32_t *));
481
-			    if(!mdata->offmatrix[pt->sigid - 1]) {
482
-				cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u]\n", pt->sigid - 1);
483
-				return CL_EMEM;
481
+			    if(pt->partno != 1 && (!mdata->offmatrix[pt->sigid - 1] || !mdata->offmatrix[pt->sigid - 1][pt->partno - 2][0])) {
482
+				pt = pt->next_same;
483
+				continue;
484 484
 			    }
485 485
 
486
-			    mdata->offmatrix[pt->sigid - 1][0] = cli_malloc(pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
487
-			    if(!mdata->offmatrix[pt->sigid - 1][0]) {
488
-				cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u][0]\n", pt->sigid - 1);
489
-				free(mdata->offmatrix[pt->sigid - 1]);
490
-				mdata->offmatrix[pt->sigid - 1] = NULL;
491
-				return CL_EMEM;
492
-			    }
493
-			    memset(mdata->offmatrix[pt->sigid - 1][0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
494
-			    mdata->offmatrix[pt->sigid - 1][0][0] = 0;
495
-			    for(j = 1; j < pt->parts; j++) {
496
-				mdata->offmatrix[pt->sigid - 1][j] = mdata->offmatrix[pt->sigid - 1][0] + j * (AC_DEFAULT_TRACKLEN + 1);
497
-				 mdata->offmatrix[pt->sigid - 1][j][0] = 0;
486
+			    if(!mdata->offmatrix[pt->sigid - 1]) {
487
+				mdata->offmatrix[pt->sigid - 1] = cli_malloc(pt->parts * sizeof(int32_t *));
488
+				if(!mdata->offmatrix[pt->sigid - 1]) {
489
+				    cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u]\n", pt->sigid - 1);
490
+				    return CL_EMEM;
491
+				}
492
+
493
+				mdata->offmatrix[pt->sigid - 1][0] = cli_malloc(pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
494
+				if(!mdata->offmatrix[pt->sigid - 1][0]) {
495
+				    cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u][0]\n", pt->sigid - 1);
496
+				    free(mdata->offmatrix[pt->sigid - 1]);
497
+				    mdata->offmatrix[pt->sigid - 1] = NULL;
498
+				    return CL_EMEM;
499
+				}
500
+				memset(mdata->offmatrix[pt->sigid - 1][0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
501
+				mdata->offmatrix[pt->sigid - 1][0][0] = 0;
502
+				for(j = 1; j < pt->parts; j++) {
503
+				    mdata->offmatrix[pt->sigid - 1][j] = mdata->offmatrix[pt->sigid - 1][0] + j * (AC_DEFAULT_TRACKLEN + 1);
504
+				    mdata->offmatrix[pt->sigid - 1][j][0] = 0;
505
+				}
498 506
 			    }
499
-			}
500
-			offmatrix = mdata->offmatrix[pt->sigid - 1];
501
-
502
-			if(pt->partno != 1) {
503
-			    found = 0;
504
-			    for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[pt->partno - 2][j] != -1; j++) {
505
-				found = 1;
506
-				if(pt->maxdist)
507
-				    if(realoff - offmatrix[pt->partno - 2][j] > pt->maxdist)
508
-					found = 0;
509
-
510
-				if(found && pt->mindist)
511
-				    if(realoff - offmatrix[pt->partno - 2][j] < pt->mindist)
512
-					found = 0;
513
-
514
-				if(found)
515
-				    break;
507
+			    offmatrix = mdata->offmatrix[pt->sigid - 1];
508
+
509
+			    if(pt->partno != 1) {
510
+				found = 0;
511
+				for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[pt->partno - 2][j] != -1; j++) {
512
+				    found = 1;
513
+				    if(pt->maxdist)
514
+					if(realoff - offmatrix[pt->partno - 2][j] > pt->maxdist)
515
+					    found = 0;
516
+
517
+				    if(found && pt->mindist)
518
+					if(realoff - offmatrix[pt->partno - 2][j] < pt->mindist)
519
+					    found = 0;
520
+
521
+				    if(found)
522
+					break;
523
+				}
516 524
 			    }
517
-			}
518 525
 
519
-			if(pt->partno == 1 || (found && (pt->partno != pt->parts))) {
520
-			    offmatrix[pt->partno - 1][0] %= AC_DEFAULT_TRACKLEN;
521
-			    offmatrix[pt->partno - 1][0]++;
526
+			    if(pt->partno == 1 || (found && (pt->partno != pt->parts))) {
527
+				offmatrix[pt->partno - 1][0] %= AC_DEFAULT_TRACKLEN;
528
+				offmatrix[pt->partno - 1][0]++;
529
+
530
+				offmatrix[pt->partno - 1][offmatrix[pt->partno - 1][0]] = realoff + pt->length + pt->prefix_length;
531
+				if(pt->partno == 1) /* save realoff for the first part */
532
+				    offmatrix[pt->parts - 1][offmatrix[pt->partno - 1][0]] = realoff;
533
+			    } else if(found && pt->partno == pt->parts) {
534
+				if(pt->type) {
535
+				    if(otfrec) {
536
+					if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
537
+					    cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
538
+					    type = pt->type;
539
+					    if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE)))  {
540
+						/* FIXME: we don't know which offset of the first part is the correct one */
541
+						for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) {
542
+						    if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) {
543
+							if(info.exeinfo.section)
544
+							    free(info.exeinfo.section);
545
+							return CL_EMEM;
546
+						    }
547
+						}
548
+					    }
522 549
 
523
-			    offmatrix[pt->partno - 1][offmatrix[pt->partno - 1][0]] = realoff + pt->length + pt->prefix_length;
524
-			    if(pt->partno == 1) /* save realoff for the first part */
525
-				offmatrix[pt->parts - 1][offmatrix[pt->partno - 1][0]] = realoff;
526
-			} else if(found && pt->partno == pt->parts) {
550
+					    memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
551
+					    for(j = 0; j < pt->parts; j++)
552
+						offmatrix[j][0] = 0;
553
+					}
554
+				    }
555
+
556
+				} else { /* !pt->type */
557
+				    if(virname)
558
+					*virname = pt->virname;
559
+
560
+				    if(info.exeinfo.section)
561
+					free(info.exeinfo.section);
562
+
563
+				    return CL_VIRUS;
564
+				}
565
+			    }
566
+
567
+			} else { /* old type signature */
527 568
 			    if(pt->type) {
528 569
 				if(otfrec) {
529 570
 				    if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
530
-					cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
571
+					cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
531 572
 					type = pt->type;
532 573
 					if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE)))  {
533
-					    /* FIXME: we don't know which offset of the first part is the correct one */
534
-					    for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) {
535
-						if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) {
536
-						    if(info.exeinfo.section)
537
-							free(info.exeinfo.section);
538
-						    return CL_EMEM;
539
-						}
574
+
575
+					    if(ac_addtype(ftoffset, type, realoff)) {
576
+						if(info.exeinfo.section)
577
+						    free(info.exeinfo.section);
578
+						return CL_EMEM;
540 579
 					    }
541 580
 					}
542
-
543
-					memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
544
-					for(j = 0; j < pt->parts; j++)
545
-					    offmatrix[j][0] = 0;
546 581
 				    }
547 582
 				}
548
-
549
-			    } else { /* !pt->type */
583
+			    } else {
550 584
 				if(virname)
551 585
 				    *virname = pt->virname;
552 586
 
553 587
 				if(info.exeinfo.section)
554 588
 				    free(info.exeinfo.section);
555
-
556 589
 				return CL_VIRUS;
557 590
 			    }
558 591
 			}
559
-
560
-		    } else { /* old type signature */
561
-			if(pt->type) {
562
-			    if(otfrec) {
563
-				if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
564
-				    cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
565
-				    type = pt->type;
566
-				    if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE)))  {
567
-
568
-					if(ac_addtype(ftoffset, type, realoff)) {
569
-					    if(info.exeinfo.section)
570
-						free(info.exeinfo.section);
571
-					    return CL_EMEM;
572
-					}
573
-				    }
574
-				}
575
-			    }
576
-			} else {
577
-			    if(virname)
578
-				*virname = pt->virname;
579
-
580
-			    if(info.exeinfo.section)
581
-				free(info.exeinfo.section);
582
-			    return CL_VIRUS;
583
-			}
592
+			pt = pt->next_same;
584 593
 		    }
585 594
 		}
586
-
587
-		pt = pt->next;
595
+		patt = patt->next;
588 596
 	    }
589 597
 	}
590 598
     }
... ...
@@ -751,15 +769,15 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
751 751
 		if(new->pattern[j] & CLI_MATCH_WILDCARD) {
752 752
 		    break;
753 753
 		} else {
754
-		    if(j - i + 1 > plen) {
754
+		    if(j - i + 1 >= plen) {
755 755
 			plen = j - i + 1;
756 756
 			ppos = i;
757 757
 		    }
758 758
 		}
759
-		if(plen >= root->ac_maxdepth)
759
+		if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1]))
760 760
 		    break;
761 761
 	    }
762
-	    if(plen >= root->ac_maxdepth)
762
+	    if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1]))
763 763
 		break;
764 764
 	}
765 765
 
... ...
@@ -43,7 +43,7 @@ struct cli_ac_patt {
43 43
     uint8_t target;
44 44
     uint16_t type;
45 45
     unsigned char **altc;
46
-    struct cli_ac_patt *next;
46
+    struct cli_ac_patt *next, *next_same;
47 47
 };
48 48
 
49 49
 struct cli_ac_node {