Browse code

add support for detection based on analysis of archive metadata

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@1355 77e5149b-7576-45b1-b177-96237e5ba77b

Tomasz Kojm authored on 2005/02/20 13:14:06
Showing 7 changed files
... ...
@@ -1,3 +1,11 @@
1
+Sun Feb 20 05:08:54 CET 2005 (tk)
2
+---------------------------------
3
+  * libclamav: add support for detection based on analysis of archive metadata
4
+	       (currently only zip is supported)
5
+  * libclamav/clamav.h, libclamav/matcher.c: handle cli_zip_node list
6
+  * libclamav/readdb.c: load *.zmd (zip metadata signatures)
7
+  * libclamav/str.c: new function cli_hex2num()
8
+
1 9
 Fri Feb 18 21:29:16 GMT 2005 (njh)
2 10
 ----------------------------------
3 11
   * libclamav/message.c: Handle broken RFC2231 messages reported by Maxim
... ...
@@ -120,6 +120,12 @@ struct cli_md5_node {
120 120
     struct cli_md5_node *next;
121 121
 };
122 122
 
/*
 * One zip metadata signature, kept in a singly linked list.
 *
 * The wildcard values below are the ones stored by the .zmd loader when a
 * database field is "*".
 */
struct cli_zip_node {
    int compr;			/* compression method, -1 = any */
    int csize;			/* compressed size, -1 = any */
    int size;			/* uncompressed size, -1 = any */
    int encrypted;		/* value compared against the entry's flags */
    int crc32;			/* CRC32 of the entry, 0 = any */
    char *filename;		/* exact entry name, NULL = any */
    char *virname;		/* name reported when the signature matches */
    struct cli_zip_node *next;	/* next signature or NULL */
};
128
+
123 129
 struct cl_node {
124 130
     unsigned int maxpatlen; /* maximal length of pattern in db */
125 131
 
... ...
@@ -133,6 +139,9 @@ struct cl_node {
133 133
 
134 134
     /* MD5 */
135 135
     struct cli_md5_node **md5_hlist;
136
+
137
+    /* Zip metadata */
138
+    struct cli_zip_node *zip_mlist;
136 139
 };
137 140
 
138 141
 struct cl_limits {
... ...
@@ -1,5 +1,5 @@
1 1
 /*
2
- *  Copyright (C) 2002 - 2004 Tomasz Kojm <tkojm@clamav.net>
2
+ *  Copyright (C) 2002 - 2005 Tomasz Kojm <tkojm@clamav.net>
3 3
  *
4 4
  *  This program is free software; you can redistribute it and/or modify
5 5
  *  it under the terms of the GNU General Public License as published by
... ...
@@ -325,8 +325,8 @@ int cl_build(struct cl_node *root)
325 325
 void cl_free(struct cl_node *root)
326 326
 {
327 327
 	int i;
328
-	struct cli_md5_node *pt, *h;
329
-
328
+	struct cli_md5_node *md5pt, *md5h;
329
+	struct cli_zip_node *zippt, *ziph;
330 330
 
331 331
     if(!root) {
332 332
 	cli_errmsg("cl_free: root == NULL\n");
... ...
@@ -338,20 +338,30 @@ void cl_free(struct cl_node *root)
338 338
 
339 339
     if(root->md5_hlist) {
340 340
 	for(i = 0; i < 256; i++) {
341
-	    pt = root->md5_hlist[i];
342
-	    while(pt) {
343
-		h = pt;
344
-		pt = pt->next;
345
-		free(h->md5);
346
-		free(h->virname);
347
-		if(h->viralias)
348
-		    free(h->viralias);
349
-		free(h);
341
+	    md5pt = root->md5_hlist[i];
342
+	    while(md5pt) {
343
+		md5h = md5pt;
344
+		md5pt = md5pt->next;
345
+		free(md5h->md5);
346
+		free(md5h->virname);
347
+		if(md5h->viralias)
348
+		    free(md5h->viralias);
349
+		free(md5h);
350 350
 	    }
351 351
 	}
352 352
 	free(root->md5_hlist);
353 353
     }
354 354
 
355
+    zippt = root->zip_mlist;
356
+    while(zippt) {
357
+	ziph = zippt;
358
+	zippt = zippt->next;
359
+	free(ziph->virname);
360
+	if(ziph->filename)
361
+	    free(ziph->filename);
362
+	free(ziph);
363
+    }
364
+
355 365
     free(root);
356 366
 }
357 367
 
... ...
@@ -700,6 +700,146 @@ static int cli_loadhdb(FILE *fd, struct cl_node **root, unsigned int *signo)
700 700
     return 0;
701 701
 }
702 702
 
703
+static int cli_loadzmd(FILE *fd, struct cl_node **root, unsigned int *signo)
704
+{
705
+	char buffer[FILEBUFF], *pt;
706
+	int line = 0, comments = 0, ret = 0;
707
+	struct cli_zip_node *new;
708
+
709
+
710
+    if(!*root) {
711
+	cli_dbgmsg("Initializing main node\n");
712
+	*root = (struct cl_node *) cli_calloc(1, sizeof(struct cl_node));
713
+	if(!*root)
714
+	    return CL_EMEM;
715
+    }
716
+
717
+    while(fgets(buffer, FILEBUFF, fd)) {
718
+	line++;
719
+	if(buffer[0] == '#') {
720
+	    comments++;
721
+	    continue;
722
+	}
723
+
724
+	cli_chomp(buffer);
725
+
726
+	new = (struct cli_zip_node *) cli_calloc(1, sizeof(struct cli_zip_node));
727
+	if(!new) {
728
+	    ret = CL_EMEM;
729
+	    break;
730
+	}
731
+
732
+	if(!(new->virname = cli_strtok(buffer, 0, ":"))) {
733
+	    free(new);
734
+	    ret = CL_EMALFDB;
735
+	    break;
736
+	}
737
+
738
+	if(!(pt = cli_strtok(buffer, 1, ":"))) {
739
+	    free(new->virname);
740
+	    free(new);
741
+	    ret = CL_EMALFDB;
742
+	    break;
743
+	} else {
744
+	    new->encrypted = atoi(pt);
745
+	    free(pt);
746
+	}
747
+
748
+	if(!(new->filename = cli_strtok(buffer, 2, ":"))) {
749
+	    free(new->virname);
750
+	    free(new);
751
+	    ret = CL_EMALFDB;
752
+	    break;
753
+	} else {
754
+	    if(!strcmp(new->filename, "*")) {
755
+		free(new->filename);
756
+		new->filename = NULL;
757
+	    }
758
+	}
759
+
760
+	if(!(pt = cli_strtok(buffer, 3, ":"))) {
761
+	    free(new->filename);
762
+	    free(new->virname);
763
+	    free(new);
764
+	    ret = CL_EMALFDB;
765
+	    break;
766
+	} else {
767
+	    if(!strcmp(pt, "*"))
768
+		new->size = -1;
769
+	    else
770
+		new->size = atoi(pt);
771
+	    free(pt);
772
+	}
773
+
774
+	if(!(pt = cli_strtok(buffer, 4, ":"))) {
775
+	    free(new->filename);
776
+	    free(new->virname);
777
+	    free(new);
778
+	    ret = CL_EMALFDB;
779
+	    break;
780
+	} else {
781
+	    if(!strcmp(pt, "*"))
782
+		new->csize = -1;
783
+	    else
784
+		new->csize = atoi(pt);
785
+	    free(pt);
786
+	}
787
+
788
+	if(!(pt = cli_strtok(buffer, 5, ":"))) {
789
+	    free(new->filename);
790
+	    free(new->virname);
791
+	    free(new);
792
+	    ret = CL_EMALFDB;
793
+	    break;
794
+	} else {
795
+	    if(!strcmp(pt, "*")) {
796
+		new->crc32 = 0;
797
+	    } else {
798
+		new->crc32 = cli_hex2num(pt);
799
+		if(new->crc32 == -1) {
800
+		    ret = CL_EMALFDB;
801
+		    break;
802
+		}
803
+	    }
804
+	    free(pt);
805
+	}
806
+
807
+	if(!(pt = cli_strtok(buffer, 6, ":"))) {
808
+	    free(new->filename);
809
+	    free(new->virname);
810
+	    free(new);
811
+	    ret = CL_EMALFDB;
812
+	    break;
813
+	} else {
814
+	    if(!strcmp(pt, "*"))
815
+		new->compr = -1;
816
+	    else
817
+		new->compr = atoi(pt);
818
+	    free(pt);
819
+	}
820
+
821
+	new->next = (*root)->zip_mlist;
822
+	(*root)->zip_mlist = new;
823
+    }
824
+
825
+    if(!line) {
826
+	cli_errmsg("Empty database file\n");
827
+	cl_free(*root);
828
+	return CL_EMALFDB;
829
+    }
830
+
831
+    if(ret) {
832
+	cli_errmsg("Problem parsing database at line %d\n", line);
833
+	cl_free(*root);
834
+	return ret;
835
+    }
836
+
837
+    if(signo)
838
+	*signo += (line - comments);
839
+
840
+    return 0;
841
+}
842
+
703 843
 int cl_loaddb(const char *filename, struct cl_node **root, unsigned int *signo)
704 844
 {
705 845
 	FILE *fd;
... ...
@@ -730,6 +870,9 @@ int cl_loaddb(const char *filename, struct cl_node **root, unsigned int *signo)
730 730
     } else if(cli_strbcasestr(filename, ".ndb")) {
731 731
 	ret = cli_loadndb(fd, root, signo);
732 732
 
733
+    } else if(cli_strbcasestr(filename, ".zmd")) {
734
+	ret = cli_loadzmd(fd, root, signo);
735
+
733 736
     } else {
734 737
 	cli_dbgmsg("cl_loaddb: unknown extension - assuming old database format\n");
735 738
 	ret = cli_loaddb(fd, root, signo);
... ...
@@ -296,8 +296,9 @@ static int cli_scanzip(int desc, const char **virname, long int *scanned, const
296 296
 	ZZIP_FILE *zfp;
297 297
 	FILE *tmp = NULL;
298 298
 	char *buff;
299
-	int fd, bytes, files = 0, ret = CL_CLEAN;
299
+	int fd, bytes, files = 0, ret = CL_CLEAN, encrypted;
300 300
 	struct stat source;
301
+	struct cli_zip_node *mdata;
301 302
 	zzip_error_t err;
302 303
 
303 304
 
... ...
@@ -327,7 +328,9 @@ static int cli_scanzip(int desc, const char **virname, long int *scanned, const
327 327
 	    break;
328 328
 	}
329 329
 
330
-	cli_dbgmsg("Zip: %s, compressed: %u, normal: %u, ratio: %d (max: %d)\n", zdirent.d_name, zdirent.d_csize, zdirent.st_size, zdirent.d_csize ? (zdirent.st_size / zdirent.d_csize) : 0, limits ? limits->maxratio : -1 );
330
+	encrypted = zdirent.d_flags;
331
+
332
+	cli_dbgmsg("Zip: %s, crc32: 0x%x, encrypted: %d, compressed: %u, normal: %u, ratio: %d (max: %d)\n", zdirent.d_name, zdirent.d_crc32, encrypted, zdirent.d_csize, zdirent.st_size, zdirent.d_csize ? (zdirent.st_size / zdirent.d_csize) : 0, limits ? limits->maxratio : -1);
331 333
 
332 334
 	if(!zdirent.st_size) {
333 335
 	    files++;
... ...
@@ -340,6 +343,39 @@ static int cli_scanzip(int desc, const char **virname, long int *scanned, const
340 340
 	    continue;
341 341
 	}
342 342
 
343
+	/* Scan metadata */
344
+	mdata = root->zip_mlist;
345
+	do {
346
+	    if(mdata->encrypted != encrypted)
347
+		continue;
348
+
349
+	    if(mdata->crc32 && mdata->crc32 != zdirent.d_crc32)
350
+		continue;
351
+
352
+	    if(mdata->csize > 0 && mdata->csize != zdirent.d_csize)
353
+		continue;
354
+
355
+	    if(mdata->size >= 0 && mdata->size != zdirent.st_size)
356
+		continue;
357
+
358
+	    if(mdata->compr >= 0 && mdata->compr != zdirent.d_compr)
359
+		continue;
360
+
361
+	    /* FIXME: add support for regex */
362
+	    /*if(mdata->filename && !strstr(zdirent.d_name, mdata->filename))*/
363
+	    if(mdata->filename && strcmp(zdirent.d_name, mdata->filename))
364
+		continue;
365
+
366
+	    break; /* matched */
367
+
368
+	} while((mdata = mdata->next));
369
+
370
+	if(mdata) {
371
+	    *virname = mdata->virname;
372
+	    ret = CL_VIRUS;
373
+	    break;
374
+	}
375
+
343 376
 	/* 
344 377
 	 * Workaround for archives created with ICEOWS.
345 378
 	 * ZZIP_DIRENT does not contain information on file type
... ...
@@ -365,7 +401,7 @@ static int cli_scanzip(int desc, const char **virname, long int *scanned, const
365 365
 	    break;
366 366
         }
367 367
 
368
-	if(DETECT_ENCRYPTED && (zdirent.d_flags & 1 )) {
368
+	if(DETECT_ENCRYPTED && encrypted) {
369 369
 	    files++;
370 370
 	    cli_dbgmsg("Zip: Encrypted files found in archive.\n");
371 371
 	    lseek(desc, 0, SEEK_SET);
... ...
@@ -129,6 +129,28 @@ char *cli_hex2str(const char *hex)
129 129
     return str;
130 130
 }
131 131
 
/*
 * Convert a hexadecimal string (no "0x" prefix) to a number.
 *
 * The string must have an even number of digits and must fit into an
 * unsigned int. An empty string yields 0.
 *
 * Returns the value converted to int, or -1 on error (odd length, string
 * too long, or a character rejected by cli_hex2int()). Note that the
 * all-ones value (e.g. "ffffffff" with a 32-bit int) therefore cannot be
 * told apart from an error.
 */
int cli_hex2num(const char *hex)
{
	unsigned int acc = 0;
	size_t len, i;
	int hexval;


    len = strlen(hex);

    if(len % 2 != 0) {
	cli_errmsg("cli_hex2num(): Malformed hexstring: %s (length: %d)\n", hex, (int) len);
	return -1;
    }

    /* two hex digits per byte; more would be shifted out of the result */
    if(len > 2 * sizeof(acc)) {
	cli_errmsg("cli_hex2num(): Hexstring too long: %s (length: %d)\n", hex, (int) len);
	return -1;
    }

    for(i = 0; i < len; i++) {
	/* report bad digits instead of returning a truncated value */
	if((hexval = cli_hex2int(hex[i])) < 0)
	    return -1;
	/* accumulate unsigned: shifting into the sign bit of an int is undefined */
	acc = (acc << 4) | (unsigned int) hexval;
    }

    return (int) acc;
}
153
+
132 154
 char *cli_str2hex(const char *string, unsigned int len)
133 155
 {
134 156
 	char *hexstr;
... ...
@@ -24,6 +24,7 @@ int cli_chomp(char *string);
24 24
 char *cli_strtok(const char *line, int field, const char *delim);
25 25
 short int *cli_hex2si(const char *hex);
26 26
 char *cli_hex2str(const char *hex);
27
+int cli_hex2num(const char *hex);
27 28
 char *cli_str2hex(const char *string, unsigned int len);
28 29
 char *cli_strtokbuf(const char *input, int fieldno, const char *delim, char *output);
29 30
 const char *cli_memstr(const char *haystack, int hs, const char *needle, int ns);