Browse code

merge from clamd-proto branch: introduce cli_ftw

git-svn: trunk@4754

Török Edvin authored on 2009/02/13 01:40:35
Showing 4 changed files
... ...
@@ -1,3 +1,9 @@
1
+Thu Feb 12 19:08:28 EET 2009 (edwin)
2
+------------------------------------
3
+ * libclamav/libclamav.map, libclamav/others.h,
4
+ libclamav/others_common.c: merge from clamd-proto branch: introduce
5
+ cli_ftw
6
+
1 7
 Thu Feb 12 12:20:05 EET 2009 (edwin)
2 8
 ------------------------------------
3 9
  * unit_tests/valgrind.supp: add some more suppressions
... ...
@@ -125,6 +125,10 @@ CLAMAV_PRIVATE {
125 125
     mp_clear;
126 126
     cli_versig;
127 127
     cli_filecopy;
128
+    cli_sftw;
129
+    cli_ftw;
130
+    cli_unlink;
131
+    cli_writen;
128 132
   local:
129 133
     *;
130 134
 };
... ...
@@ -364,4 +364,56 @@ int cli_checklimits(const char *, cli_ctx *, unsigned long, unsigned long, unsig
364 364
 int cli_updatelimits(cli_ctx *, unsigned long);
365 365
 unsigned long cli_getsizelimit(cli_ctx *, unsigned long);
366 366
 int cli_matchregex(const char *str, const char *regex);
367
+
/* symlink behaviour: which kinds of symlink targets cli_ftw may follow.
 * With neither bit set, symlinks are not followed at all. */
#define CLI_FTW_FOLLOW_FILE_SYMLINK 0x01
#define CLI_FTW_FOLLOW_DIR_SYMLINK  0x02

/* set if the callback needs the stat result of the entries it is given */
#define CLI_FTW_NEED_STAT	    0x04

/* standard flag set: stat data requested, no symlink-following bits set */
#define CLI_FTW_STD (CLI_FTW_NEED_STAT)
376
+
/* Why the cli_ftw callback is being invoked. */
enum cli_ftw_reason {
    visit_file, /* a file to process; the callback receives (and owns) filename */
    visit_directory_toplev, /* this is a directory at toplevel of recursion */
    error_mem, /* an allocation failed; recommended to return CL_EMEM */
    /* recommended to return CL_SUCCESS below */
    error_stat, /* stat()/lstat() on an entry, or opening a directory, failed */
    warning_skipped_link, /* a symlink that is not being followed */
    warning_skipped_special, /* neither a regular file nor a directory */
    warning_skipped_dir /* directory not entered: recursion limit exceeded */
};
387
+
/*
 * Opaque user data handed through cli_ftw to every callback invocation.
 * The void* is wrapped in a struct so that it cannot be mixed up with some
 * other pointer argument by accident.
 */
struct cli_ftw_cbdata {
    void *data; /* caller-defined payload; cli_ftw only passes it along */
};
392
+
/*
 * Callback invoked by cli_ftw for every visited file and for every
 * error/warning condition (see enum cli_ftw_reason).
 *
 *  stat_buf - stat result for the entry, or NULL when none is available
 *  filename - heap-allocated path, passed only with reason == visit_file
 *             (NULL otherwise). The callback is responsible for freeing
 *             filename when it is done using it.
 *  path     - path the invocation refers to; not owned by the callback
 *  reason   - why the callback is being called
 *  data     - the cbdata pointer that was given to cli_ftw
 *
 * Return CL_BREAK to break out without an error, CL_SUCCESS to continue,
 * or any CL_E* to break out due to error.
 * Note that the callback decides whether directory traversal should continue
 * after an error: it is called with one of the error_* reasons, and if it
 * returns anything other than CL_SUCCESS (e.g. CL_BREAK) the traversal stops.
 */
typedef int (*cli_ftw_cb)(struct stat *stat_buf, char *filename, const char *path, enum cli_ftw_reason reason, struct cli_ftw_cbdata *data);
402
+
/*
 * Walk `base`, invoking `callback` for every file found.
 *
 *  base     - file or directory to start from
 *  flags    - bitwise OR of the CLI_FTW_* flags
 *  maxdepth - recursion limit; directories beyond it are reported to the
 *             callback with warning_skipped_dir instead of being entered
 *  callback - invoked for files, errors and warnings (see cli_ftw_cb)
 *  data     - passed unchanged to every callback invocation
 *
 * Returns
 *  CL_SUCCESS if it traversed all files and subdirs
 *  CL_BREAK if traversal has stopped at some point
 *  CL_E* if an error was encountered during traversal and we had to break out
 * This is regardless of whether a virus was found; recording that is the
 * callback's job.
 * Note that the callback may dispatch the scan asynchronously, so when
 * cli_ftw returns the infected/not-infected status of the directory is not
 * necessarily known yet. For this reason a callback that scans synchronously
 * should store the infected status in its cbdata.
 * This works for both files and directories: the path is stat()ed to
 * determine which one it is. If it is a file, the callback is simply called
 * once; otherwise the directory is traversed recursively.
 */
int cli_ftw(const char *base, int flags, int maxdepth, cli_ftw_cb callback, struct cli_ftw_cbdata *data);
418
+
367 419
 #endif
... ...
@@ -368,3 +368,300 @@ int cli_filecopy(const char *src, const char *dest)
368 368
 
369 369
     return close(d);
370 370
 }
/* One directory entry collected during a scan, kept until it is visited. */
struct dirent_data {
    char *filename;       /* path of the entry; handed to the callback for files */
    const char *dirname;  /* path to recurse into for directories, NULL for files */
    struct stat *statbuf; /* stat result for the entry, or NULL if none was kept */
    int   is_dir;         /* 0 - no, 1 - yes */
    long  ino;            /* -1: inode not available */
};

/*
 * qsort() comparator: sort files before directories, and lower inodes before
 * higher inodes.
 *
 * Returns a negative, zero or positive int. The result is derived from
 * comparisons rather than from a subtraction: a long difference narrowed to
 * int can turn a large inode gap into 0 or flip its sign, and the subtraction
 * itself may overflow.
 */
static int ftw_compare(const void *a, const void *b)
{
    const struct dirent_data *da = a;
    const struct dirent_data *db = b;

    if (da->is_dir != db->is_dir)
        return da->is_dir < db->is_dir ? -1 : 1;

    return (da->ino > db->ino) - (da->ino < db->ino);
}
390
+
/* Classification of a directory entry as the walker currently knows it. */
enum filetype {
    ft_unknown,         /* not classified yet */
    ft_link,            /* a symlink whose target has not been classified yet */
    ft_directory,       /* a directory to recurse into */
    ft_regular,         /* a regular file to visit */
    ft_skipped_special, /* skipped: special file */
    ft_skipped_link     /* skipped: symlink */
};

/* Returns 1 for every type that will not be visited (anything other than a
 * regular file or a directory), 0 otherwise. */
static inline int ft_skipped(enum filetype ft)
{
    switch (ft) {
        case ft_regular:
        case ft_directory:
            return 0;
        default:
            return 1;
    }
}
404
+
405
+#define FOLLOW_SYMLINK_MASK (CLI_FTW_FOLLOW_FILE_SYMLINK | CLI_FTW_FOLLOW_DIR_SYMLINK)
406
+static int get_filetype(const char *fname, int flags, int need_stat,
407
+			 struct stat *statbuf, enum filetype *ft)
408
+{
409
+    int stated = 0;
410
+
411
+    if (*ft == ft_unknown || *ft == ft_link) {
412
+	need_stat = 1;
413
+
414
+	if ((flags & FOLLOW_SYMLINK_MASK) != FOLLOW_SYMLINK_MASK) {
415
+	    /* Following only one of directory/file symlinks, or none, may
416
+	     * need to lstat.
417
+	     * If we're following both file and directory symlinks, we don't need
418
+	     * to lstat(), we can just stat() directly.*/
419
+	    if (*ft != ft_link) {
420
+		/* need to lstat to determine if it is a symlink */
421
+		if (lstat(fname, statbuf) == -1)
422
+		    return -1;
423
+		if (S_ISLNK(statbuf->st_mode)) {
424
+		    *ft = ft_link;
425
+		} else {
426
+		    /* It was not a symlink, stat() not needed */
427
+		    need_stat = 0;
428
+		    stated = 1;
429
+		}
430
+	    }
431
+	    if (*ft == ft_link && !(flags & FOLLOW_SYMLINK_MASK)) {
432
+		/* This is a symlink, but we don't follow any symlinks */
433
+		*ft = ft_skipped_link;
434
+		return 0;
435
+	    }
436
+	}
437
+    }
438
+
439
+    if (need_stat) {
440
+	if (stat(fname, statbuf) == -1)
441
+	    return -1;
442
+	stated = 1;
443
+    }
444
+
445
+    if (*ft == ft_unknown || *ft == ft_link) {
446
+	if (S_ISDIR(statbuf->st_mode) &&
447
+	    (*ft != ft_link || (flags & CLI_FTW_FOLLOW_DIR_SYMLINK))) {
448
+	    /* A directory, or (a symlink to a directory and we're following dir
449
+	     * symlinks) */
450
+	    *ft = ft_directory;
451
+	} else if (S_ISREG(statbuf->st_mode) &&
452
+		   (*ft != ft_link || (flags & CLI_FTW_FOLLOW_FILE_SYMLINK))) {
453
+	    /* A file, or (a symlink to a file and we're following file symlinks) */
454
+	    *ft = ft_regular;
455
+	} else {
456
+	    /* default: skipped */
457
+	    *ft = S_ISLNK(statbuf->st_mode) ?
458
+		ft_skipped_link : ft_skipped_special;
459
+	}
460
+    }
461
+    return stated;
462
+}
463
+
464
+static int handle_filetype(const char *fname, int flags,
465
+			   struct stat *statbuf, int *stated, enum filetype *ft,
466
+			   cli_ftw_cb callback, struct cli_ftw_cbdata *data)
467
+{
468
+    int ret;
469
+
470
+    *stated = get_filetype(fname, flags, flags & CLI_FTW_NEED_STAT , statbuf, ft);
471
+
472
+    if (*stated == -1) {
473
+	/*  we failed a stat() or lstat() */
474
+	ret = callback(NULL, NULL, fname, error_stat, data);
475
+	if (ret != CL_SUCCESS)
476
+	    return ret;
477
+	*ft = ft_unknown;
478
+    } else if (*ft == ft_skipped_link || *ft == ft_skipped_special) {
479
+	/* skipped filetype */
480
+	ret = callback(stated ? statbuf : NULL, NULL, fname,
481
+		       *ft == ft_skipped_link ?
482
+		       warning_skipped_link : warning_skipped_special, data);
483
+	if (ret != CL_SUCCESS)
484
+	    return ret;
485
+    }
486
+    return CL_SUCCESS;
487
+}
488
+
489
+static int cli_ftw_dir(const char *dirname, int flags, int maxdepth, cli_ftw_cb callback, struct cli_ftw_cbdata *data);
490
+static int handle_entry(struct dirent_data *entry, int flags, int maxdepth, cli_ftw_cb callback, struct cli_ftw_cbdata *data)
491
+{
492
+    if (!entry->is_dir) {
493
+	return callback(entry->statbuf, entry->filename, entry->filename, visit_file, data);
494
+    } else {
495
+	return cli_ftw_dir(entry->dirname, flags, maxdepth, callback, data);
496
+    }
497
+}
498
+
499
+int cli_ftw(const char *path, int flags, int maxdepth, cli_ftw_cb callback, struct cli_ftw_cbdata *data)
500
+{
501
+    struct stat statbuf;
502
+    enum filetype ft = ft_unknown;
503
+    struct dirent_data entry;
504
+    int stated = 0;
505
+
506
+    int ret = handle_filetype(path, flags, &statbuf, &stated, &ft, callback, data);
507
+    if (ret != CL_SUCCESS)
508
+	return ret;
509
+    if (ft_skipped(ft))
510
+	return CL_SUCCESS;
511
+    entry.statbuf = stated ? &statbuf : NULL;
512
+    entry.is_dir = ft == ft_directory;
513
+    entry.filename = entry.is_dir ? NULL : strdup(path);
514
+    entry.dirname = entry.is_dir ? path : NULL;
515
+    if (entry.is_dir) {
516
+	ret = callback(entry.statbuf, NULL, path, visit_directory_toplev, data);
517
+	if (ret != CL_SUCCESS)
518
+	    return ret;
519
+    }
520
+    return handle_entry(&entry, flags, maxdepth, callback, data);
521
+}
522
+
523
+static int cli_ftw_dir(const char *dirname, int flags, int maxdepth, cli_ftw_cb callback, struct cli_ftw_cbdata *data)
524
+{
525
+    DIR *dd;
526
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
527
+    union {
528
+	struct dirent d;
529
+	char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
530
+    } result;
531
+#endif
532
+    struct dirent_data *entries = NULL;
533
+    size_t i, entries_cnt = 0;
534
+    int ret;
535
+
536
+    if (maxdepth < 0) {
537
+	/* exceeded recursion limit */
538
+	ret = callback(NULL, NULL, dirname, warning_skipped_dir, data);
539
+	return ret;
540
+    }
541
+
542
+    if((dd = opendir(dirname)) != NULL) {
543
+	struct dirent *dent;
544
+	errno = 0;
545
+	ret = CL_SUCCESS;
546
+#ifdef HAVE_READDIR_R_3
547
+	while(!readdir_r(dd, &result.d, &dent) && dent) {
548
+#elif defined(HAVE_READDIR_R_2)
549
+	while((dent = (struct dirent *) readdir_r(dd, &result.d))) {
550
+#else
551
+	while((dent = readdir(dd))) {
552
+#endif
553
+	    int stated = 0;
554
+	    enum filetype ft;
555
+	    char *fname;
556
+	    struct stat statbuf;
557
+	    struct stat *statbufp;
558
+
559
+	    if(!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
560
+		continue;
561
+#ifdef _DIRENT_HAVE_D_TYPE
562
+	    switch (dent->d_type) {
563
+		case DT_DIR:
564
+		    ft = ft_directory;
565
+		    break;
566
+		case DT_LNK:
567
+		    if (!(flags & FOLLOW_SYMLINK_MASK)) {
568
+			/* we don't follow symlinks, don't bother
569
+			 * stating it */
570
+			errno = 0;
571
+			continue;
572
+		    }
573
+		    ft = ft_link;
574
+		    break;
575
+		case DT_REG:
576
+		    ft = ft_regular;
577
+		    break;
578
+		case DT_UNKNOWN:
579
+		    ft = ft_unknown;
580
+		    break;
581
+		default:
582
+		    ft = ft_skipped_special;
583
+		    break;
584
+	    }
585
+#else
586
+	    ft = ft_unknown;
587
+#endif
588
+	    fname = (char *) cli_malloc(strlen(dirname) + strlen(dent->d_name) + 2);
589
+	    if(!fname) {
590
+		ret = callback(NULL, NULL, dirname, error_mem, data);
591
+		if (ret != CL_SUCCESS)
592
+		    break;
593
+	    }
594
+	    sprintf(fname, "%s/%s", dirname, dent->d_name);
595
+
596
+	    ret = handle_filetype(fname, flags, &statbuf, &stated, &ft, callback, data);
597
+	    if (ret != CL_SUCCESS) {
598
+		free(fname);
599
+		break;
600
+	    }
601
+
602
+	    if (ft_skipped(ft)) { /* skip */
603
+		free(fname);
604
+		errno = 0;
605
+		continue;
606
+	    }
607
+
608
+	    if (stated && (flags & CLI_FTW_NEED_STAT)) {
609
+		statbufp = cli_malloc(sizeof(*statbufp));
610
+		if (!statbufp) {
611
+		    ret = callback(stated ? &statbuf : NULL, NULL, fname, error_mem, data);
612
+		    free(fname);
613
+		    if (ret != CL_SUCCESS)
614
+			break;
615
+		    else {
616
+			errno = 0;
617
+			continue;
618
+		    }
619
+		}
620
+		memcpy(statbufp, &statbuf, sizeof(statbuf));
621
+	    } else {
622
+		statbufp = 0;
623
+	    }
624
+
625
+	    entries_cnt++;
626
+	    entries = cli_realloc(entries, entries_cnt*sizeof(*entries));
627
+	    if (!entries) {
628
+		ret = callback(stated ? &statbuf : NULL, NULL, fname, error_mem, data);
629
+		free(fname);
630
+		if (statbufp)
631
+		    free(statbufp);
632
+		break;
633
+	    } else {
634
+		struct dirent_data *entry = &entries[entries_cnt-1];
635
+		entry->filename = fname;
636
+		entry->statbuf = statbufp;
637
+		entry->is_dir = ft == ft_directory;
638
+		entry->dirname = entry->is_dir ? fname : NULL;
639
+#ifdef _XOPEN_UNIX
640
+		entry->ino = dent->d_ino;
641
+#else
642
+		entry->ino = -1;
643
+#endif
644
+	    }
645
+	    errno = 0;
646
+	}
647
+	closedir(dd);
648
+
649
+	if (entries) {
650
+	    qsort(entries, entries_cnt, sizeof(*entries), ftw_compare);
651
+	    for (i = 0; i < entries_cnt; i++) {
652
+		struct dirent_data *entry = &entries[i];
653
+		ret = handle_entry(entry, flags, maxdepth-1, callback, data);
654
+		if (entry->is_dir)
655
+		    free(entry->filename);
656
+		if (entry->statbuf)
657
+		    free(entry->statbuf);
658
+		if (ret != CL_SUCCESS)
659
+		    break;
660
+	    }
661
+	    free(entries);
662
+	}
663
+    } else {
664
+	ret = callback(NULL, NULL, dirname, error_stat, data);
665
+    }
666
+    return ret;
667
+}