git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@768 77e5149b-7576-45b1-b177-96237e5ba77b
Nigel Horne authored on 2004/08/19 06:39:36... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Wed Aug 18 22:38:39 BST 2004 (njh) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav/mbox.c: Multithread the followURL code - URLs are now followed in parallel |
|
4 |
+ |
|
1 | 5 |
Wed Aug 18 20:37:42 CEST 2004 (tk) |
2 | 6 |
---------------------------------- |
3 | 7 |
* libclamav/contrib: Include database optimisation tool (optimize/optimize.c). |
... | ... |
@@ -17,6 +17,9 @@ |
17 | 17 |
* |
18 | 18 |
* Change History: |
19 | 19 |
* $Log: mbox.c,v $ |
20 |
+ * Revision 1.105 2004/08/18 21:35:08 nigelhorne |
|
21 |
+ * Multithread the FollowURL calls |
|
22 |
+ * |
|
20 | 23 |
* Revision 1.104 2004/08/18 15:53:43 nigelhorne |
21 | 24 |
* Honour CL_MAILURL |
22 | 25 |
* |
... | ... |
@@ -300,7 +303,7 @@ |
300 | 300 |
* Compilable under SCO; removed duplicate code with message.c |
301 | 301 |
* |
302 | 302 |
*/ |
303 |
-static char const rcsid[] = "$Id: mbox.c,v 1.104 2004/08/18 15:53:43 nigelhorne Exp $"; |
|
303 |
+static char const rcsid[] = "$Id: mbox.c,v 1.105 2004/08/18 21:35:08 nigelhorne Exp $"; |
|
304 | 304 |
|
305 | 305 |
#if HAVE_CONFIG_H |
306 | 306 |
#include "clamav-config.h" |
... | ... |
@@ -415,7 +418,16 @@ static bool saveFile(const blob *b, const char *dir); |
415 | 415 |
|
416 | 416 |
static void checkURLs(message *m, const char *dir); |
417 | 417 |
#ifdef WITH_CURL |
418 |
-static void getURL(const char *url, const char *dir, const char *filename); |
|
418 |
+struct arg { |
|
419 |
+ char *url; |
|
420 |
+ char *dir; |
|
421 |
+ char *filename; |
|
422 |
+}; |
|
423 |
+#ifdef CL_THREAD_SAFE |
|
424 |
+static void *getURL(void *a); |
|
425 |
+#else |
|
426 |
+static void *getURL(struct arg *arg); |
|
427 |
+#endif |
|
419 | 428 |
#endif |
420 | 429 |
|
421 | 430 |
|
... | ... |
@@ -2307,6 +2319,10 @@ checkURLs(message *m, const char *dir) |
2307 | 2307 |
size_t len; |
2308 | 2308 |
table_t *t; |
2309 | 2309 |
int n; |
2310 |
+#if defined(WITH_CURL) && defined(CL_THREAD_SAFE) |
|
2311 |
+ pthread_t tid[MAX_URLS]; |
|
2312 |
+ struct arg args[MAX_URLS]; |
|
2313 |
+#endif |
|
2310 | 2314 |
|
2311 | 2315 |
if(b == NULL) |
2312 | 2316 |
return; |
... | ... |
@@ -2335,13 +2351,18 @@ checkURLs(message *m, const char *dir) |
2335 | 2335 |
while(len >= 8) { |
2336 | 2336 |
/* FIXME: allow any amount of white space */ |
2337 | 2337 |
if(strncasecmp(ptr, "<a href=", 8) == 0) { |
2338 |
-#ifndef WITH_CURL |
|
2338 |
+#ifdef WITH_CURL |
|
2339 |
+#ifndef CL_THREAD_SAFE |
|
2340 |
+ struct arg arg; |
|
2341 |
+#endif |
|
2342 |
+ |
|
2343 |
+#else /*!WITH_CURL*/ |
|
2339 | 2344 |
#ifdef CL_THREAD_SAFE |
2340 | 2345 |
static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER; |
2341 | 2346 |
#endif |
2342 | 2347 |
struct stat statb; |
2343 | 2348 |
char cmd[512]; |
2344 |
-#endif |
|
2349 |
+#endif /*WITH_CURL*/ |
|
2345 | 2350 |
char *p2 = &ptr[8]; |
2346 | 2351 |
char *p3; |
2347 | 2352 |
char name[512]; |
... | ... |
@@ -2365,6 +2386,10 @@ checkURLs(message *m, const char *dir) |
2365 | 2365 |
continue; |
2366 | 2366 |
if(*p2 == '\0') |
2367 | 2367 |
continue; |
2368 |
+ if(n == MAX_URLS) { |
|
2369 |
+ cli_warnmsg("Not all URLs will be scanned\n"); |
|
2370 |
+ break; |
|
2371 |
+ } |
|
2368 | 2372 |
if(tableFind(t, p2) == 1) { |
2369 | 2373 |
cli_dbgmsg("URL %s already downloaded\n", p2); |
2370 | 2374 |
continue; |
... | ... |
@@ -2377,7 +2402,18 @@ checkURLs(message *m, const char *dir) |
2377 | 2377 |
*p3 = '_'; |
2378 | 2378 |
|
2379 | 2379 |
#ifdef WITH_CURL |
2380 |
- getURL(p2, dir, name); |
|
2380 |
+#ifdef CL_THREAD_SAFE |
|
2381 |
+ args[n].url = strdup(p2); |
|
2382 |
+ args[n].dir = strdup(dir); |
|
2383 |
+ args[n].filename = strdup(name); |
|
2384 |
+ pthread_create(&tid[n], NULL, getURL, &args[n]); |
|
2385 |
+#else |
|
2386 |
+ arg.url = p2; |
|
2387 |
+ arg.dir = dir; |
|
2388 |
+ arg.filename = name; |
|
2389 |
+ getURL(&arg); |
|
2390 |
+#endif |
|
2391 |
+ |
|
2381 | 2392 |
#else |
2382 | 2393 |
/* |
2383 | 2394 |
* TODO: maximum size and timeouts |
... | ... |
@@ -2401,48 +2437,78 @@ checkURLs(message *m, const char *dir) |
2401 | 2401 |
(void)unlink(cmd); |
2402 | 2402 |
} |
2403 | 2403 |
#endif |
2404 |
- if(++n > MAX_URLS) { |
|
2405 |
- cli_warnmsg("Not all URLs will be scanned\n"); |
|
2406 |
- break; |
|
2407 |
- } |
|
2404 |
+ ++n; |
|
2408 | 2405 |
} |
2409 | 2406 |
ptr++; |
2410 | 2407 |
len--; |
2411 | 2408 |
} |
2412 | 2409 |
blobDestroy(b); |
2413 | 2410 |
tableDestroy(t); |
2411 |
+ |
|
2412 |
+#if defined(WITH_CURL) && defined(CL_THREAD_SAFE) |
|
2413 |
+ cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n); |
|
2414 |
+ while(--n >= 0) { |
|
2415 |
+ pthread_join(tid[n], NULL); |
|
2416 |
+ free(args[n].url); |
|
2417 |
+ free(args[n].dir); |
|
2418 |
+ free(args[n].filename); |
|
2419 |
+ } |
|
2420 |
+#endif |
|
2414 | 2421 |
} |
2415 | 2422 |
|
2416 | 2423 |
#ifdef WITH_CURL |
2424 |
+static void * |
|
2425 |
+#ifdef CL_THREAD_SAFE |
|
2426 |
+getURL(void *a) |
|
2427 |
+#else |
|
2417 | 2428 |
static void |
2418 |
-getURL(const char *url, const char *dir, const char *filename) |
|
2429 |
+getURL(struct arg *arg) |
|
2430 |
+#endif |
|
2419 | 2431 |
{ |
2420 | 2432 |
char *fout; |
2421 | 2433 |
CURL *curl; |
2422 | 2434 |
FILE *fp; |
2423 | 2435 |
struct curl_slist *headers; |
2424 | 2436 |
static int initialised = 0; |
2437 |
+#ifdef CL_THREAD_SAFE |
|
2438 |
+ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; |
|
2439 |
+ struct arg *arg = (struct arg *)a; |
|
2440 |
+#endif |
|
2441 |
+ const char *url = arg->url; |
|
2442 |
+ const char *dir = arg->dir; |
|
2443 |
+ const char *filename = arg->filename; |
|
2425 | 2444 |
|
2445 |
+#ifdef CL_THREAD_SAFE |
|
2446 |
+ pthread_mutex_lock(&init_mutex); |
|
2447 |
+#endif |
|
2426 | 2448 |
if(!initialised) { |
2427 |
- if(curl_global_init(CURL_GLOBAL_NOTHING) != 0) |
|
2428 |
- return; |
|
2449 |
+ if(curl_global_init(CURL_GLOBAL_NOTHING) != 0) { |
|
2450 |
+#ifdef CL_THREAD_SAFE |
|
2451 |
+ pthread_mutex_unlock(&init_mutex); |
|
2452 |
+#endif |
|
2453 |
+ return NULL; |
|
2454 |
+ } |
|
2429 | 2455 |
initialised = 1; |
2430 | 2456 |
} |
2457 |
+#ifdef CL_THREAD_SAFE |
|
2458 |
+ pthread_mutex_unlock(&init_mutex); |
|
2459 |
+#endif |
|
2460 |
+ |
|
2431 | 2461 |
/* easy isn't the word I'd use... */ |
2432 | 2462 |
curl = curl_easy_init(); |
2433 | 2463 |
if(curl == NULL) |
2434 |
- return; |
|
2464 |
+ return NULL; |
|
2435 | 2465 |
|
2436 | 2466 |
(void)curl_easy_setopt(curl, CURLOPT_USERAGENT, "www.clamav.net"); |
2437 | 2467 |
|
2438 | 2468 |
if(curl_easy_setopt(curl, CURLOPT_URL, url) != 0) |
2439 |
- return; |
|
2469 |
+ return NULL; |
|
2440 | 2470 |
|
2441 | 2471 |
fout = cli_malloc(strlen(dir) + strlen(filename) + 2); |
2442 | 2472 |
|
2443 | 2473 |
if(fout == NULL) { |
2444 | 2474 |
curl_easy_cleanup(curl); |
2445 |
- return; |
|
2475 |
+ return NULL; |
|
2446 | 2476 |
} |
2447 | 2477 |
|
2448 | 2478 |
sprintf(fout, "%s/%s", dir, filename); |
... | ... |
@@ -2453,8 +2519,15 @@ getURL(const char *url, const char *dir, const char *filename) |
2453 | 2453 |
perror(fout); |
2454 | 2454 |
free(fout); |
2455 | 2455 |
curl_easy_cleanup(curl); |
2456 |
- return; |
|
2456 |
+ return NULL; |
|
2457 | 2457 |
} |
2458 |
+ if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) { |
|
2459 |
+ fclose(fp); |
|
2460 |
+ free(fout); |
|
2461 |
+ curl_easy_cleanup(curl); |
|
2462 |
+ return NULL; |
|
2463 |
+ } |
|
2464 |
+ |
|
2458 | 2465 |
/* |
2459 | 2466 |
* If an item is in squid's cache get it from there (TCP_HIT/200) |
2460 | 2467 |
* by default curl doesn't (TCP_CLIENT_REFRESH_MISS/200) |
... | ... |
@@ -2462,13 +2535,6 @@ getURL(const char *url, const char *dir, const char *filename) |
2462 | 2462 |
headers = curl_slist_append(NULL, "Pragma:"); |
2463 | 2463 |
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); |
2464 | 2464 |
|
2465 |
- if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) { |
|
2466 |
- fclose(fp); |
|
2467 |
- free(fout); |
|
2468 |
- curl_easy_cleanup(curl); |
|
2469 |
- return; |
|
2470 |
- } |
|
2471 |
- |
|
2472 | 2465 |
/* These should be customisable */ |
2473 | 2466 |
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30); |
2474 | 2467 |
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10); |
... | ... |
@@ -2476,14 +2542,29 @@ getURL(const char *url, const char *dir, const char *filename) |
2476 | 2476 |
curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 50*1024); |
2477 | 2477 |
#endif |
2478 | 2478 |
|
2479 |
+#ifdef CL_THREAD_SAFE |
|
2480 |
+ curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0); |
|
2481 |
+#endif |
|
2482 |
+ /* |
|
2483 |
+ * FIXME: valgrind reports "pthread_mutex_unlock: mutex is not locked" |
|
2484 |
+ * from gethostbyaddr_r within this. It may be a bug in libcurl |
|
2485 |
+ * rather than in this code, but I need to check; see Curl_resolv(). |
|
2486 |
+ * If pushed really hard it will sometimes say |
|
2487 |
+ * "Conditional jump or move depends on uninitialised value(s)" and |
|
2488 |
+ * quit. But the program seems to work OK without valgrind... |
|
2489 |
+ * Perhaps Curl_resolv() isn't thread safe? |
|
2490 |
+ */ |
|
2479 | 2491 |
if(curl_easy_perform(curl) != CURLE_OK) { |
2480 | 2492 |
cli_warnmsg("URL %s failed to download\n", url); |
2481 | 2493 |
unlink(fout); |
2482 | 2494 |
} |
2483 | 2495 |
|
2484 | 2496 |
fclose(fp); |
2497 |
+ curl_slist_free_all(headers); |
|
2485 | 2498 |
curl_easy_cleanup(curl); |
2486 | 2499 |
free(fout); |
2500 |
+ |
|
2501 |
+ return NULL; |
|
2487 | 2502 |
} |
2488 | 2503 |
#endif |
2489 | 2504 |
|