git-svn: trunk@2930
Nigel Horne authored on 2007/03/11 10:56:08... | ... |
@@ -130,27 +130,8 @@ typedef enum { |
130 | 130 |
|
131 | 131 |
#define SAVE_TO_DISC /* multipart/message are saved in a temporary file */ |
132 | 132 |
|
133 |
-#ifndef CL_EXPERIMENTAL |
|
134 |
-/* |
|
135 |
- * Code does exist to run FOLLOWURLS on systems without libcurl, however that |
|
136 |
- * is not recommended so it is not compiled by default |
|
137 |
- * |
|
138 |
- * On Solaris, when using the GNU C compiler, the clamAV build system uses the |
|
139 |
- * Sun supplied ld instead of the GNU ld causing an error. Therefore you cannot |
|
140 |
- * use WITH_CURL on Solaris with gcc, you must configure with |
|
141 |
- * "--without-libcurl". I don't know if it works with Sun's own compiler |
|
142 |
- * |
|
143 |
- * Fails to link on Solaris 10 with this error: |
|
144 |
- * Undefined first referenced |
|
145 |
- * symbol in file |
|
146 |
- * __floatdidf /opt/sfw/lib/libcurl.s |
|
147 |
- */ |
|
148 |
-#if defined(C_SOLARIS) && defined(__GNUC__) |
|
149 | 133 |
#undef WITH_CURL |
150 |
-#endif |
|
151 |
-#endif |
|
152 | 134 |
|
153 |
-#if defined(WITH_CURL) || defined(CL_EXPERIMENTAL) |
|
154 | 135 |
#define FOLLOWURLS 5 /* |
155 | 136 |
* Maximum number of URLs scanned in a message |
156 | 137 |
* part. Helps to prevent Dialer.gen-45 and |
... | ... |
@@ -158,7 +139,6 @@ typedef enum { |
158 | 158 |
* dispatched by emails which point to it. If |
159 | 159 |
* not defined, don't check any URLs |
160 | 160 |
*/ |
161 |
-#endif |
|
162 | 161 |
|
163 | 162 |
#ifdef FOLLOWURLS |
164 | 163 |
#include "htmlnorm.h" |
... | ... |
@@ -168,8 +148,6 @@ typedef enum { |
168 | 168 |
#include "phishcheck.h" |
169 | 169 |
#endif |
170 | 170 |
|
171 |
-#ifdef FOLLOWURLS |
|
172 |
- |
|
173 | 171 |
#ifndef C_WINDOWS |
174 | 172 |
#include <netdb.h> |
175 | 173 |
#include <sys/socket.h> |
... | ... |
@@ -184,8 +162,6 @@ typedef enum { |
184 | 184 |
#define closesocket(s) close(s) |
185 | 185 |
#endif |
186 | 186 |
|
187 |
-#ifdef CL_EXPERIMENTAL /* dropping curl support */ |
|
188 |
- |
|
189 | 187 |
#include <fcntl.h> |
190 | 188 |
#ifndef C_WINDOWS |
191 | 189 |
#include <sys/time.h> |
... | ... |
@@ -209,55 +185,6 @@ typedef unsigned int in_addr_t; |
209 | 209 |
#define EISCONN WSAEISCONN |
210 | 210 |
#endif |
211 | 211 |
|
212 |
-#else |
|
213 |
- |
|
214 |
-#ifdef WITH_CURL /* Set in configure */ |
|
215 |
-/* |
|
216 |
- * To build with WITH_CURL: |
|
217 |
- * LDFLAGS=`curl-config --libs` ./configure ... |
|
218 |
- */ |
|
219 |
-#include <curl/curl.h> |
|
220 |
- |
|
221 |
-/* |
|
222 |
- * Needs curl >= 7.11 (I've heard that 7.9 can cause crashes and I have seen |
|
223 |
- * 7.10 segfault, later versions can be flakey as well) |
|
224 |
- * untested) |
|
225 |
- * |
|
226 |
- * Even 7.15 crashes, valgrind shows this: |
|
227 |
- * ==2835== Warning: client switching stacks? SP change: 0xBEB0FD2C --> 0xD0678F0 |
|
228 |
-* ==2835== to suppress, use: --max-stackframe=1314225092 or greater |
|
229 |
- |
|
230 |
- * ==2835== Invalid write of size 4 |
|
231 |
- * ==2835== at 0x40F67BD: Curl_resolv (in /usr/lib/libcurl.so.3.0.0) |
|
232 |
- * ==2835== Address 0xD0678F4 is on thread 1's stack |
|
233 |
- * ==2835== Can't extend stack to 0xD067390 during signal delivery for thread 1: |
|
234 |
- * ==2835== no stack segment |
|
235 |
- * ==2835== |
|
236 |
- * ==2835== Process terminating with default action of signal 11 (SIGSEGV) |
|
237 |
- * ==2835== Access not within mapped region at address 0xD067390 |
|
238 |
- * ==2835== at 0x40F67BD: Curl_resolv (in /usr/lib/libcurl.so.3.0.0) |
|
239 |
- * |
|
240 |
- * This bug has been reported upstream, however they claim that the bug |
|
241 |
- * does not exist :-(. I have received reports that 7.15.5 suffers from the |
|
242 |
- * same problem in Curl_resolv |
|
243 |
- * |
|
244 |
- * TODO: Drop curl and do it ourselves |
|
245 |
- */ |
|
246 |
-#if (LIBCURL_VERSION_NUM < 0x070B00) |
|
247 |
-#undef WITH_CURL /* also undef FOLLOWURLS? */ |
|
248 |
-#endif |
|
249 |
- |
|
250 |
-#else |
|
251 |
-#error "FOLLOWURLS without CURL is no longer supported" |
|
252 |
- |
|
253 |
-#endif /*WITH_CURL*/ |
|
254 |
- |
|
255 |
-#endif /* CL_EXPERIMENTAL */ |
|
256 |
- |
|
257 |
-#else /*!FOLLOWURLS*/ |
|
258 |
-#undef WITH_CURL |
|
259 |
-#endif /*FOLLOWURLS*/ |
|
260 |
- |
|
261 | 212 |
/* |
262 | 213 |
* Define this to handle messages covered by section 7.3.2 of RFC1341. |
263 | 214 |
* This is experimental code so it is up to YOU to (1) ensure it's secure |
... | ... |
@@ -312,22 +239,14 @@ static int count_quotes(const char *buf); |
312 | 312 |
static bool next_is_folded_header(const text *t); |
313 | 313 |
static bool newline_in_header(const char *line); |
314 | 314 |
|
315 |
-static void checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html); |
|
316 |
- |
|
317 |
-#ifdef CL_EXPERIMENTAL |
|
318 |
-static void do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs); |
|
319 | 315 |
static blob *getHrefs(message *m, tag_arguments_t *hrefs); |
320 | 316 |
static void hrefs_done(blob *b, tag_arguments_t *hrefs); |
321 |
-#endif |
|
317 |
+static void checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html); |
|
318 |
+static void do_checkURLs(const char *dir, tag_arguments_t *hrefs); |
|
322 | 319 |
|
323 | 320 |
#if defined(FOLLOWURLS) && (FOLLOWURLS > 0) |
324 | 321 |
struct arg { |
325 |
-#ifdef CL_EXPERIMENTAL |
|
326 | 322 |
char *url; |
327 |
-#else |
|
328 |
- CURL *curl; |
|
329 |
- const char *url; |
|
330 |
-#endif |
|
331 | 323 |
const char *dir; |
332 | 324 |
char *filename; |
333 | 325 |
}; |
... | ... |
@@ -1250,12 +1169,6 @@ cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx) |
1250 | 1250 |
char tmpfilename[16]; |
1251 | 1251 |
int tmpfd; |
1252 | 1252 |
#endif |
1253 |
-#if defined(FOLLOWURLS) && (!defined(CL_EXPERIMENTAL)) |
|
1254 |
- static int initialised = 0; |
|
1255 |
-#ifdef CL_THREAD_SAFE |
|
1256 |
- static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; |
|
1257 |
-#endif |
|
1258 |
-#endif |
|
1259 | 1253 |
|
1260 | 1254 |
#ifdef NEW_WORLD |
1261 | 1255 |
cli_dbgmsg("fall back to old world\n"); |
... | ... |
@@ -1263,27 +1176,6 @@ cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx) |
1263 | 1263 |
cli_dbgmsg("in mbox()\n"); |
1264 | 1264 |
#endif |
1265 | 1265 |
|
1266 |
-#if defined(FOLLOWURLS) && (!defined(CL_EXPERIMENTAL)) |
|
1267 |
- if(ctx->options&CL_SCAN_MAILURL) { |
|
1268 |
-#ifdef CL_THREAD_SAFE |
|
1269 |
- pthread_mutex_lock(&init_mutex); |
|
1270 |
-#endif |
|
1271 |
- if(!initialised) { |
|
1272 |
- if(curl_global_init(CURL_GLOBAL_ALL) != 0) { |
|
1273 |
-#ifdef CL_THREAD_SAFE |
|
1274 |
- pthread_mutex_unlock(&init_mutex); |
|
1275 |
-#endif |
|
1276 |
- cli_warnmsg("curl_global_init failed, disabling mail-follow-urls"); |
|
1277 |
- ctx->options &= ~CL_SCAN_MAILURL; |
|
1278 |
- } |
|
1279 |
- initialised = 1; |
|
1280 |
- } |
|
1281 |
-#ifdef CL_THREAD_SAFE |
|
1282 |
- pthread_mutex_unlock(&init_mutex); |
|
1283 |
-#endif |
|
1284 |
- } |
|
1285 |
-#endif |
|
1286 |
- |
|
1287 | 1266 |
i = dup(desc); |
1288 | 1267 |
if((fd = fdopen(i, "rb")) == NULL) { |
1289 | 1268 |
cli_errmsg("Can't open descriptor %d\n", desc); |
... | ... |
@@ -3930,7 +3822,6 @@ rfc1341(message *m, const char *dir) |
3930 | 3930 |
} |
3931 | 3931 |
#endif |
3932 | 3932 |
|
3933 |
-#ifdef CL_EXPERIMENTAL |
|
3934 | 3933 |
static void |
3935 | 3934 |
hrefs_done(blob *b, tag_arguments_t *hrefs) |
3936 | 3935 |
{ |
... | ... |
@@ -3982,11 +3873,16 @@ getHrefs(message *m, tag_arguments_t *hrefs) |
3982 | 3982 |
return b; |
3983 | 3983 |
} |
3984 | 3984 |
|
3985 |
+#ifdef CL_EXPERIMENTAL |
|
3986 |
+/* |
|
3987 |
+ * Experimental: validate URLs for phishes |
|
3988 |
+ * followurls: see if URLs point to malware |
|
3989 |
+ */ |
|
3985 | 3990 |
static void |
3986 | 3991 |
checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html) |
3987 | 3992 |
{ |
3988 |
- tag_arguments_t hrefs; |
|
3989 | 3993 |
blob *b; |
3994 |
+ tag_arguments_t hrefs; |
|
3990 | 3995 |
|
3991 | 3996 |
/* aCaB: stripped GA related stuff */ |
3992 | 3997 |
hrefs.scanContents = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS; |
... | ... |
@@ -4014,14 +3910,36 @@ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html) |
4014 | 4014 |
} |
4015 | 4015 |
} |
4016 | 4016 |
if(is_html && (mctx->ctx->options&CL_SCAN_MAILURL) && (*rc != VIRUS)) |
4017 |
- do_checkURLs(mainMessage, mctx->dir, &hrefs); |
|
4017 |
+ do_checkURLs(mctx->dir, &hrefs); |
|
4018 | 4018 |
} |
4019 | 4019 |
hrefs_done(b,&hrefs); |
4020 | 4020 |
} |
4021 | 4021 |
|
4022 |
+#else /*!CL_EXPERIMENTAL*/ |
|
4023 |
+ |
|
4024 |
+static void |
|
4025 |
+checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html) |
|
4026 |
+{ |
|
4027 |
+ blob *b; |
|
4028 |
+ tag_arguments_t hrefs; |
|
4029 |
+ |
|
4030 |
+ if(!is_html || (!(mctx->ctx->options&CL_SCAN_MAILURL)) || (*rc == VIRUS)) |
|
4031 |
+ return; |
|
4032 |
+ |
|
4033 |
+ hrefs.count = 0; |
|
4034 |
+ hrefs.tag = hrefs.value = NULL; |
|
4035 |
+ hrefs.contents = NULL; |
|
4036 |
+ |
|
4037 |
+ b = getHrefs(mainMessage, &hrefs); |
|
4038 |
+ if(b) |
|
4039 |
+ do_checkURLs(mctx->dir, &hrefs); |
|
4040 |
+ hrefs_done(b, &hrefs); |
|
4041 |
+} |
|
4042 |
+#endif /*CL_EXPERIMENTAL*/ |
|
4043 |
+ |
|
4022 | 4044 |
#if defined(FOLLOWURLS) && (FOLLOWURLS > 0) |
4023 | 4045 |
static void |
4024 |
-do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs) |
|
4046 |
+do_checkURLs(const char *dir, tag_arguments_t *hrefs) |
|
4025 | 4047 |
{ |
4026 | 4048 |
table_t *t; |
4027 | 4049 |
int i, n; |
... | ... |
@@ -4057,16 +3975,13 @@ do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs) |
4057 | 4057 |
} |
4058 | 4058 |
/* |
4059 | 4059 |
* What about foreign character spoofing? |
4060 |
- * It would be useful be able to check if url |
|
4061 |
- * is the same as the text displayed, e.g. |
|
4062 |
- * <a href="http://dodgy.biz">www.paypal.com</a> |
|
4063 |
- * but that needs support from HTML normalise |
|
4064 | 4060 |
*/ |
4065 | 4061 |
if(strchr(url, '%') && strchr(url, '@')) |
4066 | 4062 |
cli_warnmsg("Possible URL spoofing attempt noticed, but not yet handled (%s)\n", url); |
4067 | 4063 |
|
4068 | 4064 |
if(n == FOLLOWURLS) { |
4069 |
- cli_warnmsg("URL %s will not be scanned\n", url); |
|
4065 |
+ cli_warnmsg("URL %s will not be scanned (FOLLOWURLS limit %d was reached)\n", |
|
4066 |
+ url, FOLLOWURLS); |
|
4070 | 4067 |
break; |
4071 | 4068 |
} |
4072 | 4069 |
|
... | ... |
@@ -4105,200 +4020,15 @@ do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs) |
4105 | 4105 |
} |
4106 | 4106 |
#endif |
4107 | 4107 |
} |
4108 |
-#else |
|
4109 |
-static void |
|
4110 |
-do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs) |
|
4111 |
-{ |
|
4112 |
-} |
|
4113 |
-#endif |
|
4114 | 4108 |
|
4115 |
-#else /*!CL_EXPERIMENTAL*/ |
|
4109 |
+#else /*!FOLLOWURLS*/ |
|
4116 | 4110 |
|
4117 |
-#if defined(FOLLOWURLS) && (FOLLOWURLS > 0) |
|
4118 | 4111 |
static void |
4119 |
-checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html) |
|
4112 |
+do_checkURLs(const char *dir, tag_arguments_t *hrefs) |
|
4120 | 4113 |
{ |
4121 |
- blob *b = messageToBlob(m, 0); |
|
4122 |
- size_t len; |
|
4123 |
- table_t *t; |
|
4124 |
- int i, n; |
|
4125 |
-#if defined(WITH_CURL) && defined(CL_THREAD_SAFE) |
|
4126 |
- pthread_t tid[FOLLOWURLS]; |
|
4127 |
- struct arg args[FOLLOWURLS]; |
|
4128 |
-#endif |
|
4129 |
- tag_arguments_t hrefs; |
|
4130 |
- |
|
4131 |
- if(b == NULL) |
|
4132 |
- return; |
|
4133 |
- |
|
4134 |
- len = blobGetDataSize(b); |
|
4135 |
- |
|
4136 |
- if(len == 0) { |
|
4137 |
- blobDestroy(b); |
|
4138 |
- return; |
|
4139 |
- } |
|
4140 |
- |
|
4141 |
- /* TODO: make this size customisable */ |
|
4142 |
- if(len > 100*1024) { |
|
4143 |
- cli_warnmsg("Viruses pointed to by URL not scanned in large message\n"); |
|
4144 |
- blobDestroy(b); |
|
4145 |
- return; |
|
4146 |
- } |
|
4147 |
- |
|
4148 |
- t = tableCreate(); |
|
4149 |
- if(t == NULL) { |
|
4150 |
- blobDestroy(b); |
|
4151 |
- return; |
|
4152 |
- } |
|
4153 |
- |
|
4154 |
- hrefs.count = 0; |
|
4155 |
- hrefs.tag = hrefs.value = NULL; |
|
4156 |
- |
|
4157 |
- cli_dbgmsg("checkURLs: calling html_normalise_mem\n"); |
|
4158 |
- if(!html_normalise_mem(blobGetData(b), len, NULL, &hrefs)) { |
|
4159 |
- blobDestroy(b); |
|
4160 |
- tableDestroy(t); |
|
4161 |
- return; |
|
4162 |
- } |
|
4163 |
- cli_dbgmsg("checkURLs: html_normalise_mem returned\n"); |
|
4164 |
- |
|
4165 |
- /* TODO: Do we need to call remove_html_comments? */ |
|
4166 |
- |
|
4167 |
- n = 0; |
|
4168 |
- |
|
4169 |
- for(i = 0; i < hrefs.count; i++) { |
|
4170 |
- const char *url = (const char *)hrefs.value[i]; |
|
4171 |
- |
|
4172 |
- /* |
|
4173 |
- * TODO: If it's an image source, it'd be nice to note beacons |
|
4174 |
- * where width="0" height="0", which needs support from |
|
4175 |
- * the HTML normalise code |
|
4176 |
- */ |
|
4177 |
- if(strncasecmp("http://", url, 7) == 0) { |
|
4178 |
- char *ptr; |
|
4179 |
-#ifdef WITH_CURL |
|
4180 |
-#ifndef CL_THREAD_SAFE |
|
4181 |
- struct arg arg; |
|
4182 |
-#endif |
|
4183 |
- |
|
4184 |
-#else /*!WITH_CURL*/ |
|
4185 |
-#ifdef CL_THREAD_SAFE |
|
4186 |
- static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER; |
|
4187 |
-#endif |
|
4188 |
- struct stat statb; |
|
4189 |
- char cmd[512]; |
|
4190 |
-#endif /*WITH_CURL*/ |
|
4191 |
- char name[NAME_MAX + 1]; |
|
4192 |
- |
|
4193 |
- if(tableFind(t, url) == 1) { |
|
4194 |
- cli_dbgmsg("URL %s already downloaded\n", url); |
|
4195 |
- continue; |
|
4196 |
- } |
|
4197 |
- /* |
|
4198 |
- * What about foreign character spoofing? |
|
4199 |
- * It would be useful be able to check if url |
|
4200 |
- * is the same as the text displayed, e.g. |
|
4201 |
- * <a href="http://dodgy.biz">www.paypal.com</a> |
|
4202 |
- * but that needs support from HTML normalise |
|
4203 |
- */ |
|
4204 |
- if(strchr(url, '%') && strchr(url, '@')) |
|
4205 |
- cli_warnmsg("Possible URL spoofing attempt noticed, but not yet handled (%s)\n", url); |
|
4206 |
- |
|
4207 |
- if(n == FOLLOWURLS) { |
|
4208 |
- cli_warnmsg("URL %s will not be scanned\n", url); |
|
4209 |
- break; |
|
4210 |
- } |
|
4211 |
- |
|
4212 |
- (void)tableInsert(t, url, 1); |
|
4213 |
- cli_dbgmsg("Downloading URL %s to be scanned\n", url); |
|
4214 |
- strncpy(name, url, sizeof(name) - 1); |
|
4215 |
- name[sizeof(name) - 1] = '\0'; |
|
4216 |
- for(ptr = name; *ptr; ptr++) |
|
4217 |
- if(*ptr == '/') |
|
4218 |
- *ptr = '_'; |
|
4219 |
- |
|
4220 |
-#ifdef WITH_CURL |
|
4221 |
-#ifdef CL_THREAD_SAFE |
|
4222 |
- args[n].curl = curl_easy_init(); |
|
4223 |
- if(args[n].curl == NULL) { |
|
4224 |
- cli_errmsg("curl_easy_init failed\n"); |
|
4225 |
- continue; |
|
4226 |
- } |
|
4227 |
- args[n].dir = mctx->dir; |
|
4228 |
- args[n].url = url; |
|
4229 |
- args[n].filename = cli_strdup(name); |
|
4230 |
- pthread_create(&tid[n], NULL, getURL, &args[n]); |
|
4231 |
-#else |
|
4232 |
- /* easy isn't the word I'd use... */ |
|
4233 |
- arg.curl = curl_easy_init(); |
|
4234 |
- if(arg.curl == NULL) { |
|
4235 |
- cli_errmsg("curl_easy_init failed\n"); |
|
4236 |
- continue; |
|
4237 |
- } |
|
4238 |
- arg.url = url; |
|
4239 |
- arg.dir = mctx->dir; |
|
4240 |
- arg.filename = name; |
|
4241 |
- getURL(&arg); |
|
4242 |
- curl_easy_cleanup(arg.curl); |
|
4243 |
-#endif |
|
4244 |
- |
|
4245 |
-#else /*!WITH_CURL*/ |
|
4246 |
- cli_warnmsg("The use of mail-follow-urls without CURL being installed is deprecated\n"); |
|
4247 |
- /* |
|
4248 |
- * TODO: maximum size and timeouts |
|
4249 |
- */ |
|
4250 |
- len = sizeof(cmd) - 26 - strlen(mctx->dir) - strlen(name); |
|
4251 |
-#ifdef CL_DEBUG |
|
4252 |
- snprintf(cmd, sizeof(cmd) - 1, "GET -t10 \"%.*s\" >%s/%s", len, url, mctx->dir, name); |
|
4253 |
-#else |
|
4254 |
- snprintf(cmd, sizeof(cmd) - 1, "GET -t10 \"%.*s\" >%s/%s 2>/dev/null", len, url, mctx->dir, name); |
|
4255 |
-#endif |
|
4256 |
- cmd[sizeof(cmd) - 1] = '\0'; |
|
4257 |
- |
|
4258 |
- cli_dbgmsg("%s\n", cmd); |
|
4259 |
-#ifdef CL_THREAD_SAFE |
|
4260 |
- pthread_mutex_lock(&system_mutex); |
|
4261 |
-#endif |
|
4262 |
- system(cmd); |
|
4263 |
-#ifdef CL_THREAD_SAFE |
|
4264 |
- pthread_mutex_unlock(&system_mutex); |
|
4265 |
-#endif |
|
4266 |
- snprintf(cmd, sizeof(cmd), "%s/%s", mctx->dir, name); |
|
4267 |
- if(stat(cmd, &statb) >= 0) |
|
4268 |
- if(statb.st_size == 0) { |
|
4269 |
- cli_warnmsg("URL %s failed to download\n", url); |
|
4270 |
- /* |
|
4271 |
- * Don't bother scanning an empty file |
|
4272 |
- */ |
|
4273 |
- (void)unlink(cmd); |
|
4274 |
- } |
|
4275 |
-#endif |
|
4276 |
- ++n; |
|
4277 |
- } |
|
4278 |
- } |
|
4279 |
- blobDestroy(b); |
|
4280 |
- tableDestroy(t); |
|
4281 |
- |
|
4282 |
-#if defined(WITH_CURL) && defined(CL_THREAD_SAFE) |
|
4283 |
- assert(n <= FOLLOWURLS); |
|
4284 |
- cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n); |
|
4285 |
- while(--n >= 0) { |
|
4286 |
- pthread_join(tid[n], NULL); |
|
4287 |
- free(args[n].filename); |
|
4288 |
- curl_easy_cleanup(args[n].curl); |
|
4289 |
- } |
|
4290 |
-#endif |
|
4291 |
- html_tag_arg_free(&hrefs); |
|
4292 | 4114 |
} |
4293 | 4115 |
|
4294 |
-#else |
|
4295 |
- |
|
4296 |
-static void |
|
4297 |
-checkURLs(message *m, mbox_ctx *mctx, mbox_status* rc, int is_html) |
|
4298 |
-{ |
|
4299 |
-} |
|
4300 | 4116 |
#endif |
4301 |
-#endif /* CL_EXPERIMENTAL */ |
|
4302 | 4117 |
|
4303 | 4118 |
#if defined(FOLLOWURLS) && (FOLLOWURLS > 0) |
4304 | 4119 |
/* |
... | ... |
@@ -4312,8 +4042,6 @@ checkURLs(message *m, mbox_ctx *mctx, mbox_status* rc, int is_html) |
4312 | 4312 |
* an issue here. |
4313 | 4313 |
*/ |
4314 | 4314 |
|
4315 |
-#if defined(CL_EXPERIMENTAL) || (!defined(WITH_CURL)) |
|
4316 |
- |
|
4317 | 4315 |
/* |
4318 | 4316 |
* Removing the reliance on libcurl |
4319 | 4317 |
* Includes some of the freshclam hacks by Everton da Silva Marques |
... | ... |
@@ -4763,7 +4491,7 @@ nonblock_connect(int sock, const struct sockaddr *addr, socklen_t addrlen, int s |
4763 | 4763 |
/* Launch (possibly) non-blocking connect() request */ |
4764 | 4764 |
if(connect(sock, addr, addrlen)) { |
4765 | 4765 |
int e = errno; |
4766 |
- cli_dbgmsg("DEBUG nonblock_connect: connect(): fd=%d errno=%d: %s\n", |
|
4766 |
+ cli_dbgmsg("nonblock_connect: connect(): fd=%d errno=%d: %s\n", |
|
4767 | 4767 |
sock, e, strerror(e)); |
4768 | 4768 |
switch (e) { |
4769 | 4769 |
case EALREADY: |
... | ... |
@@ -4811,7 +4539,7 @@ nonblock_connect(int sock, const struct sockaddr *addr, socklen_t addrlen, int s |
4811 | 4811 |
break; /* failed */ |
4812 | 4812 |
} |
4813 | 4813 |
|
4814 |
- cli_dbgmsg("DEBUG nonblock_connect: select = %d\n", n); |
|
4814 |
+ cli_dbgmsg("nonblock_connect: select = %d\n", n); |
|
4815 | 4815 |
|
4816 | 4816 |
if(n) |
4817 | 4817 |
return connect_error(sock); |
... | ... |
@@ -4847,148 +4575,6 @@ connect_error(int sock) |
4847 | 4847 |
#endif |
4848 | 4848 |
} |
4849 | 4849 |
|
4850 |
-#else |
|
4851 |
- |
|
4852 |
-static int curl_has_segfaulted; |
|
4853 |
-/* |
|
4854 |
- * Inspite of numerious bug reports, curl is still buggy :-( |
|
4855 |
- * For a fuller explanation, read the long comment at the top, including |
|
4856 |
- * the valgrind evidence |
|
4857 |
- */ |
|
4858 |
-static void |
|
4859 |
-curlsegv(int sig) |
|
4860 |
-{ |
|
4861 |
- curl_has_segfaulted = 1; |
|
4862 |
-} |
|
4863 |
- |
|
4864 |
-static void * |
|
4865 |
-#ifdef CL_THREAD_SAFE |
|
4866 |
-getURL(void *a) |
|
4867 |
-#else |
|
4868 |
-getURL(struct arg *arg) |
|
4869 |
-#endif |
|
4870 |
-{ |
|
4871 |
- FILE *fp; |
|
4872 |
- struct curl_slist *headers; |
|
4873 |
-#ifdef CL_THREAD_SAFE |
|
4874 |
- struct arg *arg = (struct arg *)a; |
|
4875 |
-#endif |
|
4876 |
- const char *url = arg->url; |
|
4877 |
- const char *dir = arg->dir; |
|
4878 |
- CURL *curl = arg->curl; |
|
4879 |
- const char *filename = arg->filename; |
|
4880 |
- void (*oldsegv)(int); |
|
4881 |
-#ifdef CURLOPT_ERRORBUFFER |
|
4882 |
- char errorbuffer[CURL_ERROR_SIZE + 1]; |
|
4883 |
-#elif (LIBCURL_VERSION_NUM >= 0x070C00) |
|
4884 |
- CURLcode res = CURLE_OK; |
|
4885 |
-#endif |
|
4886 |
- char fout[NAME_MAX + 1]; |
|
4887 |
- |
|
4888 |
- (void)curl_easy_setopt(curl, CURLOPT_USERAGENT, "www.clamav.net"); |
|
4889 |
- |
|
4890 |
- if(curl_easy_setopt(curl, CURLOPT_URL, url) != 0) { |
|
4891 |
- cli_errmsg("%s: curl_easy_setopt failed\n", url); |
|
4892 |
- return NULL; |
|
4893 |
- } |
|
4894 |
- |
|
4895 |
- snprintf(fout, sizeof(fout) - 1, "%s/%s", dir, filename); |
|
4896 |
- |
|
4897 |
- cli_dbgmsg("Saving %s to %s\n", url, fout); |
|
4898 |
- fp = fopen(fout, "wb"); |
|
4899 |
- |
|
4900 |
- if(fp == NULL) { |
|
4901 |
- cli_errmsg("Can't open '%s' for writing", fout); |
|
4902 |
- return NULL; |
|
4903 |
- } |
|
4904 |
-#ifdef CURLOPT_WRITEDATA |
|
4905 |
- if(curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp) != 0) { |
|
4906 |
- fclose(fp); |
|
4907 |
- return NULL; |
|
4908 |
- } |
|
4909 |
-#else |
|
4910 |
- if(curl_easy_setopt(curl, CURLOPT_FILE, fp) != 0) { |
|
4911 |
- fclose(fp); |
|
4912 |
- return NULL; |
|
4913 |
- } |
|
4914 |
-#endif |
|
4915 |
- |
|
4916 |
- /* |
|
4917 |
- * If an item is in squid's cache get it from there (TCP_HIT/200) |
|
4918 |
- * by default curl doesn't (TCP_CLIENT_REFRESH_MISS/200) |
|
4919 |
- */ |
|
4920 |
- headers = curl_slist_append(NULL, "Pragma:"); |
|
4921 |
- curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); |
|
4922 |
- |
|
4923 |
- /* These should be customisable */ |
|
4924 |
- curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30); |
|
4925 |
- curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10); |
|
4926 |
-#ifdef CURLOPT_MAXFILESIZE |
|
4927 |
- curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, 50*1024); |
|
4928 |
-#endif |
|
4929 |
- |
|
4930 |
-#ifdef CL_THREAD_SAFE |
|
4931 |
-#ifdef CURLOPT_DNS_USE_GLOBAL_CACHE |
|
4932 |
- /* Apparently this is depracated */ |
|
4933 |
- /*curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);*/ |
|
4934 |
-#endif |
|
4935 |
-#endif |
|
4936 |
- |
|
4937 |
-#ifdef CL_THREAD_SAFE |
|
4938 |
-#ifdef CURLOPT_NOSIGNAL |
|
4939 |
- curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); |
|
4940 |
-#endif |
|
4941 |
-#endif |
|
4942 |
- |
|
4943 |
- /* |
|
4944 |
- * Prevent password: prompting with older versions |
|
4945 |
- * FIXME: a better username? |
|
4946 |
- */ |
|
4947 |
- curl_easy_setopt(curl, CURLOPT_USERPWD, "username:password"); |
|
4948 |
- |
|
4949 |
- /* |
|
4950 |
- * FIXME: valgrind reports "pthread_mutex_unlock: mutex is not locked" |
|
4951 |
- * from gethostbyaddr_r within this. It may be a bug in libcurl |
|
4952 |
- * rather than this code, but I need to check, see Curl_resolv() |
|
4953 |
- * If pushed really hard it will sometimes say |
|
4954 |
- * Conditional jump or move depends on uninitialised value(s) and |
|
4955 |
- * quit. But the program seems to work OK without valgrind... |
|
4956 |
- * Perhaps Curl_resolv() isn't thread safe? |
|
4957 |
- * |
|
4958 |
- * I have seen segfaults in version 7.12.3. Version 7.14 seems OK. |
|
4959 |
- */ |
|
4960 |
- /* |
|
4961 |
- * On some C libraries (notably with FC3, glibc-2.3.3-74) you get a |
|
4962 |
- * memory leak here in getaddrinfo(), see |
|
4963 |
- * https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=139559 |
|
4964 |
- */ |
|
4965 |
- curl_has_segfaulted = 0; |
|
4966 |
- oldsegv = signal(SIGSEGV, curlsegv); |
|
4967 |
-#ifdef CURLOPT_ERRORBUFFER |
|
4968 |
- curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errorbuffer); |
|
4969 |
- |
|
4970 |
- if(curl_easy_perform(curl) != CURLE_OK) |
|
4971 |
- cli_warnmsg("URL %s failed to download: %s\n", url, errorbuffer); |
|
4972 |
-#elif (LIBCURL_VERSION_NUM >= 0x070C00) |
|
4973 |
- if((res = curl_easy_perform(curl)) != CURLE_OK) |
|
4974 |
- cli_warnmsg("URL %s failed to download: %s\n", url, |
|
4975 |
- curl_easy_strerror(res)); |
|
4976 |
-#else |
|
4977 |
- if(curl_easy_perform(curl) != CURLE_OK) |
|
4978 |
- cli_warnmsg("URL %s failed to download\n", url); |
|
4979 |
-#endif |
|
4980 |
- |
|
4981 |
- fclose(fp); |
|
4982 |
- curl_slist_free_all(headers); |
|
4983 |
- |
|
4984 |
- if(curl_has_segfaulted) |
|
4985 |
- cli_warnmsg("Libcurl has segfaulted on '%s'\n", url); |
|
4986 |
- |
|
4987 |
- signal(SIGSEGV, oldsegv); |
|
4988 |
- return NULL; |
|
4989 |
-} |
|
4990 |
-#endif |
|
4991 |
- |
|
4992 | 4850 |
#endif |
4993 | 4851 |
|
4994 | 4852 |
#ifdef HAVE_BACKTRACE |