Browse code

Merge branch 'master' into fmapify

* master:
rework scan callbacks
add filetype clcb
logg_size is unsigned
fix safebrowsing detection on certain URLs

Conflicts:
libclamav/scanners.c

Török Edvin authored on 2011/06/15 00:26:23
Showing 8 changed files
... ...
@@ -318,7 +318,7 @@ int main(int argc, char **argv)
318 318
 
319 319
 
320 320
     if(logg_size)
321
-	logg("#Log file size limited to %d bytes.\n", logg_size);
321
+	logg("#Log file size limited to %u bytes.\n", logg_size);
322 322
     else
323 323
 	logg("#Log file size limit disabled.\n");
324 324
 
... ...
@@ -200,9 +200,37 @@ extern int cl_engine_free(struct cl_engine *engine);
200 200
 
201 201
 /* CALLBACKS */
202 202
 
203
+/* I certainly wish I could declare the callback prototypes stable and
204
+   move on to better things. But real life crossed my way enough times
205
+   already and what looked perfect had to evolve somehow.
206
+   So all I can say is I'll try my best not to break these things in the long run.
207
+   But I just can't guarantee that won't happen (again). */
208
+
209
+typedef cl_error_t (*clcb_pre_cache)(int fd, const char *type, void *context);
210
+/* PRE-CACHE
211
+   Called for each processed file (both the entry level - AKA 'outer' - file and
212
+   inner files - those generated when processing archive and container files), before
213
+   the actual scanning takes place.
214
+
215
+Input:
216
+fd      = File descriptor which is about to be scanned
217
+type    = File type detected via magic - i.e. NOT on the fly - (e.g. "CL_TYPE_MSEXE")
218
+context = Opaque application provided data
219
+
220
+Output:
221
+CL_CLEAN = File is scanned
222
+CL_BREAK = Whitelisted by callback - file is skipped and marked as clean
223
+CL_VIRUS = Blacklisted by callback - file is skipped and marked as infected
224
+*/
225
+extern void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);
203 226
 
204 227
 typedef cl_error_t (*clcb_pre_scan)(int fd, const char *type, void *context);
205 228
 /* PRE-SCAN
229
+   Called for each NEW file (inner and outer) before the scanning takes place. This is
230
+   roughly the same as clcb_pre_cache, but it is affected by clean file caching.
231
+   This means that it won't be called if a clean cached file (inner or outer) is
232
+   scanned a second time.
233
+
206 234
 Input:
207 235
 fd      = File descriptor which is about to be scanned
208 236
 type    = File type detected via magic - i.e. NOT on the fly - (e.g. "CL_TYPE_MSEXE")
... ...
@@ -215,9 +243,10 @@ CL_VIRUS = Blacklisted by callback - file is skipped and marked as infected
215 215
 */
216 216
 extern void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);
217 217
 
218
-
219 218
 typedef cl_error_t (*clcb_post_scan)(int fd, int result, const char *virname, void *context);
220 219
 /* POST-SCAN
220
+   Called for each processed file (inner and outer), after the scanning is complete.
221
+
221 222
 Input:
222 223
 fd      = File descriptor which was scanned
223 224
 result  = The scan result for the file
... ...
@@ -560,6 +560,7 @@ struct cl_settings *cl_engine_settings_copy(const struct cl_engine *engine)
560 560
     settings->bytecode_mode = engine->bytecode_mode;
561 561
     settings->pua_cats = engine->pua_cats ? strdup(engine->pua_cats) : NULL;
562 562
 
563
+    settings->cb_pre_cache = engine->cb_pre_cache;
563 564
     settings->cb_pre_scan = engine->cb_pre_scan;
564 565
     settings->cb_post_scan = engine->cb_post_scan;
565 566
     settings->cb_sigload = engine->cb_sigload;
... ...
@@ -605,6 +606,7 @@ int cl_engine_settings_apply(struct cl_engine *engine, const struct cl_settings
605 605
 	engine->pua_cats = NULL;
606 606
     }
607 607
 
608
+    engine->cb_pre_cache = settings->cb_pre_cache;
608 609
     engine->cb_pre_scan = settings->cb_pre_scan;
609 610
     engine->cb_post_scan = settings->cb_post_scan;
610 611
     engine->cb_sigload = settings->cb_sigload;
... ...
@@ -1059,6 +1061,10 @@ int cli_bitset_test(bitset_t *bs, unsigned long bit_offset)
1059 1059
 	return (bs->bitset[char_offset] & ((unsigned char)1 << bit_offset));
1060 1060
 }
1061 1061
 
1062
+void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback) {
1063
+    engine->cb_pre_cache = callback;
1064
+}
1065
+
1062 1066
 void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback) {
1063 1067
     engine->cb_pre_scan = callback;
1064 1068
 }
... ...
@@ -252,6 +252,7 @@ struct cl_engine {
252 252
     mpool_t *mempool;
253 253
 
254 254
     /* Callback(s) */
255
+    clcb_pre_cache cb_pre_cache;
255 256
     clcb_pre_scan cb_pre_scan;
256 257
     clcb_post_scan cb_post_scan;
257 258
     clcb_sigload cb_sigload;
... ...
@@ -290,6 +291,7 @@ struct cl_settings {
290 290
     char *pua_cats;
291 291
 
292 292
     /* callbacks */
293
+    clcb_pre_cache cb_pre_cache;
293 294
     clcb_pre_scan cb_pre_scan;
294 295
     clcb_post_scan cb_post_scan;
295 296
     clcb_sigload cb_sigload;
... ...
@@ -1438,9 +1438,6 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
1438 1438
 	cli_dbgmsg("Phishcheck:Checking url %s->%s\n", urls->realLink.data,
1439 1439
 		urls->displayLink.data);
1440 1440
 
1441
-	if(!strcmp(urls->realLink.data,urls->displayLink.data))
1442
-		return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */
1443
-
1444 1441
 	if(!isURL(urls->realLink.data, 0)) {
1445 1442
 		cli_dbgmsg("Real 'url' is not url:%s\n",urls->realLink.data);
1446 1443
 		return CL_PHISH_CLEAN;
... ...
@@ -1456,6 +1453,9 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
1456 1456
 	    }
1457 1457
 	}
1458 1458
 
1459
+	if(!strcmp(urls->realLink.data,urls->displayLink.data))
1460
+		return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */
1461
+
1459 1462
 	if (urls->displayLink.data[0] == '\0') {
1460 1463
 	    return CL_PHISH_CLEAN;
1461 1464
 	}
... ...
@@ -2037,42 +2037,43 @@ static void emax_reached(cli_ctx *ctx) {
2037 2037
 #define LINESTR(x) #x
2038 2038
 #define LINESTR2(x) LINESTR(x)
2039 2039
 #define __AT__  " at line "LINESTR2(__LINE__)
2040
-#define ret_from_magicscan(retcode) do {							\
2041
-    cli_dbgmsg("cli_magic_scandesc: returning %d %s\n", retcode, __AT__);			\
2042
-    if(ctx->engine->cb_post_scan) {								\
2043
-	perf_start(ctx, PERFT_POSTCB);                                                         \
2044
-	switch(ctx->engine->cb_post_scan(desc, retcode, retcode == CL_VIRUS && ctx->virname ? *ctx->virname : NULL, ctx->cb_ctx)) {		\
2045
-	case CL_BREAK:										\
2046
-	    cli_dbgmsg("cli_magic_scandesc: file whitelisted by callback\n");			\
2047
-	    perf_stop(ctx, PERFT_POSTCB);                                                      \
2048
-	    return CL_CLEAN;									\
2049
-	case CL_VIRUS:										\
2050
-	    cli_dbgmsg("cli_magic_scandesc: file blacklisted by callback\n");			\
2051
-	    if(ctx->virname)									\
2052
-		*ctx->virname = "Detected.By.Callback";						\
2053
-	    perf_stop(ctx, PERFT_POSTCB);                                                      \
2054
-	    return CL_VIRUS;									\
2055
-	case CL_CLEAN:										\
2056
-	    break;										\
2057
-	default:										\
2058
-	    cli_warnmsg("cli_magic_scandesc: ignoring bad return code from callback\n");	\
2040
+#define ret_from_magicscan(retcode) \
2041
+    do {											\
2042
+	cli_dbgmsg("cli_magic_scandesc: returning %d %s\n", retcode, __AT__); 			\
2043
+	if(ctx->engine->cb_post_scan) {								\
2044
+	    perf_start(ctx, PERFT_POSTCB);							\
2045
+	    switch(ctx->engine->cb_post_scan(desc, retcode, retcode == CL_VIRUS && ctx->virname ? *ctx->virname : NULL, ctx->cb_ctx)) {	\
2046
+	    case CL_BREAK:									\
2047
+		cli_dbgmsg("cli_magic_scandesc: file whitelisted by post_scan callback\n"); 	\
2048
+		perf_stop(ctx, PERFT_POSTCB);							\
2049
+		return CL_CLEAN;								\
2050
+	    case CL_VIRUS:									\
2051
+		cli_dbgmsg("cli_magic_scandesc: file blacklisted by post_scan callback\n");	\
2052
+		if(ctx->virname)								\
2053
+		    *ctx->virname = "Detected.By.Callback";					\
2054
+		perf_stop(ctx, PERFT_POSTCB);							\
2055
+		return CL_VIRUS;								\
2056
+	    case CL_CLEAN:									\
2057
+		break;										\
2058
+	    default:										\
2059
+		cli_warnmsg("cli_magic_scandesc: ignoring bad return code from post_scan callback\n");	\
2060
+	    }											\
2061
+	    perf_stop(ctx, PERFT_POSTCB);							\
2059 2062
 	}											\
2060
-	perf_stop(ctx, PERFT_POSTCB);                                                          \
2061
-    }\
2062
-    return retcode;										\
2063
+	return retcode;										\
2063 2064
     } while(0)
2064 2065
 
2065 2066
 
2066
-#define CALL_PRESCAN_CB(type_name)	                                                     \
2067
-    if(ctx->engine->cb_pre_scan) {		                                             \
2067
+#define CALL_PRESCAN_CB(scanfn)	                                                     \
2068
+    if(ctx->engine->scanfn) {				\
2068 2069
 	perf_start(ctx, PERFT_PRECB);                                                        \
2069
-	switch(ctx->engine->cb_pre_scan(desc, (type_name), ctx->cb_ctx)) {                   \
2070
+	switch(ctx->engine->scanfn(desc, filetype, ctx->cb_ctx)) {	\
2070 2071
 	case CL_BREAK:                                                                       \
2071
-	    cli_dbgmsg("cli_magic_scandesc: file whitelisted by callback\n");                \
2072
+	    cli_dbgmsg("cli_magic_scandesc: file whitelisted by "#scanfn" callback\n");                \
2072 2073
 	    perf_stop(ctx, PERFT_PRECB);                                                     \
2073 2074
 	    ret_from_magicscan(CL_CLEAN);                                                    \
2074 2075
 	case CL_VIRUS:                                                                       \
2075
-	    cli_dbgmsg("cli_magic_scandesc: file blacklisted by callback\n");                \
2076
+	    cli_dbgmsg("cli_magic_scandesc: file blacklisted by "#scanfn" callback\n");                \
2076 2077
 	    if(ctx->virname)                                                                 \
2077 2078
 		*ctx->virname = "Detected.By.Callback";                                      \
2078 2079
 	    perf_stop(ctx, PERFT_PRECB);                                                     \
... ...
@@ -2097,6 +2098,7 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2097 2097
 	unsigned char hash[16];
2098 2098
 	bitset_t *old_hook_lsig_matches;
2099 2099
 	int desc = (*ctx->fmap)->fd;
2100
+	const char *filetype;
2100 2101
 
2101 2102
     if(ctx->engine->maxreclevel && ctx->recursion > ctx->engine->maxreclevel) {
2102 2103
         cli_dbgmsg("cli_magic_scandesc: Archive recursion limit exceeded (%u, max: %u)\n", ctx->recursion, ctx->engine->maxreclevel);
... ...
@@ -2119,6 +2121,18 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2119 2119
         ret_from_magicscan(CL_CLEAN);
2120 2120
     }
2121 2121
 
2122
+    perf_start(ctx, PERFT_FT);
2123
+    if(type == CL_TYPE_ANY)
2124
+	type = cli_filetype2(*ctx->fmap, ctx->engine);
2125
+    perf_stop(ctx, PERFT_FT);
2126
+    if(type == CL_TYPE_ERROR) {
2127
+	cli_dbgmsg("cli_magic_scandesc: cli_filetype2 returned CL_TYPE_ERROR\n");
2128
+	ctx->hook_lsig_matches = old_hook_lsig_matches;
2129
+	ret_from_magicscan(CL_EREAD);
2130
+    }
2131
+    filetype = cli_ftname(type);
2132
+    CALL_PRESCAN_CB(cb_pre_cache);
2133
+
2122 2134
     perf_start(ctx, PERFT_CACHE);
2123 2135
     if(cache_check(hash, ctx) == CL_CLEAN) {
2124 2136
 	perf_stop(ctx, PERFT_CACHE);
... ...
@@ -2135,7 +2149,7 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2135 2135
 	else
2136 2136
 	    cli_dbgmsg("Raw mode: No support for special files\n");
2137 2137
 
2138
-	CALL_PRESCAN_CB("CL_TYPE_BINARY_DATA");
2138
+	CALL_PRESCAN_CB(cb_pre_scan);
2139 2139
 	if((ret = cli_fmap_scandesc(ctx, 0, 0, NULL, AC_SCAN_VIR, NULL, hash)) == CL_VIRUS)
2140 2140
 	    cli_dbgmsg("%s found in descriptor %d\n", *ctx->virname, desc);
2141 2141
 	else if(ret == CL_CLEAN) {
... ...
@@ -2149,17 +2163,7 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2149 2149
 	ret_from_magicscan(ret);
2150 2150
     }
2151 2151
 
2152
-    perf_start(ctx, PERFT_FT);
2153
-    if(type == CL_TYPE_ANY)
2154
-	type = cli_filetype2(*ctx->fmap, ctx->engine);
2155
-    perf_stop(ctx, PERFT_FT);
2156
-    if(type == CL_TYPE_ERROR) {
2157
-	cli_dbgmsg("cli_magic_scandesc: cli_filetype2 returned CL_TYPE_ERROR\n");
2158
-	ctx->hook_lsig_matches = old_hook_lsig_matches;
2159
-	ret_from_magicscan(CL_EREAD);
2160
-    }
2161
-
2162
-    CALL_PRESCAN_CB(cli_ftname(type));
2152
+    CALL_PRESCAN_CB(cb_pre_scan);
2163 2153
 
2164 2154
 #ifdef HAVE__INTERNAL__SHA_COLLECT
2165 2155
     if(!ctx->sha_collect && type==CL_TYPE_MSEXE) ctx->sha_collect = 1;
... ...
@@ -355,7 +355,7 @@ int CLAMAPI Scan_Initialize(const wchar_t *pEnginesFolder, const wchar_t *pTempR
355 355
 	unlock_engine();
356 356
 	FAIL(CL_EMEM, "Not enough memory for a new engine");
357 357
     }
358
-    cl_engine_set_clcb_pre_scan(engine, prescan_cb);
358
+    cl_engine_set_clcb_pre_cache(engine, prescan_cb);
359 359
     cl_engine_set_clcb_post_scan(engine, postscan_cb);
360 360
     
361 361
     minimal_definitions = bLoadMinDefs;
... ...
@@ -39,6 +39,7 @@ EXPORTS cl_engine_set_clcb_sigload @31
39 39
 EXPORTS cl_scanfile_callback @32
40 40
 EXPORTS cl_set_clcb_msg @33
41 41
 EXPORTS cl_engine_set_clcb_hash @34
42
+EXPORTS cl_engine_set_clcb_pre_cache @35
42 43
 
43 44
 
44 45
 ; path variables