Browse code

Add scanning options for XML-based documents (MSXML, OOXML, HWPML) and HWP3

Kevin Lin authored on 2016/02/03 04:23:13
Showing 10 changed files
... ...
@@ -1010,6 +1010,20 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi
1010 1010
 	logg("HTML support disabled.\n");
1011 1011
     }
1012 1012
 
1013
+    if(optget(opts, "ScanXMLDOCS")->enabled) {
1014
+	logg("XMLDOCS support enabled.\n");
1015
+	options |= CL_SCAN_XMLDOCS;
1016
+    } else {
1017
+	logg("XMLDOCS support disabled.\n");
1018
+    }
1019
+
1020
+    if(optget(opts, "ScanHWP3")->enabled) {
1021
+	logg("HWP3 support enabled.\n");
1022
+	options |= CL_SCAN_HWP3;
1023
+    } else {
1024
+	logg("HWP3 support disabled.\n");
1025
+    }
1026
+
1013 1027
     if(optget(opts,"PhishingScanURLs")->enabled) {
1014 1028
 
1015 1029
 	if(optget(opts,"PhishingAlwaysBlockCloak")->enabled) {
... ...
@@ -262,6 +262,8 @@ void help(void)
262 262
     mprintf("    --scan-pdf[=yes(*)/no]               Scan PDF files\n");
263 263
     mprintf("    --scan-swf[=yes(*)/no]               Scan SWF files\n");
264 264
     mprintf("    --scan-html[=yes(*)/no]              Scan HTML files\n");
265
+    mprintf("    --scan-xmldocs[=yes(*)/no]           Scan xml-based document files\n");
266
+    mprintf("    --scan-hwp3[=yes(*)/no]              Scan HWP3 files\n");
265 267
     mprintf("    --scan-archive[=yes(*)/no]           Scan archive files (supported by libclamav)\n");
266 268
     mprintf("    --detect-broken[=yes/no(*)]          Try to detect broken executable files\n");
267 269
     mprintf("    --block-encrypted[=yes/no(*)]        Block encrypted archives\n");
... ...
@@ -1076,6 +1076,12 @@ int scanmanager(const struct optstruct *opts)
1076 1076
     if(optget(opts, "scan-mail")->enabled)
1077 1077
         options |= CL_SCAN_MAIL;
1078 1078
 
1079
+    if(optget(opts, "scan-xmldocs")->enabled)
1080
+        options |= CL_SCAN_XMLDOCS;
1081
+
1082
+    if(optget(opts, "scan-hwp3")->enabled)
1083
+        options |= CL_SCAN_HWP3;
1084
+
1079 1085
     if(optget(opts, "algorithmic-detection")->enabled)
1080 1086
         options |= CL_SCAN_ALGORITHMIC;
1081 1087
 
... ...
@@ -467,6 +467,20 @@ If you turn off this option, the original files will still be scanned, but witho
467 467
 .br 
468 468
 Default: yes
469 469
 .TP
470
+\fBScanXMLDOCS BOOL\fR
471
+This option enables scanning of XML-based document files supported by libclamav.
472
+.br
473
+If you turn off this option, the original files will still be scanned, but without additional processing.
474
+.br
475
+Default: yes
476
+.TP
477
+\fBScanHWP3 BOOL\fR
478
+This option enables scanning of HWP3 files.
479
+.br
480
+If you turn off this option, the original files will still be scanned, but without additional processing.
481
+.br
482
+Default: yes
483
+.TP 
470 484
 \fBScanArchive BOOL\fR
471 485
 Scan within archives and compressed files.
472 486
 .br
... ...
@@ -168,6 +168,12 @@ Scan SWF files. If you turn off this option, the original files will still be sc
168 168
 \fB\-\-scan\-html[=yes(*)/no]\fR
169 169
 Detect, normalize/decrypt and scan HTML files and embedded scripts. If you turn off this option, the original files will still be scanned, but without additional processing.
170 170
 .TP 
171
+\fB\-\-scan\-xmldocs[=yes(*)/no]\fR
172
+Scan XML-based document files supported by libclamav. If you turn off this option, the original files will still be scanned, but without additional processing.
173
+.TP 
174
+\fB\-\-scan\-hwp3[=yes(*)/no]\fR
175
+Scan HWP3 files. If you turn off this option, the original files will still be scanned, but without additional processing.
176
+.TP 
171 177
 \fB\-\-scan\-archive[=yes(*)/no]\fR
172 178
 Scan archives supported by libclamav. If you turn off this option, the original files will still be scanned, but without unpacking and additional processing.
173 179
 .TP 
... ...
@@ -318,6 +318,18 @@ Example
318 318
 # Default: yes
319 319
 #ScanSWF yes
320 320
 
321
+# This option enables scanning XML-based document files supported by libclamav.
322
+# If you turn off this option, the original files will still be scanned, but
323
+# without additional processing.
324
+# Default: yes
325
+#ScanXMLDOCS yes
326
+
327
+# This option enables scanning of HWP3 files.
328
+# If you turn off this option, the original files will still be scanned, but
329
+# without additional processing.
330
+# Default: yes
331
+#ScanHWP3 yes
332
+
321 333
 
322 334
 ##
323 335
 ## Mail files
... ...
@@ -166,13 +166,15 @@ typedef enum {
166 166
 #define CL_SCAN_ALLMATCHES		0x200000
167 167
 #define CL_SCAN_SWF			0x400000
168 168
 #define CL_SCAN_PARTITION_INTXN         0x800000
169
+#define CL_SCAN_XMLDOCS                 0x1000000
170
+#define CL_SCAN_HWP3                    0x2000000
169 171
 #define CL_SCAN_FILE_PROPERTIES         0x10000000
170 172
 //#define UNUSED                        0x20000000
171 173
 #define CL_SCAN_PERFORMANCE_INFO        0x40000000 /* collect performance timings */
172 174
 #define CL_SCAN_INTERNAL_COLLECT_SHA    0x80000000 /* Enables hash output in sha-collect builds - for internal use only */
173 175
 
174 176
 /* recommended scan settings */
175
-#define CL_SCAN_STDOPT		(CL_SCAN_ARCHIVE | CL_SCAN_MAIL | CL_SCAN_OLE2 | CL_SCAN_PDF | CL_SCAN_HTML | CL_SCAN_PE | CL_SCAN_ALGORITHMIC | CL_SCAN_ELF | CL_SCAN_SWF)
177
+#define CL_SCAN_STDOPT		(CL_SCAN_ARCHIVE | CL_SCAN_MAIL | CL_SCAN_OLE2 | CL_SCAN_PDF | CL_SCAN_HTML | CL_SCAN_PE | CL_SCAN_ALGORITHMIC | CL_SCAN_ELF | CL_SCAN_SWF | CL_SCAN_XMLDOCS | CL_SCAN_HWP3)
176 178
 
177 179
 /* cl_countsigs options */
178 180
 #define CL_COUNTSIGS_OFFICIAL	    0x1
... ...
@@ -473,6 +473,8 @@ extern int have_rar;
473 473
 #define SCAN_ALL            (ctx->options & CL_SCAN_ALLMATCHES)
474 474
 #define SCAN_SWF            (ctx->options & CL_SCAN_SWF)
475 475
 #define SCAN_PROPERTIES     (ctx->options & CL_SCAN_FILE_PROPERTIES)
476
+#define SCAN_XMLDOCS        (ctx->options & CL_SCAN_XMLDOCS)
477
+#define SCAN_HWP3           (ctx->options & CL_SCAN_HWP3)
476 478
 
477 479
 /* based on macros from A. Melnikoff */
478 480
 #define cbswap16(v) (((v & 0xff) << 8) | (((v) >> 8) & 0xff))
... ...
@@ -2273,19 +2273,19 @@ static int cli_scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_file_
2273 2273
                     }
2274 2274
                     break;
2275 2275
                 case CL_TYPE_XML_WORD:
2276
-                    if(DCONF_DOC & DOC_CONF_MSXML) {
2276
+                    if(SCAN_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML)) {
2277 2277
                         cli_dbgmsg("XML-WORD signature found at %u\n", (unsigned int) fpt->offset);
2278 2278
                         ret = cli_scanmsxml(ctx);
2279 2279
                     }
2280 2280
                     break;
2281 2281
                 case CL_TYPE_XML_XL:
2282
-                    if(DCONF_DOC & DOC_CONF_MSXML) {
2282
+                    if(SCAN_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML)) {
2283 2283
                         cli_dbgmsg("XML-XL signature found at %u\n", (unsigned int) fpt->offset);
2284 2284
                         ret = cli_scanmsxml(ctx);
2285 2285
                     }
2286 2286
                     break;
2287 2287
                 case CL_TYPE_XML_HWP:
2288
-                    if(DCONF_DOC & DOC_CONF_HWP) {
2288
+                    if(SCAN_XMLDOCS && (DCONF_DOC & DOC_CONF_HWP)) {
2289 2289
                         cli_dbgmsg("XML-HWP signature found at %u\n", (unsigned int) fpt->offset);
2290 2290
                         ret = cli_scanhwpml(ctx);
2291 2291
                     }
... ...
@@ -2858,7 +2858,7 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2858 2858
 	    break;
2859 2859
 
2860 2860
 	case CL_TYPE_HWP3:
2861
-	    if(DCONF_DOC & DOC_CONF_HWP)
2861
+	    if(SCAN_HWP3 && (DCONF_DOC & DOC_CONF_HWP))
2862 2862
 		ret = cli_scanhwp3(ctx);
2863 2863
 	    break;
2864 2864
 
... ...
@@ -2868,17 +2868,17 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2868 2868
 	    break;
2869 2869
 
2870 2870
 	case CL_TYPE_XML_WORD:
2871
-	    if(DCONF_DOC & DOC_CONF_MSXML)
2871
+	    if(SCAN_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML))
2872 2872
 		ret = cli_scanmsxml(ctx);
2873 2873
 	    break;
2874 2874
 
2875 2875
 	case CL_TYPE_XML_XL:
2876
-	    if(DCONF_DOC & DOC_CONF_MSXML)
2876
+	    if(SCAN_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML))
2877 2877
 		ret = cli_scanmsxml(ctx);
2878 2878
 	    break;
2879 2879
 
2880 2880
 	case CL_TYPE_XML_HWP:
2881
-	    if(DCONF_DOC & DOC_CONF_HWP)
2881
+	    if(SCAN_XMLDOCS && (DCONF_DOC & DOC_CONF_HWP))
2882 2882
 		ret = cli_scanhwpml(ctx);
2883 2883
 	    break;
2884 2884
 
... ...
@@ -2914,7 +2914,7 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
2914 2914
 	case CL_TYPE_OOXML_XL:
2915 2915
 	case CL_TYPE_OOXML_HWP:
2916 2916
 #if HAVE_JSON
2917
-	    if(DCONF_DOC & DOC_CONF_OOXML) {
2917
+	    if(SCAN_XMLDOCS && (DCONF_DOC & DOC_CONF_OOXML)) {
2918 2918
 		if ((ctx->options & CL_SCAN_FILE_PROPERTIES) && (ctx->wrkproperty != NULL)) {
2919 2919
 		    ret = cli_process_ooxml(ctx, type);
2920 2920
 
... ...
@@ -353,6 +353,10 @@ const struct clam_option __clam_options[] = {
353 353
 
354 354
     { "ScanSWF", "scan-swf", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option enables scanning within SWF files.\nIf you turn off this option, the original files will still be scanned, but\nwithout decoding and additional processing.", "yes" },
355 355
 
356
+    { "ScanXMLDOCS", "scan-xmldocs", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option enables scanning xml-based document files supported by libclamav.\nIf you turn off this option, the original files will still be scanned, but\nwithout additional processing.", "yes" },
357
+
358
+    { "ScanHWP3", "scan-hwp3", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option enables scanning HWP3 files.\nIf you turn off this option, the original files will still be scanned, but\nwithout additional processing.", "yes" },
359
+
356 360
     { "ScanArchive", "scan-archive", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Scan within archives and compressed files.\nIf you turn off this option, the original files will still be scanned, but\nwithout unpacking and additional processing.", "yes" },
357 361
 
358 362
     { "ArchiveBlockEncrypted", "block-encrypted", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Mark encrypted archives as viruses (Encrypted.Zip, Encrypted.RAR).", "no" },