Browse code

Merge remote-tracking branch 'origin/features/yara' into swebb/clamyara

Conflicts:
libclamav/Makefile.am
libclamav/Makefile.in
libclamav/readdb.c

Shawn Webb authored on 2014/11/11 02:09:12
Showing 34 changed files
... ...
@@ -457,9 +457,13 @@ int main(int argc, char **argv)
457 457
 #ifdef HAVE_BZLIB_H
458 458
 	printf("BZIP2 ");
459 459
 #endif
460
+
460 461
 #ifdef HAVE_LIBXML2
461 462
 	printf("LIBXML2 ");
462 463
 #endif
464
+#ifdef HAVE_PCRE
465
+	printf("PCRE ");
466
+#endif
463 467
 #ifdef HAVE_JSON
464 468
 	printf("JSON ");
465 469
 #endif
... ...
@@ -874,6 +874,36 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi
874 874
     val = cl_engine_get_num(engine, CL_ENGINE_MAX_ICONSPE, NULL);
875 875
     logg("Limits: MaxIconsPE limit set to %llu.\n", val);
876 876
 
877
+    if((opt = optget(opts, "PCREMatchLimit"))->active) {
878
+        if((ret = cl_engine_set_num(engine, CL_ENGINE_PCRE_MATCH_LIMIT, opt->numarg))) {
879
+            logg("!cli_engine_set_num(PCREMatchLimit) failed: %s\n", cl_strerror(ret));
880
+            cl_engine_free(engine);
881
+            return 1;
882
+        }
883
+    }
884
+    val = cl_engine_get_num(engine, CL_ENGINE_PCRE_MATCH_LIMIT, NULL);
885
+    logg("Limits: PCREMatchLimit limit set to %llu.\n", val);
886
+
887
+    if((opt = optget(opts, "PCRERecMatchLimit"))->active) {
888
+        if((ret = cl_engine_set_num(engine, CL_ENGINE_PCRE_RECMATCH_LIMIT, opt->numarg))) {
889
+            logg("!cli_engine_set_num(PCRERecMatchLimit) failed: %s\n", cl_strerror(ret));
890
+            cl_engine_free(engine);
891
+            return 1;
892
+        }
893
+    }
894
+    val = cl_engine_get_num(engine, CL_ENGINE_PCRE_RECMATCH_LIMIT, NULL);
895
+    logg("Limits: PCRERecMatchLimit limit set to %llu.\n", val);
896
+
897
+    if((opt = optget(opts, "PCREMaxFileSize"))->active) {
898
+        if((ret = cl_engine_set_num(engine, CL_ENGINE_PCRE_MAX_FILESIZE, opt->numarg))) {
899
+            logg("!cli_engine_set_num(PCREMaxFileSize) failed: %s\n", cl_strerror(ret));
900
+            cl_engine_free(engine);
901
+            return 1;
902
+        }
903
+    }
904
+    val = cl_engine_get_num(engine, CL_ENGINE_PCRE_MAX_FILESIZE, NULL);
905
+    logg("Limits: PCREMaxFileSize limit set to %llu.\n", val);
906
+
877 907
     if(optget(opts, "ScanArchive")->enabled) {
878 908
 	logg("Archive support enabled.\n");
879 909
 	options |= CL_SCAN_ARCHIVE;
... ...
@@ -283,6 +283,11 @@ void help(void)
283 283
     mprintf("    --disable-pe-stats                   Disable submission of individual PE sections in stats submissions\n");
284 284
     mprintf("    --stats-timeout=#n                   Number of seconds to wait for waiting a response back from the stats server\n");
285 285
     mprintf("    --stats-host-id=UUID                 Set the Host ID used when submitting statistical info.\n");
286
+#if HAVE_PCRE
287
+    mprintf("    --pcre-match-limit=#n                Maximum calls to the PCRE match function.\n");
288
+    mprintf("    --pcre-recmatch-limit=#n             Maximum recursive calls to the PCRE match function.\n");
289
+    mprintf("    --pcre-max-filesize=#n               Maximum size file to perform PCRE sunsig matching.\n");
290
+#endif /* HAVE_PCRE */
286 291
     mprintf("\n");
287 292
     mprintf("(*) Default scan settings\n");
288 293
     mprintf("(**) Certain files (e.g. documents, archives, etc.) may in turn contain other\n");
... ...
@@ -59,6 +59,7 @@
59 59
 #include "libclamav/clamav.h"
60 60
 #include "libclamav/others.h"
61 61
 #include "libclamav/matcher-ac.h"
62
+#include "libclamav/matcher-pcre.h"
62 63
 #include "libclamav/str.h"
63 64
 #include "libclamav/readdb.h"
64 65
 #include "libclamav/cltypes.h"
... ...
@@ -764,9 +765,6 @@ int scanmanager(const struct optstruct *opts)
764 764
     if(optget(opts, "bytecode-unsigned")->enabled)
765 765
         dboptions |= CL_DB_BYTECODE_UNSIGNED;
766 766
 
767
-    if(optget(opts, "bytecode-statistics")->enabled)
768
-        dboptions |= CL_DB_BYTECODE_STATS;
769
-
770 767
     if((opt = optget(opts,"bytecode-timeout"))->enabled)
771 768
         cl_engine_set_num(engine, CL_ENGINE_BYTECODE_TIMEOUT, opt->numarg);
772 769
 
... ...
@@ -785,6 +783,18 @@ int scanmanager(const struct optstruct *opts)
785 785
         cl_engine_set_num(engine, CL_ENGINE_BYTECODE_MODE, mode);
786 786
     }
787 787
 
788
+    if((opt = optget(opts, "statistics"))->enabled) {
789
+	while(opt) {
790
+	    if (!strcasecmp(opt->strarg, "bytecode")) {
791
+		dboptions |= CL_DB_BYTECODE_STATS;
792
+	    }
793
+	    else if (!strcasecmp(opt->strarg, "pcre")) {
794
+		dboptions |= CL_DB_PCRE_STATS;
795
+	    }
796
+	    opt = opt->nextarg;
797
+        }
798
+    }
799
+
788 800
     if((opt = optget(opts, "tempdir"))->enabled) {
789 801
         if((ret = cl_engine_set_str(engine, CL_ENGINE_TMPDIR, opt->strarg))) {
790 802
             logg("!cli_engine_set_str(CL_ENGINE_TMPDIR) failed: %s\n", cl_strerror(ret));
... ...
@@ -961,6 +971,30 @@ int scanmanager(const struct optstruct *opts)
961 961
         }
962 962
     }
963 963
 
964
+    if ((opt = optget(opts, "pcre-match-limit"))->active) {
965
+        if ((ret = cl_engine_set_num(engine, CL_ENGINE_PCRE_MATCH_LIMIT, opt->numarg))) {
966
+            logg("!cli_engine_set_num(CL_ENGINE_PCRE_MATCH_LIMIT) failed: %s\n", cl_strerror(ret));
967
+            cl_engine_free(engine);
968
+            return 2;
969
+        }
970
+    }
971
+
972
+    if ((opt = optget(opts, "pcre-recmatch-limit"))->active) {
973
+        if ((ret = cl_engine_set_num(engine, CL_ENGINE_PCRE_RECMATCH_LIMIT, opt->numarg))) {
974
+            logg("!cli_engine_set_num(CL_ENGINE_PCRE_RECMATCH_LIMIT) failed: %s\n", cl_strerror(ret));
975
+            cl_engine_free(engine);
976
+            return 2;
977
+        }
978
+    }
979
+
980
+    if ((opt = optget(opts, "pcre-max-filesize"))->active) {
981
+        if ((ret = cl_engine_set_num(engine, CL_ENGINE_PCRE_MAX_FILESIZE, opt->numarg))) {
982
+            logg("!cli_engine_set_num(CL_ENGINE_PCRE_MAX_FILESIZE) failed: %s\n", cl_strerror(ret));
983
+            cl_engine_free(engine);
984
+            return 2;
985
+        }
986
+    }
987
+
964 988
     /* set scan options */
965 989
     if(optget(opts, "allmatch")->enabled)
966 990
         options |= CL_SCAN_ALLMATCHES;
... ...
@@ -1129,9 +1163,20 @@ int scanmanager(const struct optstruct *opts)
1129 1129
         }
1130 1130
     }
1131 1131
 
1132
-    if(optget(opts, "bytecode-statistics")->enabled) {
1133
-        cli_sigperf_print();
1134
-        cli_sigperf_events_destroy();
1132
+    if((opt = optget(opts, "statistics"))->enabled) {
1133
+	while(opt) {
1134
+	    if (!strcasecmp(opt->strarg, "bytecode")) {
1135
+		cli_sigperf_print();
1136
+		cli_sigperf_events_destroy();
1137
+	    }
1138
+#if HAVE_PCRE
1139
+	    else if (!strcasecmp(opt->strarg, "pcre")) {
1140
+		cli_pcre_perf_print();
1141
+		cli_pcre_perf_events_destroy();
1142
+	    }
1143
+#endif
1144
+	    opt = opt->nextarg;
1145
+        }
1135 1146
     }
1136 1147
 
1137 1148
     /* free the engine */
... ...
@@ -17541,37 +17541,39 @@ if test "${with_pcre+set}" = set; then :
17541 17541
   withval=$with_pcre;
17542 17542
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libpcre installation" >&5
17543 17543
 $as_echo_n "checking for libpcre installation... " >&6; }
17544
-  if test "X$withval" = "Xno"; then
17544
+  case "$withval" in
17545
+  no)
17545 17546
     { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
17546 17547
 $as_echo "no" >&6; }
17547
-  else
17548
-    if test "X$withval" = "Xyes"; then
17549
-      PCRE_HOME=/usr/local
17550
-      if test ! -x "$PCRE_HOME/bin/pcre-config"; then
17551
-        PCRE_HOME=/usr
17552
-        if test ! -x "$PCRE_HOME/bin/pcre-config"; then
17553
-          PCRE_HOME=""
17554
-        fi
17555
-      fi
17556
-    elif test "$withval"; then
17557
-      PCRE_HOME="$withval"
17548
+    ;;
17549
+  yes)
17550
+    PCRE_HOME=/usr/local
17551
+    if test ! -x "$PCRE_HOME/bin/pcre-config"; then
17552
+      PCRE_HOME=/usr
17558 17553
       if test ! -x "$PCRE_HOME/bin/pcre-config"; then
17559 17554
         PCRE_HOME=""
17560
-        as_fn_error $? "cannot locate libpcre at $withval" "$LINENO" 5
17555
+        as_fn_error $? "cannot locate libpcre at /usr/local or /usr" "$LINENO" 5
17561 17556
       fi
17562
-    else
17563
-      as_fn_error $? "cannot assign blank value to --with-pcre" "$LINENO" 5
17564 17557
     fi
17558
+    ;;
17559
+  "")
17560
+    as_fn_error $? "cannot assign blank value to --with-pcre" "$LINENO" 5
17561
+    ;;
17562
+  *)
17563
+    PCRE_HOME="$withval"
17564
+    if test ! -x "$PCRE_HOME/bin/pcre-config"; then
17565
+      PCRE_HOME=""
17566
+      as_fn_error $? "cannot locate libpcre at $withval" "$LINENO" 5
17567
+    fi
17568
+    ;;
17569
+  esac
17565 17570
 
17566
-    if test "x$PCRE_HOME" != "x"; then
17567
-      { $as_echo "$as_me:${as_lineno-$LINENO}: result: using $PCRE_HOME" >&5
17571
+  if test "x$PCRE_HOME" != "x"; then
17572
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: using $PCRE_HOME" >&5
17568 17573
 $as_echo "using $PCRE_HOME" >&6; }
17569
-    else
17570
-      { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
17574
+  else
17575
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
17571 17576
 $as_echo "not found" >&6; }
17572
-      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot locate libpcre at /usr/local or /usr" >&5
17573
-$as_echo "$as_me: WARNING: cannot locate libpcre at /usr/local or /usr" >&2;}
17574
-    fi
17575 17577
   fi
17576 17578
 
17577 17579
 else
17578 17580
Binary files a/docs/signatures.pdf and b/docs/signatures.pdf differ
... ...
@@ -209,12 +209,12 @@ attachment.exe: OK
209 209
     MD5 signature for \verb+test.exe+ use the \verb+--md5+ option of sigtool:
210 210
     \begin{verbatim}
211 211
 zolw@localhost:/tmp/test$ sigtool --md5 test.exe > test.hdb
212
-zolw@localhost:/tmp/test$ cat test.hdb 
212
+zolw@localhost:/tmp/test$ cat test.hdb
213 213
 48c4533230e1ae1c118c741c0db19dfb:17387:test.exe
214 214
     \end{verbatim}
215 215
     That's it! The signature is ready for use:
216 216
     \begin{verbatim}
217
-zolw@localhost:/tmp/test$ clamscan -d test.hdb test.exe 
217
+zolw@localhost:/tmp/test$ clamscan -d test.hdb test.exe
218 218
 test.exe: test.exe FOUND
219 219
 
220 220
 ----------- SCAN SUMMARY -----------
... ...
@@ -242,7 +242,7 @@ Time: 0.024 sec (0 m 0 s)
242 242
 
243 243
     \subsubsection{SHA1 and SHA256 hash-based signatures}
244 244
     ClamAV 0.98 has also added support for SHA1 and SHA256 file checksums.
245
-    The format is the same as for MD5 file checksum. 
245
+    The format is the same as for MD5 file checksum.
246 246
     It can differentiate between them based on the length of the hash string
247 247
     in the signature. For best backwards compatibility, these should be
248 248
     placed inside a \verb+*.hsb+ file. The format is:
... ...
@@ -482,7 +482,7 @@ Sig1;Target:0;(0&1&2&3)&(4|1);6b6f74656b;616c61;7a6f6c77;7374656
482 482
 6616e;deadbeef
483 483
 
484 484
 Sig2;Target:0;((0|1|2)>5,2)&(3|1);6b6f74656b;616c61;7a6f6c77;737
485
-46566616e  
485
+46566616e
486 486
 
487 487
 Sig3;Target:0;((0|1|2|3)=2)&(4|1);6b6f74656b;616c61;7a6f6c77;737
488 488
 46566616e;deadbeef
... ...
@@ -492,15 +492,75 @@ f2aef7d14951684cf04100e8110a00;S2+78:22??232c2d252229{-15}6e6573
492 492
 (63|64)61706528;S+50:68efa311c3b9963cb1ee8e586d32aeb9043e;f9c58d
493 493
 cf43987e4f519d629b103375;SL+550:6300680065005c0046006900
494 494
     \end{verbatim}
495
-    ClamAV 0.96 introduced support for special macro subsignatures in
496
-    the following format: \verb+${min-max}MACROID$+, where \verb+MACROID+
497
-    points to a group of signatures and \verb+{min-max}+ specifies the
498
-    offset range at which one of the group signatures should match.
499
-    The range is calculated against the match offset of the previous
500
-    subsignature. The macro subsignature makes its preceding subsignature
501
-    considered a match only if both of them get matched. For more
502
-    information and examples please see
503
-    \url{https://bugzilla.clamav.net/show_bug.cgi?id=164}.
495
+
496
+    \subsection{Special Subsignature Types}
497
+    Macro subsignatures(clamav-0.96): \verb+${min-max}MACROID$+:
498
+    \begin{itemize}
499
+	\item \verb+MACROID+ points to a group of signatures and \verb+{min-max}+
500
+	specifies the offset range at which one of the group signatures should match.
501
+	\item The range is calculated against the match offset of the previous subsignature.
502
+	\item The macro subsignature makes its preceding subsignature considered a match
503
+	only if both of them get matched.
504
+	\item For more information and examples please see \url{https://wwws.clamav.net/bugzilla/show_bug.cgi?id=164}.
505
+    \end{itemize}
506
+    PCRE subsignatures(clamav-0.99): \verb+Trigger/PCRE/[Flags]+
507
+    \begin{itemize}
508
+    \item \verb+Trigger+ is a required field that is a valid \verb+LogicalExpression+ and
509
+    may refer to any subsignatures that precede this subsignature. Triggers cannot be
510
+    self-referential and cannot refer to subsequent subsignatures.
511
+    \item \verb+PCRE+ is the expression representing the regex to execute. \verb+PCRE+
512
+    must be delimited by '/', but does not need to be escaped within the expression.
513
+    \verb+PCRE+ cannot be empty, and the (?UTF*) control sequence is not allowed. If debug is specified,
514
+    named capture groups are displayed in a post-execution report.
515
+    \item \verb+Flags+ are a series of characters which affect the compilation and execution
516
+    of \verb+PCRE+ within the PCRE compiler and the ClamAV engine. This field is optional.
517
+	\begin{itemize}
518
+	\item \verb+g [CLAMAV_GLOBAL]+ specifies to search for ALL matches of PCRE (default is to
519
+        search for first match). NOTE: INCREASES the time needed to run the PCRE.
520
+        \item \verb+r [CLAMAV_ROLLING]+ specifies to use the given offset as the starting location
521
+        to search for a match as opposed to the only location; applies to subsigs without maxshifts.
522
+        By default, in order to facilitate normal ClamAV offset behavior, PCREs are auto-anchored
523
+        (only attempt match on first offset); using the rolling option disables the auto-anchoring.
524
+	\item \verb+e [CLAMAV_ENCOMPASS]+ specifies to CONFINE matching between the specified offset
525
+	and maxshift; applies only when maxshift is specified. Note: DECREASES time needed to run the PCRE.
526
+	\item \verb+i [PCRE_CASELESS]+
527
+	\item \verb+s [PCRE_DOTALL]+
528
+	\item \verb+m [PCRE_MULTILINE]+
529
+	\item \verb+x [PCRE_EXTENDED]+
530
+	\item \verb+A [PCRE_ANCHORED]+
531
+	\item \verb+E [PCRE_DOLLAR_ENDONLY]+
532
+	\item \verb+G [PCRE_UNGREEDY]+
533
+	\end{itemize}
534
+    \end{itemize}
535
+    Examples:
536
+    \begin{verbatim}
537
+Find.All.ClamAV;Target:0;1;6265676c6164697427736e6f7462797465636
538
+f6465;0/clamav/g
539
+
540
+Find.ClamAV.OnlyAt.299;Target:0;2;7374756c747a67657473;706372657
541
+2656765786c6f6c;299:0&1/clamav/
542
+
543
+Find.ClamAV.StartAt.300;Target:0;3;616c61696e;62756731393238;636
544
+c6f736564;300:0&1&2/clamav/r
545
+
546
+Find.All.Encompassed.ClamAV;Target:0;3;7768796172656e2774;796f75
547
+7573696e67;79617261;200,300:0&1&2/clamav/ge
548
+
549
+Named.CapGroup.Pcre;Target:0;3;636f75727479617264;616c62756d;746
550
+57272696572;50:0&1&2/variable=(?<nilshell>.{16})end/gr
551
+
552
+Firefox.TreeRange.UseAfterFree;Target:0;0&1&2;2e766965772e73656c
553
+656374696f6e;2e696e76616c696461746553656c656374696f6e;0&1/\x2Evi
554
+ew\x2Eselection.*?\x2Etree\s*\x3D\s*null.*?\x2Einvalidate/smi
555
+
556
+Firefox.IDB.UseAfterFree;Target:0;0&1;4944424b657952616e6765;0/^
557
+\x2e(only|lowerBound|upperBound|bound)\x28.*?\x29.*?\x2e(lower|u
558
+pper|lowerOpen|upperOpen)/smi
559
+
560
+Firefox.boundElements;Target:0;0&1&2;6576656e742e626f756e64456c6
561
+56d656e7473;77696e646f772e636c6f7365;0&1/on(load|click)\s*=\s*\x
562
+22?window\.close\s*\x28/si
563
+    \end{verbatim}
504 564
 
505 565
     \subsection{Icon signatures for PE files}
506 566
     ClamAV 0.96 includes an approximate/fuzzy icon matcher to help
... ...
@@ -522,6 +522,31 @@ Example
522 522
 # Default: 100
523 523
 #MaxIconsPE 200
524 524
 
525
+# This option sets the maximum calls to the PCRE match function during an instance of regex matching.
526
+# Instances using more than this limit will be terminated and alert the user but the scan will continue.
527
+# For more information on match_limit, see the PCRE documentation.
528
+# Negative values are not allowed.
529
+# WARNING: setting this limit too high may severely impact performance.
530
+# Default: 10000
531
+#PCREMatchLimit 20000
532
+
533
+# This option sets the maximum recursive calls to the PCRE match function during an instance of regex matching.
534
+# Instances using more than this limit will be terminated and alert the user but the scan will continue.
535
+# For more information on match_limit_recursion, see the PCRE documentation.
536
+# Negative values are not allowed and values > PCREMatchLimit are superfluous.
537
+# WARNING: setting this limit too high may severely impact performance.
538
+# Default: 5000
539
+#PCRERecMatchLimit 10000
540
+
541
+# This option sets the maximum filesize for which PCRE subsigs will be executed.
542
+# Files exceeding this limit will not have PCRE subsigs executed unless a subsig is encompassed to a smaller buffer.
543
+# Negative values are not allowed.
544
+# Setting this value to zero disables the limit.
545
+# WARNING: setting this limit too high or disabling it may severely impact performance.
546
+# Default: 25M
547
+#PCREMaxFileSize 100M
548
+
549
+
525 550
 ##
526 551
 ## On-access Scan Settings
527 552
 ##
... ...
@@ -450,7 +450,11 @@ libclamav_la_SOURCES = \
450 450
 	yara_parser.h \
451 451
 	yara_clam.h \
452 452
 	msdoc.c \
453
-	msdoc.h
453
+	msdoc.h \
454
+	matcher-pcre.c \
455
+	matcher-pcre.h \
456
+	regex_pcre.c \
457
+	regex_pcre.h
454 458
 
455 459
 libclamav_la_SOURCES += bignum.h\
456 460
 	bignum_fast.h\
... ...
@@ -286,6 +286,7 @@ am_libclamav_la_OBJECTS = libclamav_la-matcher-ac.lo \
286 286
 	libclamav_la-hostid.lo libclamav_la-openioc.lo \
287 287
 	libclamav_la-yara_grammar.lo libclamav_la-yara_lexer.lo \
288 288
 	libclamav_la-yara_parser.lo libclamav_la-msdoc.lo \
289
+	libclamav_la-matcher-pcre.lo libclamav_la-regex_pcre.lo \
289 290
 	libclamav_la-fp_add.lo libclamav_la-fp_add_d.lo \
290 291
 	libclamav_la-fp_addmod.lo libclamav_la-fp_cmp.lo \
291 292
 	libclamav_la-fp_cmp_d.lo libclamav_la-fp_cmp_mag.lo \
... ...
@@ -915,6 +916,7 @@ libclamav_la_SOURCES = matcher-ac.c matcher-ac.h matcher-bm.c \
915 915
 	stats_json.c stats_json.h hostid.c hostid.h openioc.c \
916 916
 	openioc.h yara_grammar.y yara_lexer.l yara_lexer.h \
917 917
 	yara_parser.c yara_parser.h yara_clam.h msdoc.c msdoc.h \
918
+	matcher-pcre.c matcher-pcre.h regex_pcre.c regex_pcre.h \
918 919
 	bignum.h bignum_fast.h tomsfastmath/addsub/fp_add.c \
919 920
 	tomsfastmath/addsub/fp_add_d.c tomsfastmath/addsub/fp_addmod.c \
920 921
 	tomsfastmath/addsub/fp_cmp.c tomsfastmath/addsub/fp_cmp_d.c \
... ...
@@ -1271,6 +1273,7 @@ distclean-compile:
1271 1271
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-matcher-ac.Plo@am__quote@
1272 1272
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-matcher-bm.Plo@am__quote@
1273 1273
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-matcher-hash.Plo@am__quote@
1274
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-matcher-pcre.Plo@am__quote@
1274 1275
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-matcher.Plo@am__quote@
1275 1276
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-mbox.Plo@am__quote@
1276 1277
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-mbr.Plo@am__quote@
... ...
@@ -1300,6 +1303,7 @@ distclean-compile:
1300 1300
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-readdb.Plo@am__quote@
1301 1301
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-rebuildpe.Plo@am__quote@
1302 1302
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-regex_list.Plo@am__quote@
1303
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-regex_pcre.Plo@am__quote@
1303 1304
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-regex_suffix.Plo@am__quote@
1304 1305
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-rijndael.Plo@am__quote@
1305 1306
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-rtf.Plo@am__quote@
... ...
@@ -2356,6 +2360,20 @@ libclamav_la-msdoc.lo: msdoc.c
2356 2356
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2357 2357
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-msdoc.lo `test -f 'msdoc.c' || echo '$(srcdir)/'`msdoc.c
2358 2358
 
2359
+libclamav_la-matcher-pcre.lo: matcher-pcre.c
2360
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-matcher-pcre.lo -MD -MP -MF $(DEPDIR)/libclamav_la-matcher-pcre.Tpo -c -o libclamav_la-matcher-pcre.lo `test -f 'matcher-pcre.c' || echo '$(srcdir)/'`matcher-pcre.c
2361
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-matcher-pcre.Tpo $(DEPDIR)/libclamav_la-matcher-pcre.Plo
2362
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='matcher-pcre.c' object='libclamav_la-matcher-pcre.lo' libtool=yes @AMDEPBACKSLASH@
2363
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2364
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-matcher-pcre.lo `test -f 'matcher-pcre.c' || echo '$(srcdir)/'`matcher-pcre.c
2365
+
2366
+libclamav_la-regex_pcre.lo: regex_pcre.c
2367
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-regex_pcre.lo -MD -MP -MF $(DEPDIR)/libclamav_la-regex_pcre.Tpo -c -o libclamav_la-regex_pcre.lo `test -f 'regex_pcre.c' || echo '$(srcdir)/'`regex_pcre.c
2368
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-regex_pcre.Tpo $(DEPDIR)/libclamav_la-regex_pcre.Plo
2369
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='regex_pcre.c' object='libclamav_la-regex_pcre.lo' libtool=yes @AMDEPBACKSLASH@
2370
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2371
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-regex_pcre.lo `test -f 'regex_pcre.c' || echo '$(srcdir)/'`regex_pcre.c
2372
+
2359 2373
 libclamav_la-fp_add.lo: tomsfastmath/addsub/fp_add.c
2360 2374
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-fp_add.lo -MD -MP -MF $(DEPDIR)/libclamav_la-fp_add.Tpo -c -o libclamav_la-fp_add.lo `test -f 'tomsfastmath/addsub/fp_add.c' || echo '$(srcdir)/'`tomsfastmath/addsub/fp_add.c
2361 2375
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-fp_add.Tpo $(DEPDIR)/libclamav_la-fp_add.Plo
... ...
@@ -1454,6 +1454,11 @@ void cli_sigperf_print()
1454 1454
     struct sigperf_elem stats[MAX_BC], *elem = stats;
1455 1455
     int i, elems = 0, max_name_len = 0, name_len;
1456 1456
 
1457
+    if (!g_sigid || !g_sigevents) {
1458
+        cli_warnmsg("cli_sigperf_print: statistics requested but no bytecodes were loaded!\n");
1459
+        return;
1460
+    }
1461
+
1457 1462
     memset(stats, 0, sizeof(stats));
1458 1463
     for (i=0;i<MAX_BC;i++) {
1459 1464
 	union ev_val val;
... ...
@@ -1479,6 +1484,8 @@ void cli_sigperf_print()
1479 1479
 	elem++;
1480 1480
 	elems++;
1481 1481
     }
1482
+    if (max_name_len < strlen("Bytecode name"))
1483
+        max_name_len = strlen("Bytecode name");
1482 1484
 
1483 1485
     cli_qsort(stats, elems, sizeof(struct sigperf_elem), sigelem_comp);
1484 1486
 
... ...
@@ -134,6 +134,7 @@ typedef enum {
134 134
 #define CL_DB_UNSIGNED	    0x10000 /* internal */
135 135
 #define CL_DB_BYTECODE_STATS 0x20000
136 136
 #define CL_DB_ENHANCED      0x40000
137
+#define CL_DB_PCRE_STATS    0x80000
137 138
 
138 139
 /* recommended db settings */
139 140
 #define CL_DB_STDOPT	    (CL_DB_PHISHING | CL_DB_PHISHING_URLS | CL_DB_BYTECODE)
... ...
@@ -233,7 +234,10 @@ enum cl_engine_field {
233 233
     CL_ENGINE_STATS_TIMEOUT,        /* uint32_t */
234 234
     CL_ENGINE_MAX_PARTITIONS,       /* uint32_t */
235 235
     CL_ENGINE_MAX_ICONSPE,          /* uint32_t */
236
-    CL_ENGINE_TIME_LIMIT            /* uint32_t */
236
+    CL_ENGINE_TIME_LIMIT,           /* uint32_t */
237
+    CL_ENGINE_PCRE_MATCH_LIMIT,     /* uint64_t */
238
+    CL_ENGINE_PCRE_RECMATCH_LIMIT,  /* uint64_t */
239
+    CL_ENGINE_PCRE_MAX_FILESIZE     /* uint64_t */
237 240
 };
238 241
 
239 242
 enum bytecode_security {
... ...
@@ -136,6 +136,10 @@ static struct dconf_module modules[] = {
136 136
     { "STATS",      "DISABLED",     DCONF_STATS_DISABLED,   0 },
137 137
     { "STATS",      "PESECTION DISABLED", DCONF_STATS_PE_SECTION_DISABLED, 0 },
138 138
 
139
+    { "PCRE",       "SUPPORT",      PCRE_CONF_SUPPORT,   1 },
140
+    { "PCRE",       "OPTIONS",      PCRE_CONF_OPTIONS,   1 },
141
+    { "PCRE",       "GLOBAL",       PCRE_CONF_GLOBAL,    1 },
142
+
139 143
     { NULL,     NULL,       0,              0 }
140 144
 };
141 145
 
... ...
@@ -189,6 +193,9 @@ struct cli_dconf *cli_dconf_init(void)
189 189
         } else if (!strcmp(modules[i].mname, "STATS")) {
190 190
             if (modules[i].state)
191 191
                 dconf->stats |= modules[i].bflag;
192
+        } else if (!strcmp(modules[i].mname, "PCRE")) {
193
+            if (modules[i].state)
194
+                dconf->pcre |= modules[i].bflag;
192 195
         }
193 196
     }
194 197
 
... ...
@@ -198,7 +205,7 @@ struct cli_dconf *cli_dconf_init(void)
198 198
 void cli_dconf_print(struct cli_dconf *dconf)
199 199
 {
200 200
     unsigned int pe = 0, elf = 0, macho = 0, arch = 0, doc = 0, mail = 0;
201
-    unsigned int other = 0, phishing = 0, i, bytecode=0, stats=0;
201
+    unsigned int other = 0, phishing = 0, i, bytecode=0, stats=0, pcre=0;
202 202
 
203 203
 
204 204
     cli_dbgmsg("Dynamic engine configuration settings:\n");
... ...
@@ -292,9 +299,28 @@ void cli_dconf_print(struct cli_dconf *dconf)
292 292
             }
293 293
 
294 294
             if (dconf->stats)
295
-                cli_dbgmsg("    * Submodule %10s:\t%s\n", modules[i].sname, (dconf->stats & modules[i].bflag) ? "On" : "** Off **");
295
+                cli_dbgmsg("   * Submodule %10s:\t%s\n", modules[i].sname, (dconf->stats & modules[i].bflag) ? "On" : "** Off **");
296 296
             else
297 297
                 continue;
298
+        } else if (!strcmp(modules[i].mname, "PCRE")) {
299
+#if HAVE_PCRE
300
+            if (!pcre) {
301
+                cli_dbgmsg("Module PCRE %s\n", dconf->pcre ? "On" : "Off");
302
+                pcre = 1;
303
+            }
304
+
305
+            if (dconf->pcre)
306
+                cli_dbgmsg("   * Submodule %10s:\t%s\n", modules[i].sname, (dconf->pcre & modules[i].bflag) ? "On" : "** Off **");
307
+            else
308
+                continue;
309
+#else
310
+            if (!pcre) {
311
+                cli_dbgmsg("Module PCRE Off\n");
312
+                pcre = 1;
313
+            }
314
+
315
+            continue;
316
+#endif
298 317
         }
299 318
     }
300 319
 }
... ...
@@ -437,6 +463,15 @@ int cli_dconf_load(FILE *fs, struct cl_engine *engine, unsigned int options, str
437 437
                 break;
438 438
             }
439 439
         }
440
+
441
+        if(!strncmp(buffer, "PCRE:", 5) && chkflevel(buffer, 2)) {
442
+            if(sscanf(buffer + 5, "0x%x", &val) == 1) {
443
+                engine->dconf->pcre = val;
444
+            } else {
445
+                ret = CL_EMALFDB;
446
+                break;
447
+            }
448
+        }
440 449
     }
441 450
 
442 451
     if(ret) {
... ...
@@ -42,6 +42,7 @@ struct cli_dconf {
42 42
     uint32_t phishing;
43 43
     uint32_t bytecode;
44 44
     uint32_t stats;
45
+    uint32_t pcre;
45 46
 };
46 47
 
47 48
 /* PE flags */
... ...
@@ -129,6 +130,11 @@ struct cli_dconf {
129 129
 #define DCONF_STATS_DISABLED            0x1
130 130
 #define DCONF_STATS_PE_SECTION_DISABLED 0x2
131 131
 
132
+/* PCRE flags */
133
+#define PCRE_CONF_SUPPORT 0x1
134
+#define PCRE_CONF_OPTIONS 0x2
135
+#define PCRE_CONF_GLOBAL  0x4
136
+
132 137
 #define BYTECODE_ENGINE_MASK (BYTECODE_INTERPRETER | BYTECODE_JIT_X86 | BYTECODE_JIT_PPC | BYTECODE_JIT_ARM)
133 138
 
134 139
 #ifdef USE_MPOOL
... ...
@@ -46,4 +46,9 @@
46 46
 
47 47
 #define CLI_DEFAULT_MAXPARTITIONS       50
48 48
 
49
+/* TODO - set better defaults */
50
+#define CLI_DEFAULT_PCRE_MATCH_LIMIT     10000
51
+#define CLI_DEFAULT_PCRE_RECMATCH_LIMIT  5000
52
+#define CLI_DEFAULT_PCRE_MAX_FILESIZE    26214400
53
+
49 54
 #endif
... ...
@@ -57,11 +57,14 @@ CLAMAV_PUBLIC {
57 57
     cl_finish_hash;
58 58
     cl_hash_destroy;
59 59
     cl_engine_stats_enable;
60
+    lsig_sub_matched;
60 61
 };
61 62
 CLAMAV_PRIVATE {
62 63
   global:
63 64
     cli_sigperf_print; 
64 65
     cli_sigperf_events_destroy; 
66
+    cli_pcre_perf_print;
67
+    cli_pcre_perf_events_destroy;
65 68
 
66 69
     cli_gettmpdir;
67 70
     cli_strtok;
... ...
@@ -1104,7 +1104,7 @@ inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, of
1104 1104
     return CL_SUCCESS;
1105 1105
 }
1106 1106
 
1107
-static inline void lsig_sub_matched(const struct cli_matcher *root, struct cli_ac_data *mdata, uint32_t lsigid1, uint32_t lsigid2, uint32_t realoff, int partial)
1107
+void lsig_sub_matched(const struct cli_matcher *root, struct cli_ac_data *mdata, uint32_t lsigid1, uint32_t lsigid2, uint32_t realoff, int partial)
1108 1108
 {
1109 1109
 	const struct cli_lsig_tdb *tdb = &root->ac_lsigtable[lsigid1]->tdb;
1110 1110
 
... ...
@@ -1359,9 +1359,9 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
1359 1359
 				    if(res) {
1360 1360
 					newres = (struct cli_ac_result *) malloc(sizeof(struct cli_ac_result));
1361 1361
 					if(!newres) {
1362
-                        cli_errmsg("cli_ac_scanbuff: Can't allocate memory for newres %lu\n", sizeof(struct cli_ac_result));
1362
+					    cli_errmsg("cli_ac_scanbuff: Can't allocate memory for newres %lu\n", (unsigned long)sizeof(struct cli_ac_result));
1363 1363
 					    return CL_EMEM;
1364
-                    }
1364
+					}
1365 1365
 					newres->virname = pt->virname;
1366 1366
 					newres->customdata = pt->customdata;
1367 1367
 					newres->next = *res;
... ...
@@ -1412,9 +1412,9 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
1412 1412
 				if(res) {
1413 1413
 				    newres = (struct cli_ac_result *) malloc(sizeof(struct cli_ac_result));
1414 1414
 				    if(!newres) {
1415
-                        cli_errmsg("cli_ac_scanbuff: Can't allocate memory for newres %lu\n", sizeof(struct cli_ac_result));
1416
-                        return CL_EMEM;
1417
-                    }
1415
+					cli_errmsg("cli_ac_scanbuff: Can't allocate memory for newres %lu\n", (unsigned long)sizeof(struct cli_ac_result));
1416
+					return CL_EMEM;
1417
+				    }
1418 1418
 				    newres->virname = pt->virname;
1419 1419
 				    newres->customdata = pt->customdata;
1420 1420
 				    newres->offset = realoff;
... ...
@@ -90,6 +90,7 @@ struct cli_ac_result {
90 90
 
91 91
 int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern);
92 92
 int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint32_t reloffsigs, uint8_t tracklen);
93
+void lsig_sub_matched(const struct cli_matcher *root, struct cli_ac_data *mdata, uint32_t lsigid1, uint32_t lsigid2, uint32_t realoff, int partial);
93 94
 void cli_ac_chkmacro(struct cli_matcher *root, struct cli_ac_data *data, unsigned lsigid1);
94 95
 int cli_ac_chklsig(const char *expr, const char *end, uint32_t *lsigcnt, unsigned int *cnt, uint64_t *ids, unsigned int parse_only);
95 96
 void cli_ac_freedata(struct cli_ac_data *data);
96 97
new file mode 100644
... ...
@@ -0,0 +1,792 @@
0
+/*
1
+ *  Support for matcher using PCRE
2
+ *
3
+ *  Copyright (C) 2007-2013 Sourcefire, Inc.
4
+ *  Copyright (C) 2014 Cisco Systems, Inc.
5
+ *  All Rights Reserved.
6
+ *
7
+ *  Authors: Kevin Lin
8
+ *
9
+ *  This program is free software; you can redistribute it and/or modify
10
+ *  it under the terms of the GNU General Public License version 2 as
11
+ *  published by the Free Software Foundation.
12
+ *
13
+ *  This program is distributed in the hope that it will be useful,
14
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+ *  GNU General Public License for more details.
17
+ *
18
+ *  You should have received a copy of the GNU General Public License
19
+ *  along with this program; if not, write to the Free Software
20
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21
+ *  MA 02110-1301, USA.
22
+ */
23
+
24
+#if HAVE_CONFIG_H
25
+#include "clamav-config.h"
26
+#endif
27
+
28
+#include "clamav.h"
29
+#include "cltypes.h"
30
+#include "dconf.h"
31
+#include "events.h"
32
+#include "others.h"
33
+#include "matcher.h"
34
+#include "matcher-ac.h"
35
+#include "matcher-pcre.h"
36
+#include "mpool.h"
37
+#include "regex_pcre.h"
38
+
39
+#if HAVE_PCRE
40
+/* DEBUGGING */
41
+//#define MATCHER_PCRE_DEBUG
42
+#ifdef MATCHER_PCRE_DEBUG
43
+#  define pm_dbgmsg(...) cli_dbgmsg( __VA_ARGS__)
44
+#else
45
+#  define pm_dbgmsg(...)
46
+#endif
47
+#undef MATCHER_PCRE_DEBUG
48
+
49
+/* PERFORMANCE MACROS AND FUNCTIONS */
50
/* Number of PCRE subsignatures the performance table can hold, and the number
 * of event ids registered for each of them (one time event, one match event). */
#define MAX_TRACKED_PCRE 64
#define PCRE_EVENTS_PER_SIG 2
/* Total number of event ids. Parenthesized so the product keeps its value when
 * the macro is embedded in a larger expression (division, modulo, ...). */
#define MAX_PCRE_SIGEVENT_ID (MAX_TRACKED_PCRE * PCRE_EVENTS_PER_SIG)
53
+
54
+cli_events_t *p_sigevents = NULL;
55
+unsigned int p_sigid = 0;
56
+
57
+static void pcre_perf_events_init(struct cli_pcre_meta *pm, const char *virname)
58
+{
59
+    int ret;
60
+    size_t namelen = strlen(virname)+strlen(pm->pdata.expression)+3;
61
+
62
+    if (!p_sigevents) {
63
+        p_sigevents = cli_events_new(MAX_PCRE_SIGEVENT_ID);
64
+        if (!p_sigevents) {
65
+            cli_errmsg("pcre_perf: no memory for events table\n");
66
+            return;
67
+        }
68
+    }
69
+
70
+    if (p_sigid > MAX_PCRE_SIGEVENT_ID - PCRE_EVENTS_PER_SIG - 1) {
71
+        cli_errmsg("pcre_perf: events table full. Increase MAX_TRACKED_PCRE\n");
72
+        return;
73
+    }
74
+
75
+    if (!virname)
76
+        virname = "(null)";
77
+
78
+    /* set the name */
79
+    pm->statname = cli_calloc(1, namelen);
80
+    if (!pm->statname) {
81
+        return;
82
+    }
83
+    snprintf(pm->statname, namelen, "%s/%s/", virname, pm->pdata.expression);
84
+
85
+    pm_dbgmsg("pcre_perf: adding sig ids starting %u for %s\n", p_sigid, pm->statname);
86
+
87
+    /* register time event */
88
+    pm->sigtime_id = p_sigid;
89
+    ret = cli_event_define(p_sigevents, p_sigid++, pm->statname, ev_time, multiple_sum);
90
+    if (ret) {
91
+        cli_errmsg("pcre_perf: cli_event_define() error for time event id %d\n", pm->sigtime_id);
92
+        pm->sigtime_id = MAX_PCRE_SIGEVENT_ID+1;
93
+        return;
94
+    }
95
+
96
+    /* register match count */
97
+    pm->sigmatch_id = p_sigid;
98
+    ret = cli_event_define(p_sigevents, p_sigid++, pm->statname, ev_int, multiple_sum);
99
+    if (ret) {
100
+        cli_errmsg("pcre_perf: cli_event_define() error for matches event id %d\n", pm->sigmatch_id);
101
+        pm->sigmatch_id = MAX_PCRE_SIGEVENT_ID+1;
102
+        return;
103
+    }
104
+}
105
+
106
/* One row of the PCRE performance report. */
struct sigperf_elem {
    const char * name;          /* name of the time event, or a placeholder */
    uint64_t usecs;             /* accumulated value of the time event */
    unsigned long run_count;    /* number of times the time event was recorded */
    unsigned long match_count;  /* number of times the match event was recorded */
};

/* Average time per run of a row; 0 for a row that never ran, so the division
 * can never be by zero. */
static uint64_t sigelem_avg(const struct sigperf_elem *el)
{
    return el->run_count ? el->usecs / el->run_count : 0;
}

/*
 * qsort-style comparator ordering rows by average time per run, slowest first.
 *
 * Returns a negative value when a is slower than b, a positive value when b is
 * slower than a, and 0 when the averages are equal. The averages are compared
 * explicitly rather than subtracted: a 64-bit difference truncated to int can
 * come out as 0 or with the wrong sign.
 */
static int sigelem_comp(const void * a, const void * b)
{
    const uint64_t avg_a = sigelem_avg((const struct sigperf_elem *)a);
    const uint64_t avg_b = sigelem_avg((const struct sigperf_elem *)b);

    return (avg_a < avg_b) - (avg_a > avg_b);
}
119
+
120
+void cli_pcre_perf_print()
121
+{
122
+    struct sigperf_elem stats[MAX_TRACKED_PCRE], *elem = stats;
123
+    int i, elems = 0, max_name_len = 0, name_len;
124
+
125
+    if (!p_sigid || !p_sigevents) {
126
+        cli_warnmsg("cli_pcre_perf_print: statistics requested but no PCREs were loaded!\n");
127
+        return;
128
+    }
129
+
130
+    memset(stats, 0, sizeof(stats));
131
+    for (i=0;i<MAX_TRACKED_PCRE;i++) {
132
+        union ev_val val;
133
+        uint32_t count;
134
+        const char * name = cli_event_get_name(p_sigevents, i*PCRE_EVENTS_PER_SIG);
135
+        cli_event_get(p_sigevents, i*PCRE_EVENTS_PER_SIG, &val, &count);
136
+        if (!count) {
137
+            if (name)
138
+                cli_dbgmsg("No event triggered for %s\n", name);
139
+            continue;
140
+        }
141
+        if (name)
142
+            name_len = strlen(name);
143
+        else
144
+            name_len = 0;
145
+        if (name_len > max_name_len)
146
+            max_name_len = name_len;
147
+        elem->name = name?name:"\"noname\"";
148
+        elem->usecs = val.v_int;
149
+        elem->run_count = count;
150
+        cli_event_get(p_sigevents, i*PCRE_EVENTS_PER_SIG+1, &val, &count);
151
+        elem->match_count = count;
152
+        elem++;
153
+        elems++;
154
+    }
155
+    if (max_name_len < strlen("PCRE Expression"))
156
+        max_name_len = strlen("PCRE Expression");
157
+
158
+    cli_qsort(stats, elems, sizeof(struct sigperf_elem), sigelem_comp);
159
+
160
+    elem = stats;
161
+    /* name runs matches microsecs avg */
162
+    cli_infomsg (NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "PCRE Expression",
163
+                 8, "#runs", 8, "#matches", 12, "usecs total", 9, "usecs avg");
164
+    cli_infomsg (NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "===============",
165
+                 8, "=====", 8, "========", 12, "===========", 9, "=========");
166
+    while (elem->run_count) {
167
+        cli_infomsg (NULL, "%-*s %*lu %*lu %*llu %*.2f\n", max_name_len, elem->name,
168
+                     8, elem->run_count, 8, elem->match_count,
169
+                     12, elem->usecs, 9, (double)elem->usecs/elem->run_count);
170
+        elem++;
171
+    }
172
+}
173
+
174
+
175
+void cli_pcre_perf_events_destroy()
176
+{
177
+    cli_events_free(p_sigevents);
178
+    p_sigid = 0;
179
+}
180
+
181
+
182
+/* PCRE MATCHER FUNCTIONS */
183
+int cli_pcre_addpatt(struct cli_matcher *root, const char *virname, const char *trigger, const char *pattern, const char *cflags, const char *offset, const uint32_t *lsigid, unsigned int options)
184
+{
185
+    struct cli_pcre_meta **newmetatable = NULL, *pm = NULL;
186
+    uint32_t pcre_count;
187
+    const char *opt;
188
+    int ret = CL_SUCCESS, rssigs;
189
+
190
+    if (!root || !trigger || !pattern || !offset) {
191
+        cli_errmsg("cli_pcre_addpatt: NULL root or NULL trigger or NULL pattern or NULL offset\n");
192
+        return CL_ENULLARG;
193
+    }
194
+
195
+    /* TODO: trigger and regex checking (backreference limitations?) (control pattern limitations?) */
196
+    /* cli_ac_chklsig will fail a empty trigger; empty patterns can cause an infinite loop */
197
+    if (*trigger == '\0' || *pattern == '\0') {
198
+        cli_errmsg("cli_pcre_addpatt: trigger or pattern cannot be an empty string\n");
199
+        return CL_EMALFDB;
200
+    }
201
+    if (cflags && *cflags == '\0') {
202
+        cflags = NULL;
203
+    }
204
+
205
+    if (lsigid)
206
+        pm_dbgmsg("cli_pcre_addpatt: Adding /%s/%s%s triggered on (%s) as subsig %d for lsigid %d\n", 
207
+                  pattern, cflags ? " with flags " : "", cflags ? cflags : "", trigger, lsigid[1], lsigid[0]);
208
+    else
209
+        pm_dbgmsg("cli_pcre_addpatt: Adding /%s/%s%s triggered on (%s) [no lsigid]\n",
210
+                  pattern, cflags ? " with flags " : "", cflags ? cflags : "", trigger);
211
+
212
+#ifdef PCRE_BYPASS
213
+    /* check for trigger bypass */
214
+    if (strcmp(trigger, PCRE_BYPASS)) {
215
+#endif
216
+        /* validate the lsig trigger */
217
+        rssigs = cli_ac_chklsig(trigger, trigger + strlen(trigger), NULL, NULL, NULL, 1);
218
+        if(rssigs == -1) {
219
+            cli_errmsg("cli_pcre_addpatt: regex subsig /%s/ is missing a valid logical trigger\n", pattern);
220
+            return CL_EMALFDB;
221
+        }
222
+
223
+        if (lsigid) {
224
+            if (rssigs > lsigid[1]) {
225
+                cli_errmsg("cli_pcre_addpatt: regex subsig %d logical trigger refers to subsequent subsig %d\n", lsigid[1], rssigs);
226
+                return CL_EMALFDB;
227
+            }
228
+            if (rssigs == lsigid[1]) {
229
+                cli_errmsg("cli_pcre_addpatt: regex subsig %d logical trigger is self-referential\n", lsigid[1]);
230
+                return CL_EMALFDB;
231
+            }
232
+        }
233
+        else {
234
+            cli_dbgmsg("cli_pcre_addpatt: regex subsig is missing lsigid data\n");
235
+        }
236
+#ifdef PCRE_BYPASS
237
+    }
238
+#endif
239
+
240
+    /* allocating entries */
241
+    pm = (struct cli_pcre_meta *)mpool_calloc(root->mempool, 1, sizeof(*pm));
242
+    if (!pm) {
243
+        cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for new pcre meta\n");
244
+        return CL_EMEM;
245
+    }
246
+
247
+    pm->trigger = strdup(trigger);
248
+    if (!pm->trigger) {
249
+        cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for trigger string\n");
250
+        cli_pcre_freemeta(pm);
251
+        mpool_free(root->mempool, pm);
252
+        return CL_EMEM;
253
+    }
254
+
255
+    pm->virname = (char *)cli_virname(virname, options & CL_DB_OFFICIAL);
256
+    if(!pm->virname) {
257
+        cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for virname or NULL virname\n");
258
+        cli_pcre_freemeta(pm);
259
+        mpool_free(root->mempool, pm);
260
+        return CL_EMEM;
261
+    }
262
+
263
+    if (lsigid) {
264
+        root->ac_lsigtable[lsigid[0]]->virname = pm->virname;
265
+
266
+        pm->lsigid[0] = 1;
267
+        pm->lsigid[1] = lsigid[0];
268
+        pm->lsigid[2] = lsigid[1];
269
+    }
270
+    else {
271
+        /* sigtool */
272
+        pm->lsigid[0] = 0;
273
+    }
274
+
275
+    pm->pdata.expression = strdup(pattern);
276
+    if (!pm->pdata.expression) {
277
+        cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for expression\n");
278
+        cli_pcre_freemeta(pm);
279
+        mpool_free(root->mempool, pm);
280
+        return CL_EMEM;
281
+    }
282
+
283
+    /* offset parsing and usage, similar to cli_ac_addsig */
284
+    /* relative and type-specific offsets handled during scan */
285
+    ret = cli_caloff(offset, NULL, root->type, pm->offdata, &(pm->offset_min), &(pm->offset_max));
286
+    if (ret != CL_SUCCESS) {
287
+        cli_errmsg("cli_pcre_addpatt: cannot calculate offset data: %s for pattern: %s\n", offset, pattern);
288
+        cli_pcre_freemeta(pm);
289
+        mpool_free(root->mempool, pm);
290
+        return ret;
291
+    }
292
+    if(pm->offdata[0] != CLI_OFF_ANY) {
293
+        if(pm->offdata[0] == CLI_OFF_ABSOLUTE)
294
+            root->pcre_absoff_num++;
295
+        else
296
+            root->pcre_reloff_num++;
297
+    }
298
+
299
+    /* parse and add options, also totally not from snort */
300
+    if (cflags) {
301
+        opt = cflags;
302
+
303
+        /* cli_pcre_addoptions handles pcre specific options */
304
+        while (cli_pcre_addoptions(&(pm->pdata), &opt, 0) != CL_SUCCESS) {
305
+            /* handle matcher specific options here */
306
+            switch (*opt) {
307
+            case 'g':  pm->flags |= CLI_PCRE_GLOBAL;            break;
308
+            case 'r':  pm->flags |= CLI_PCRE_ROLLING;           break;
309
+            case 'e':  pm->flags |= CLI_PCRE_ENCOMPASS;         break;
310
+            default:
311
+                cli_errmsg("cli_pcre_addpatt: unknown/extra pcre option encountered %c\n", *opt);
312
+                cli_pcre_freemeta(pm);
313
+                mpool_free(root->mempool, pm);
314
+                return CL_EMALFDB;
315
+            }
316
+            opt++;
317
+        }
318
+
319
+        if (pm->flags) {
320
+            pm_dbgmsg("Matcher:  %s%s%s\n",
321
+                      pm->flags & CLI_PCRE_GLOBAL ? "CLAMAV_GLOBAL " : "",
322
+                      pm->flags & CLI_PCRE_ROLLING ? "CLAMAV_ROLLING " : "",
323
+                      pm->flags & CLI_PCRE_ENCOMPASS ? "CLAMAV_ENCOMPASS " : "");
324
+        }
325
+        else
326
+            pm_dbgmsg("Matcher:  NONE\n");
327
+
328
+        if (pm->pdata.options) {
329
+            pm_dbgmsg("Compiler: %s%s%s%s%s%s%s\n",
330
+                      pm->pdata.options & PCRE_CASELESS ? "PCRE_CASELESS " : "",
331
+                      pm->pdata.options & PCRE_DOTALL ? "PCRE_DOTALL " : "",
332
+                      pm->pdata.options & PCRE_MULTILINE ? "PCRE_MULTILINE " : "",
333
+                      pm->pdata.options & PCRE_EXTENDED ? "PCRE_EXTENDED " : "",
334
+
335
+                      pm->pdata.options & PCRE_ANCHORED ? "PCRE_ANCHORED " : "",
336
+                      pm->pdata.options & PCRE_DOLLAR_ENDONLY ? "PCRE_DOLLAR_ENDONLY " : "",
337
+                      pm->pdata.options & PCRE_UNGREEDY ? "PCRE_UNGREEDY " : "");
338
+        }
339
+        else
340
+            pm_dbgmsg("Compiler: NONE\n");
341
+    }
342
+
343
+    /* add metadata to the performance tracker */
344
+    if (options & CL_DB_PCRE_STATS)
345
+        pcre_perf_events_init(pm, virname);
346
+
347
+    /* add pcre data to root after reallocation */
348
+    pcre_count = root->pcre_metas+1;
349
+    newmetatable = (struct cli_pcre_meta **)mpool_realloc(root->mempool, root->pcre_metatable,
350
+                                         pcre_count * sizeof(struct cli_pcre_meta *));
351
+    if (!newmetatable) {
352
+        cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for new pcre meta table\n");
353
+        cli_pcre_freemeta(pm);
354
+        mpool_free(root->mempool, pm);
355
+        return CL_EMEM;
356
+    }
357
+
358
+    newmetatable[pcre_count-1] = pm;
359
+    root->pcre_metatable = newmetatable;
360
+
361
+    root->pcre_metas = pcre_count;
362
+
363
+    return CL_SUCCESS;
364
+}
365
+
366
+int cli_pcre_build(struct cli_matcher *root, long long unsigned match_limit, long long unsigned recmatch_limit, const struct cli_dconf *dconf)
367
+{
368
+    unsigned int i;
369
+    int ret;
370
+    struct cli_pcre_meta *pm = NULL;
371
+    int disable_all = 0;
372
+
373
+    if (dconf && !(dconf->pcre & PCRE_CONF_SUPPORT))
374
+        disable_all = 1;
375
+
376
+    for (i = 0; i < root->pcre_metas; ++i) {
377
+        pm = root->pcre_metatable[i];
378
+        if (!pm) {
379
+            cli_errmsg("cli_pcre_build: metadata for pcre %d is missing\n", i);
380
+            return CL_ENULLARG;
381
+        }
382
+
383
+        /* for safety, disable all pcre */
384
+        if (disable_all) {
385
+            pm->flags |= CLI_PCRE_DISABLED;
386
+            continue;
387
+        }
388
+
389
+        if (pm->flags & CLI_PCRE_DISABLED) {
390
+            cli_dbgmsg("cli_pcre_build: Skip compiling regex: %s (disabled)\n", pm->pdata.expression);
391
+            continue;
392
+        }
393
+
394
+        /* disable global */
395
+        if (dconf && !(dconf->pcre & PCRE_CONF_GLOBAL)) {
396
+            cli_dbgmsg("cli_pcre_build: disabling global option for regex /%s/\n", pm->pdata.expression);
397
+            pm->flags &= ~(CLI_PCRE_GLOBAL);
398
+        }
399
+
400
+        /* options override through metadata manipulation */
401
+#ifdef PCRE_NEVER_UTF
402
+        pm->pdata.options |= PCRE_NEVER_UTF; /* implemented in 8.33, disables (?UTF*) potential security vuln */
403
+#endif
404
+        //pm->pdata.options |= PCRE_UCP;/* implemented in 8.20 */
405
+        //pm->pdata.options |= PCRE_AUTO_CALLOUT; /* used with CALLOUT(-BACK) function */
406
+
407
+        if (dconf && (dconf->pcre & PCRE_CONF_OPTIONS)) {
408
+            /* compile the regex, no options override *wink* */
409
+            pm_dbgmsg("cli_pcre_build: Compiling regex: /%s/\n", pm->pdata.expression);
410
+            ret = cli_pcre_compile(&(pm->pdata), match_limit, recmatch_limit, 0, 0);
411
+        }
412
+        else {
413
+            /* compile the regex, options overrided and disabled */
414
+            pm_dbgmsg("cli_pcre_build: Compiling regex: /%s/ (without options)\n", pm->pdata.expression);
415
+            ret = cli_pcre_compile(&(pm->pdata), match_limit, recmatch_limit, 0, 1);
416
+        }
417
+        if (ret != CL_SUCCESS) {
418
+            cli_errmsg("cli_pcre_build: failed to build pcre regex\n");
419
+            pm->flags |= CLI_PCRE_DISABLED; /* disable the pcre, currently will terminate execution */
420
+            return ret;
421
+        }
422
+    }
423
+
424
+    return CL_SUCCESS;
425
+}
426
+
427
+/* TODO - handle VI and Macro offset types */
428
+int cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data, struct cli_target_info *info, cli_ctx *ctx)
429
+{
430
+    /* TANGENT: maintain relative offset data in cli_ac_data? */
431
+    int ret;
432
+    unsigned int i;
433
+    struct cli_pcre_meta *pm;
434
+    uint32_t endoff;
435
+
436
+    if (!data) {
437
+        return CL_ENULLARG;
438
+    }
439
+
440
+    if (!root || !root->pcre_metatable || !info || (ctx && ctx->dconf && !(ctx->dconf->pcre & PCRE_CONF_SUPPORT))) {
441
+        data->shift = NULL;
442
+        data->offset = NULL;
443
+        return CL_SUCCESS;
444
+    }
445
+
446
+    /* allocate data structures */
447
+    data->shift = (uint32_t *) cli_calloc(root->pcre_metas, sizeof(uint32_t));
448
+    if (!data->shift) {
449
+        cli_errmsg("cli_pcre_initoff: cannot allocate memory for data->shift\n");
450
+        return CL_EMEM;
451
+    }
452
+    data->offset = (uint32_t *) cli_calloc(root->pcre_metas, sizeof(uint32_t));
453
+    if (!data->offset) {
454
+        cli_errmsg("cli_pcre_initoff: cannot allocate memory for data->offset\n");
455
+        free(data->shift);
456
+        return CL_EMEM;
457
+    }
458
+
459
+    pm_dbgmsg("CLI_OFF_NONE: %u\n", CLI_OFF_NONE);
460
+    pm_dbgmsg("CLI_OFF_ANY: %u\n", CLI_OFF_ANY);
461
+
462
+    /* iterate across all pcre metadata and recalc offsets */
463
+    for (i = 0; i < root->pcre_metas; ++i) {
464
+        pm = root->pcre_metatable[i];
465
+
466
+        /* skip broken pcres, not getting executed anyways */
467
+        if (pm->flags & CLI_PCRE_DISABLED) {
468
+            data->offset[i] = CLI_OFF_NONE;
469
+            data->shift[i] = 0;
470
+            continue;
471
+        }
472
+
473
+        if (pm->offdata[0] == CLI_OFF_ANY) {
474
+            data->offset[i] = CLI_OFF_ANY;
475
+            data->shift[i] = 0;
476
+        }
477
+        else if (pm->offdata[0] == CLI_OFF_NONE) {
478
+            data->offset[i] = CLI_OFF_NONE;
479
+            data->shift[i] = 0;
480
+        }
481
+        else if (pm->offdata[0] == CLI_OFF_ABSOLUTE) {
482
+            data->offset[i] = pm->offdata[1];
483
+            data->shift[i] = pm->offdata[2];
484
+        }
485
+        else {
486
+            ret = cli_caloff(NULL, info, root->type, pm->offdata, &data->offset[i], &endoff);
487
+            if (ret != CL_SUCCESS) {
488
+                cli_errmsg("cli_pcre_recaloff: cannot recalculate relative offset for signature\n");
489
+                free(data->shift);
490
+                free(data->offset);
491
+                return ret;
492
+            }
493
+            /* CLI_OFF_NONE gets passed down, CLI_OFF_ANY gets reinterpreted */
494
+            /* TODO - CLI_OFF_VERSION is interpreted as CLI_OFF_ANY(?) */
495
+            if (data->offset[i] == CLI_OFF_ANY) {
496
+                data->offset[i] = CLI_OFF_ANY;
497
+                data->shift[i] = 0;
498
+            }
499
+            else {
500
+                data->shift[i] = endoff-(data->offset[i]);
501
+            }
502
+        }
503
+
504
+        pm_dbgmsg("%u: %u %u->%u(+%u)\n", i, pm->offdata[0], data->offset[i],
505
+                  data->offset[i]+data->shift[i], data->shift[i]);
506
+    }
507
+
508
+    return CL_SUCCESS;
509
+}
510
+
511
+void cli_pcre_freeoff(struct cli_pcre_off *data)
512
+{
513
+    if (data) {
514
+        free(data->offset);
515
+        data->offset = NULL;
516
+        free(data->shift);
517
+        data->shift = NULL;
518
+    }
519
+}
520
+
521
+int cli_pcre_qoff(struct cli_pcre_meta *pm, uint32_t length, uint32_t *adjbuffer, uint32_t *adjshift)
522
+{
523
+    if (!pm)
524
+        return CL_ENULLARG;
525
+
526
+    /* default to scanning whole buffer but try to use existing offdata */
527
+    if (pm->offdata[0] == CLI_OFF_NONE) {
528
+        return CL_BREAK;
529
+    }
530
+    else if (pm->offdata[0] == CLI_OFF_ANY) {
531
+        *adjbuffer = CLI_OFF_ANY;
532
+        *adjshift = 0;
533
+    }
534
+    else if (pm->offdata[0] == CLI_OFF_ABSOLUTE) {
535
+        *adjbuffer = pm->offdata[1];
536
+        *adjshift = pm->offdata[2];
537
+    }
538
+    else if (pm->offdata[0] == CLI_OFF_EOF_MINUS) {
539
+        *adjbuffer = length - pm->offdata[1];
540
+        *adjshift = pm->offdata[2];
541
+    }
542
+    else {
543
+        /* all relative offsets */
544
+        /* TODO - check if relative offsets apply for normal hex substrs */
545
+        *adjbuffer = 0;
546
+        *adjshift = 0;
547
+    }
548
+
549
+    return CL_SUCCESS;
550
+}
551
+
552
+int cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const struct cli_matcher *root, struct cli_ac_data *mdata, struct cli_ac_result **res, const struct cli_pcre_off *data, cli_ctx *ctx)
553
+{
554
+    struct cli_pcre_meta **metatable = root->pcre_metatable, *pm = NULL;
555
+    struct cli_pcre_data *pd;
556
+    struct cli_ac_result *newres;
557
+    uint32_t adjbuffer, adjshift, adjlength;
558
+    unsigned int i, evalcnt;
559
+    uint64_t evalids, maxfilesize;
560
+    uint32_t global, encompass, rolling;
561
+    int rc, offset, options=0, ovector[OVECCOUNT];
562
+
563
+    if ((!root->pcre_metatable) || (ctx && ctx->dconf && !(ctx->dconf->pcre & PCRE_CONF_SUPPORT))) {
564
+        return CL_SUCCESS;
565
+    }
566
+
567
+    /* NOTE: moved pcre maxfilesize limit check to caller [matcher_run] */
568
+
569
+    for (i = 0; i < root->pcre_metas; ++i) {
570
+        pm = root->pcre_metatable[i];
571
+        pd = &(pm->pdata);
572
+
573
+        /* skip checking and running disabled pcres */
574
+        if (pm->flags & CLI_PCRE_DISABLED) {
575
+            cli_dbgmsg("cli_pcre_scanbuf: skipping disabled regex /%s/\n", pd->expression);
576
+            continue;
577
+        }
578
+
579
+        /* skip checking and running CLI_OFF_NONE pcres */
580
+        if (data && data->offset[i] == CLI_OFF_NONE) {
581
+            pm_dbgmsg("cli_pcre_scanbuf: skipping CLI_OFF_NONE regex /%s/\n", pd->expression);
582
+            continue;
583
+        }
584
+
585
+        /* evaluate trigger */
586
+        if (pm->lsigid[0]) {
587
+            cli_dbgmsg("cli_pcre_scanbuf: checking %s; running regex /%s/\n", pm->trigger, pd->expression);
588
+#ifdef PCRE_BYPASS
589
+            if (strcmp(pm->trigger, PCRE_BYPASS))
590
+#endif
591
+                if (cli_ac_chklsig(pm->trigger, pm->trigger + strlen(pm->trigger), mdata->lsigcnt[pm->lsigid[1]], &evalcnt, &evalids, 0) != 1)
592
+                    continue;
593
+        }
594
+        else {
595
+            cli_dbgmsg("cli_pcre_scanbuf: skipping %s check due to unintialized lsigid\n", pm->trigger);
596
+            /* fall-through to unconditional execution - sigtool-only */
597
+        }
598
+
599
+        global = (pm->flags & CLI_PCRE_GLOBAL);       /* globally search for all matches (within bounds) */
600
+        encompass = (pm->flags & CLI_PCRE_ENCOMPASS); /* encompass search to offset->offset+maxshift */
601
+        rolling = (pm->flags & CLI_PCRE_ROLLING);     /* rolling search (unanchored) */
602
+        offset = pd->search_offset;                   /* this is usually 0 */
603
+
604
+        cli_dbgmsg("cli_pcre_scanbuf: triggered %s; running regex /%s/%s%s\n", pm->trigger, pd->expression, 
605
+                   global ? " (global)":"", rolling ? " (rolling)":"");
606
+
607
+        /* adjust the buffer sent to cli_pcre_match for offset and maxshift */
608
+        if (!data) {
609
+            if (cli_pcre_qoff(pm, length, &adjbuffer, &adjshift) != CL_SUCCESS)
610
+                continue;
611
+        }
612
+        else {
613
+            adjbuffer = data->offset[i];
614
+            adjshift = data->shift[i];
615
+        }
616
+
617
+        /* check for need to anchoring */
618
+        if (!rolling && !adjshift && (adjbuffer != CLI_OFF_ANY))
619
+            options |= PCRE_ANCHORED;
620
+        else
621
+            options = 0;
622
+
623
+        if (adjbuffer == CLI_OFF_ANY)
624
+            adjbuffer = 0;
625
+
626
+        /* check the offset bounds */
627
+        if (adjbuffer < length) {
628
+            /* handle encompass flag */
629
+            if (encompass && adjshift != 0 && adjshift != CLI_OFF_NONE) {
630
+                    if (adjbuffer+adjshift > length)
631
+                        adjlength = length - adjbuffer;
632
+                    else
633
+                        adjlength = adjshift;
634
+            }
635
+            else {
636
+                /* NOTE - if using non-encompass method 2, alter shift universally */
637
+                /* TODO - limitations on non-encompassed buffers? */
638
+                adjlength = length - adjbuffer;
639
+            }
640
+        }
641
+        else {
642
+            /* starting offset is outside bounds of file, skip pcre execution silently */
643
+            pm_dbgmsg("cli_pcre_scanbuf: starting offset is outside bounds of file %u >= %u\n", adjbuffer, length);
644
+            continue;
645
+        }
646
+
647
+        pm_dbgmsg("cli_pcre_scanbuf: passed buffer adjusted to %u +%u(%u)[%u]%s\n", adjbuffer, adjlength, adjbuffer+adjlength, adjshift, encompass ? " (encompass)":"");
648
+
649
+        /* if the global flag is set, loop through the scanning */
650
+        do {
651
+            /* performance metrics */
652
+            cli_event_time_start(p_sigevents, pm->sigtime_id);
653
+            rc = cli_pcre_match(pd, buffer+adjbuffer, adjlength, offset, options, ovector, OVECCOUNT);
654
+            cli_event_time_stop(p_sigevents, pm->sigtime_id);
655
+            /* if debug, generate a match report */
656
+            if (cli_debug_flag)
657
+                cli_pcre_report(pd, buffer+adjbuffer, adjlength, rc, ovector, OVECCOUNT);
658
+
659
+            /* matched, rc shouldn't be >0 unless a full match occurs */
660
+            if (rc > 0) {
661
+                /* check if we've gone over offset+shift */
662
+                if (!encompass && adjshift) {
663
+                    if (ovector[0] > adjshift) {
664
+                        /* ignore matched offset (outside of maxshift) */
665
+                        cli_dbgmsg("cli_pcre_scanbuf: match found outside of maxshift @%u\n", adjbuffer+ovector[0]);
666
+                        break;
667
+                    }
668
+                }
669
+
670
+                /* track the detection count */
671
+                cli_event_count(p_sigevents, pm->sigmatch_id);
672
+
673
+                /* for logical signature evaluation */
674
+                if (pm->lsigid[0]) {
675
+                    pm_dbgmsg("cli_pcre_scanbuf: assigning lsigcnt[%d][%d], located @ %d\n",
676
+                              pm->lsigid[1], pm->lsigid[2], adjbuffer+ovector[0]);
677
+
678
+                    lsig_sub_matched(root, mdata, pm->lsigid[1], pm->lsigid[2], adjbuffer+ovector[0], 0);
679
+                }
680
+
681
+                cli_dbgmsg("cli_pcre_scanbuf: located regex match @ %d\n", adjbuffer+ovector[0]);
682
+
683
+                /* for raw match data - sigtool only */
684
+                if(res) {
685
+                    newres = (struct cli_ac_result *) malloc(sizeof(struct cli_ac_result));
686
+                    if(!newres) {
687
+                        cli_errmsg("cli_pcre_scanbuff: Can't allocate memory for newres %u\n", sizeof(struct cli_ac_result));
688
+                        return CL_EMEM;
689
+                    }
690
+                    newres->virname = NULL;    /* get value? */
691
+                    newres->customdata = NULL; /* get value? */
692
+                    newres->next = *res;
693
+                    newres->offset = adjbuffer+ovector[0];
694
+                    *res = newres;
695
+                }
696
+            }
697
+
698
+            /* move off to the end of the match for next match; offset is relative to adjbuffer
699
+             * NOTE: misses matches starting within the last match; TODO: start from start of last match? */
700
+            offset = ovector[1];
701
+
702
+            /* clear the ovector results (they fall through the pcre_match) */
703
+            memset(ovector, 0, sizeof(ovector));
704
+        } while (global && rc > 0 && offset < adjlength);
705
+
706
+        /* handle error codes */
707
+        if (rc < 0 && rc != PCRE_ERROR_NOMATCH) {
708
+            switch (rc) {
709
+            case PCRE_ERROR_CALLOUT:
710
+                break;
711
+            case PCRE_ERROR_NOMEMORY:
712
+                cli_errmsg("cli_pcre_scanbuf: cli_pcre_match: pcre_exec: out of memory\n");
713
+                return CL_EMEM;
714
+            case PCRE_ERROR_MATCHLIMIT:
715
+                cli_dbgmsg("cli_pcre_scanbuf: cli_pcre_match: pcre_exec: match limit exceeded\n");
716
+                break;
717
+            case PCRE_ERROR_RECURSIONLIMIT:
718
+                cli_dbgmsg("cli_pcre_scanbuf: cli_pcre_match: pcre_exec: recursive limit exceeded\n");
719
+                break;
720
+            default:
721
+                cli_errmsg("cli_pcre_scanbuf: cli_pcre_match: pcre_exec: returned error %d\n", rc);
722
+                return CL_BREAK;
723
+            }
724
+        }
725
+    }
726
+
727
+    return CL_SUCCESS;
728
+}
729
+
730
+void cli_pcre_freemeta(struct cli_pcre_meta *pm)
731
+{
732
+    if (!pm)
733
+        return;
734
+
735
+    if (pm->trigger) {
736
+        free(pm->trigger);
737
+        pm->trigger = NULL;
738
+    }
739
+
740
+    if (pm->virname) {
741
+        free(pm->virname);
742
+        pm->virname = NULL;
743
+    }
744
+
745
+    if (pm->statname) {
746
+        free(pm->statname);
747
+        pm->statname = NULL;
748
+    }
749
+
750
+    cli_pcre_free_single(&(pm->pdata));
751
+}
752
+
753
+void cli_pcre_freetable(struct cli_matcher *root)
754
+{
755
+    uint32_t i;
756
+    struct cli_pcre_meta *pm = NULL;
757
+
758
+    for (i = 0; i < root->pcre_metas; ++i) {
759
+        /* free pcre meta */
760
+        pm = root->pcre_metatable[i];
761
+        cli_pcre_freemeta(pm);
762
+        mpool_free(root->mempool, pm);
763
+    }
764
+
765
+    /* free holding structures and set count to zero */
766
+    mpool_free(root->mempool, root->pcre_metatable);
767
+    root->pcre_metatable = NULL;
768
+    root->pcre_metas = 0;
769
+}
770
+
771
+#else
772
+/* NO-PCRE FUNCTIONS */
773
+int cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data, struct cli_target_info *info, cli_ctx *ctx)
774
+{
775
+    UNUSEDPARAM(root);
776
+    UNUSEDPARAM(info);
777
+    UNUSEDPARAM(ctx);
778
+    if (data) {
779
+        data->offset = NULL;
780
+        data->shift = NULL;
781
+    }
782
+    return CL_SUCCESS;
783
+}
784
+
785
/*
 * Build without PCRE support: the matching cli_pcre_recaloff() never
 * allocates, so there is nothing to release.
 */
void cli_pcre_freeoff(struct cli_pcre_off *data)
{
    UNUSEDPARAM(data);
}
790
+
791
+#endif /* HAVE_PCRE */
0 792
new file mode 100644
... ...
@@ -0,0 +1,88 @@
0
+/*
1
+ *  Support for matcher using PCRE
2
+ *
3
+ *  Copyright (C) 2007-2013 Sourcefire, Inc.
4
+ *  Copyright (C) 2014 Cisco Systems, Inc.
5
+ *  All Rights Reserved.
6
+ *
7
+ *  Authors: Kevin Lin
8
+ *
9
+ *  This program is free software; you can redistribute it and/or modify
10
+ *  it under the terms of the GNU General Public License version 2 as
11
+ *  published by the Free Software Foundation.
12
+ *
13
+ *  This program is distributed in the hope that it will be useful,
14
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+ *  GNU General Public License for more details.
17
+ *
18
+ *  You should have received a copy of the GNU General Public License
19
+ *  along with this program; if not, write to the Free Software
20
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21
+ *  MA 02110-1301, USA.
22
+ */
23
+
24
+#ifndef __MATCHER_PCRE_H
25
+#define __MATCHER_PCRE_H
26
+
27
+#if HAVE_CONFIG_H
28
+#include "clamav-config.h"
29
+#endif
30
+
31
+#include <sys/types.h>
32
+
33
+#include "cltypes.h"
34
+#include "dconf.h"
35
+#include "mpool.h"
36
+#include "regex_pcre.h"
37
+
38
+#define PNRE_SCAN_NONE 0
39
+#define PCRE_SCAN_BUFF 1
40
+#define PCRE_SCAN_FMAP 2
41
+
42
+/* stores offset data */
43
+struct cli_pcre_off {
44
+    uint32_t *offset, *shift;
45
+};
46
+
47
+#if HAVE_PCRE
48
+#define PCRE_BYPASS "7374756c747a676574737265676578"
49
+#define CLI_PCRE_GLOBAL    0x00000001 /* g */
50
+#define CLI_PCRE_ENCOMPASS 0x00000002 /* e */
51
+#define CLI_PCRE_ROLLING   0x00000004 /* r */
52
+
53
+#define CLI_PCRE_DISABLED  0x80000000 /* used for dconf or fail to build */
54
+
55
+struct cli_pcre_meta {
56
+    char *trigger;
57
+    char *virname;
58
+    uint32_t lsigid[3]; /* 0=valid, 1=lsigid, 2=subsigid */
59
+    struct cli_pcre_data pdata;
60
+    /* clamav offset data */
61
+    uint32_t offdata[4];
62
+    uint32_t offset_min, offset_max;
63
+    /* internal flags (bitfield?) */
64
+    uint32_t flags;
65
+    /* performance tracking */
66
+    char *statname; /* freed by us, not cli_events_free */
67
+    uint32_t sigtime_id, sigmatch_id;
68
+};
69
+
70
+/* PCRE PERFORMANCE DECLARATIONS */
71
+void cli_pcre_perf_print();
72
+void cli_pcre_perf_events_destroy();
73
+
74
+/* PCRE MATCHER DECLARATIONS */
75
+int cli_pcre_addpatt(struct cli_matcher *root, const char *virname, const char *trigger,  const char *pattern, const char *cflags, const char *offset, const uint32_t *lsigid, unsigned int options);
76
+int cli_pcre_build(struct cli_matcher *root, long long unsigned match_limit, long long unsigned recmatch_limit, const struct cli_dconf *dconf);
77
+int cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data, struct cli_target_info *info, cli_ctx *ctx);
78
+void cli_pcre_freeoff(struct cli_pcre_off *data);
79
+int cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const struct cli_matcher *root, struct cli_ac_data *mdata,  struct cli_ac_result **res, const struct cli_pcre_off *data, cli_ctx *ctx);
80
+void cli_pcre_freemeta(struct cli_pcre_meta *pm);
81
+void cli_pcre_freetable(struct cli_matcher *root);
82
+#else
83
+/* NO-PCRE DECLARATIONS - defined because encasing everything in '#if' is a pain */
84
+int cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data, struct cli_target_info *info, cli_ctx *ctx);
85
+void cli_pcre_freeoff(struct cli_pcre_off *data);
86
+#endif /* HAVE_PCRE */
87
+#endif /*__MATCHER_PCRE_H*/
... ...
@@ -1,5 +1,7 @@
1 1
 /*
2 2
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
3
+ *  Copyright (C) 2014 Cisco Systems, Inc
4
+ *  All Rights Reserved.
3 5
  *
4 6
  *  Authors: Tomasz Kojm
5 7
  *
... ...
@@ -34,6 +36,7 @@
34 34
 #include "others.h"
35 35
 #include "matcher-ac.h"
36 36
 #include "matcher-bm.h"
37
+#include "matcher-pcre.h"
37 38
 #include "filetypes.h"
38 39
 #include "matcher.h"
39 40
 #include "pe.h"
... ...
@@ -95,21 +98,21 @@ static inline int matcher_run(const struct cli_matcher *root,
95 95
 			      cli_file_t ftype,
96 96
 			      struct cli_matched_type **ftoffset,
97 97
 			      unsigned int acmode,
98
+                              unsigned int pcremode,
98 99
 			      struct cli_ac_result **acres,
99 100
 			      fmap_t *map,
100 101
 			      struct cli_bm_off *offdata,
102
+			      struct cli_pcre_off *poffdata,
101 103
 			      uint32_t *viroffset,
102 104
 			      cli_ctx *ctx)
103 105
 {
104
-    int ret;
106
+    int ret, tmp;
105 107
     int32_t pos = 0;
106 108
     struct filter_match_info info;
107 109
     uint32_t orig_length, orig_offset;
108 110
     const unsigned char* orig_buffer;
109 111
     unsigned int viruses_found = 0;
110 112
 
111
-    UNUSEDPARAM(map);
112
-
113 113
     if (root->filter) {
114 114
 	if(filter_search_ext(root->filter, buffer, length, &info) == -1) {
115 115
 	    /*  for safety always scan last maxpatlen bytes */
... ...
@@ -161,6 +164,58 @@ static inline int matcher_run(const struct cli_matcher *root,
161 161
     if (ctx && SCAN_ALL && viruses_found)
162 162
 	return CL_VIRUS;
163 163
 
164
+    /* because PCRE subsigs are logically triggered, they cannot be evaluated until after full subsig matching */
165
+    /* cannot save pcre execution state without possible evasion; must scan entire buffer */
166
+    /* however, scanning the whole buffer may require the whole buffer being loaded into memory */
167
+#if HAVE_PCRE
168
+    if (root->pcre_metas) {
169
+        int rc;
170
+        uint64_t maxfilesize;
171
+
172
+        if (map && (pcremode == PCRE_SCAN_FMAP)) {
173
+            if (offset+length >= map->len) {
174
+                /* check that scanned map does not exceed pcre maxfilesize limit */
175
+                maxfilesize = (uint64_t)cl_engine_get_num(ctx->engine, CL_ENGINE_PCRE_MAX_FILESIZE, &rc);
176
+                if (rc != CL_SUCCESS)
177
+                    return rc;
178
+                if (maxfilesize && (map->len > maxfilesize)) {
179
+                    cli_dbgmsg("matcher_run: pcre max filesize (map) exceeded (limit: %llu, needed: %llu)\n", maxfilesize, (long long unsigned)map->len);
180
+                    return CL_EMAXSIZE;
181
+                }
182
+
183
+                cli_dbgmsg("matcher_run: performing regex matching on full map: %u+%u(%u) >= %zu\n", offset, length, offset+length, map->len);
184
+
185
+                buffer = fmap_need_off_once(map, 0, map->len);
186
+                if (!buffer)
187
+                    return CL_EMEM;
188
+
189
+                /* scan the full buffer */
190
+                tmp = cli_pcre_scanbuf(buffer, map->len, root, mdata, acres, poffdata, ctx);
191
+                if((tmp == CL_VIRUS && !SCAN_ALL) || tmp == CL_EMEM) {
192
+                    return tmp;
193
+                }
194
+            }
195
+        }
196
+        else if (pcremode == PCRE_SCAN_BUFF) {
197
+            /* check that scanned buffer does not exceed pcre maxfilesize limit */
198
+            maxfilesize = (uint64_t)cl_engine_get_num(ctx->engine, CL_ENGINE_PCRE_MAX_FILESIZE, &rc);
199
+            if (rc != CL_SUCCESS)
200
+                return rc;
201
+            if (maxfilesize && (length > maxfilesize)) {
202
+                cli_dbgmsg("matcher_run: pcre max filesize (buf) exceeded (limit: %llu, needed: %u)\n", maxfilesize, length);
203
+                return CL_EMAXSIZE;
204
+            }
205
+
206
+            cli_dbgmsg("matcher_run: performing regex matching on buffer with no map: %u+%u(%u)\n", offset, length, offset+length);
207
+            /* scan the specified buffer */
208
+            tmp = cli_pcre_scanbuf(buffer, length, root, mdata, acres, poffdata, ctx);
209
+            if((tmp == CL_VIRUS && !SCAN_ALL) || tmp == CL_EMEM) {
210
+                return tmp;
211
+            }
212
+        }
213
+    }
214
+#endif /* HAVE_PCRE */
215
+    /* end experimental fragment */
164 216
     return ret;
165 217
 }
166 218
 
... ...
@@ -197,7 +252,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
197 197
 	if(!acdata && (ret = cli_ac_initdata(&mdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
198 198
 	    return ret;
199 199
 
200
-	ret = matcher_run(troot, buffer, length, &virname, acdata ? (acdata[0]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, NULL, *ctx->fmap, NULL, NULL, ctx);
200
+	ret = matcher_run(troot, buffer, length, &virname, acdata ? (acdata[0]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, NULL, ctx);
201 201
 
202 202
 	if(!acdata)
203 203
 	    cli_ac_freedata(&mdata);
... ...
@@ -217,7 +272,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
217 217
     if(!acdata && (ret = cli_ac_initdata(&mdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
218 218
 	return ret;
219 219
 
220
-    ret = matcher_run(groot, buffer, length, &virname, acdata ? (acdata[1]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, NULL, *ctx->fmap, NULL, NULL, ctx);
220
+    ret = matcher_run(groot, buffer, length, &virname, acdata ? (acdata[1]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, NULL, ctx);
221 221
 
222 222
     if(!acdata)
223 223
 	cli_ac_freedata(&mdata);
... ...
@@ -725,6 +780,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
725 725
     uint32_t maxpatlen, offset = 0;
726 726
     struct cli_ac_data gdata, tdata;
727 727
     struct cli_bm_off toff;
728
+    struct cli_pcre_off gpoff, tpoff;
728 729
     unsigned char digest[CLI_HASH_AVAIL_TYPES][32];
729 730
     struct cli_matcher *groot = NULL, *troot = NULL;
730 731
     struct cli_target_info info;
... ...
@@ -801,12 +857,26 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
801 801
             cl_hash_destroy(sha256ctx);
802 802
             return ret;
803 803
         }
804
+        if((ret = cli_pcre_recaloff(groot, &gpoff, &info, ctx))) {
805
+            cli_ac_freedata(&gdata);
806
+            if(info.exeinfo.section)
807
+                free(info.exeinfo.section);
808
+
809
+            cli_hashset_destroy(&info.exeinfo.vinfo);
810
+            cl_hash_destroy(md5ctx);
811
+            cl_hash_destroy(sha1ctx);
812
+            cl_hash_destroy(sha256ctx);
813
+            return ret;
814
+
815
+        }
804 816
     }
805 817
 
806 818
     if(troot) {
807 819
         if((ret = cli_ac_initdata(&tdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)) || (ret = cli_ac_caloff(troot, &tdata, &info))) {
808
-            if(!ftonly)
820
+            if(!ftonly) {
809 821
                 cli_ac_freedata(&gdata);
822
+                cli_pcre_freeoff(&gpoff);
823
+            }
810 824
             if(info.exeinfo.section)
811 825
                 free(info.exeinfo.section);
812 826
 
... ...
@@ -819,8 +889,10 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
819 819
         if(troot->bm_offmode) {
820 820
             if(map->len >= CLI_DEFAULT_BM_OFFMODE_FSIZE) {
821 821
                 if((ret = cli_bm_initoff(troot, &toff, &info))) {
822
-                    if(!ftonly)
822
+                    if(!ftonly) {
823 823
                         cli_ac_freedata(&gdata);
824
+                        cli_pcre_freeoff(&gpoff);
825
+                    }
824 826
 
825 827
                     cli_ac_freedata(&tdata);
826 828
                     if(info.exeinfo.section)
... ...
@@ -836,6 +908,24 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
836 836
                 bm_offmode = 1;
837 837
             }
838 838
         }
839
+        if ((ret = cli_pcre_recaloff(troot, &tpoff, &info, ctx))) {
840
+            if(!ftonly) {
841
+                cli_ac_freedata(&gdata);
842
+                cli_pcre_freeoff(&gpoff);
843
+            }
844
+
845
+            cli_ac_freedata(&tdata);
846
+            if(bm_offmode)
847
+                cli_bm_freeoff(&toff);
848
+            if(info.exeinfo.section)
849
+                free(info.exeinfo.section);
850
+
851
+            cli_hashset_destroy(&info.exeinfo.vinfo);
852
+            cl_hash_destroy(md5ctx);
853
+            cl_hash_destroy(sha1ctx);
854
+            cl_hash_destroy(sha256ctx);
855
+            return ret;
856
+        }
839 857
     }
840 858
 
841 859
     hdb = ctx->engine->hm_hdb;
... ...
@@ -878,19 +968,22 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
878 878
         if(troot) {
879 879
                 virname = NULL;
880 880
                 viroffset = 0;
881
-                ret = matcher_run(troot, buff, bytes, &virname, &tdata, offset, &info, ftype, ftoffset, acmode, acres, map, bm_offmode ? &toff : NULL, &viroffset, ctx);
881
+                ret = matcher_run(troot, buff, bytes, &virname, &tdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, bm_offmode ? &toff : NULL, &tpoff, &viroffset, ctx);
882 882
 
883 883
             if (virname) {
884 884
                 /* virname already appended by matcher_run */
885 885
                 viruses_found = 1;
886 886
             }
887 887
             if((ret == CL_VIRUS && !SCAN_ALL) || ret == CL_EMEM) {
888
-                if(!ftonly)
888
+                if(!ftonly) {
889 889
                     cli_ac_freedata(&gdata);
890
+                    cli_pcre_freeoff(&gpoff);
891
+                }
890 892
 
891 893
                 cli_ac_freedata(&tdata);
892 894
                 if(bm_offmode)
893 895
                     cli_bm_freeoff(&toff);
896
+                cli_pcre_freeoff(&tpoff);
894 897
 
895 898
                 if(info.exeinfo.section)
896 899
                     free(info.exeinfo.section);
... ...
@@ -906,7 +999,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
906 906
         if(!ftonly) {
907 907
             virname = NULL;
908 908
             viroffset = 0;
909
-            ret = matcher_run(groot, buff, bytes, &virname, &gdata, offset, &info, ftype, ftoffset, acmode, acres, map, NULL, &viroffset, ctx);
909
+            ret = matcher_run(groot, buff, bytes, &virname, &gdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, NULL, &gpoff, &viroffset, ctx);
910 910
 
911 911
             if (virname) {
912 912
                 /* virname already appended by matcher_run */
... ...
@@ -914,10 +1007,12 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
914 914
             }
915 915
             if((ret == CL_VIRUS && !SCAN_ALL) || ret == CL_EMEM) {
916 916
                 cli_ac_freedata(&gdata);
917
+                cli_pcre_freeoff(&gpoff);
917 918
                 if(troot) {
918 919
                     cli_ac_freedata(&tdata);
919 920
                     if(bm_offmode)
920 921
                         cli_bm_freeoff(&toff);
922
+                    cli_pcre_freeoff(&tpoff);
921 923
                 }
922 924
 
923 925
                 if(info.exeinfo.section)
... ...
@@ -1043,12 +1138,14 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
1043 1043
         cli_ac_freedata(&tdata);
1044 1044
         if(bm_offmode)
1045 1045
             cli_bm_freeoff(&toff);
1046
+        cli_pcre_freeoff(&tpoff);
1046 1047
     }
1047 1048
 
1048 1049
     if(groot) {
1049 1050
         if(ret != CL_VIRUS || SCAN_ALL)
1050 1051
             ret = cli_lsig_eval(ctx, groot, &gdata, &info, (const char *)refhash);
1051 1052
         cli_ac_freedata(&gdata);
1053
+        cli_pcre_freeoff(&gpoff);
1052 1054
     }
1053 1055
 
1054 1056
     if(info.exeinfo.section)
... ...
@@ -38,6 +38,8 @@ struct cli_target_info {
38 38
 #include "matcher-ac.h"
39 39
 #include "matcher-bm.h"
40 40
 #include "matcher-hash.h"
41
+#include "matcher-pcre.h"
42
+#include "regex_pcre.h"
41 43
 #include "fmap.h"
42 44
 #include "mpool.h"
43 45
 
... ...
@@ -107,6 +109,14 @@ struct cli_matcher {
107 107
 
108 108
     uint16_t maxpatlen;
109 109
     uint8_t ac_only;
110
+
111
+    /* Perl-Compatible Regular Expressions (PCRE) */
112
+#if HAVE_PCRE
113
+    uint32_t pcre_metas;
114
+    struct cli_pcre_meta **pcre_metatable;
115
+    uint32_t pcre_reloff_num, pcre_absoff_num;
116
+#endif
117
+
110 118
 #ifdef USE_MPOOL
111 119
     mpool_t *mempool;
112 120
 #endif
... ...
@@ -428,6 +428,14 @@ struct cl_engine *cl_engine_new(void)
428 428
     /* Engine max settings */
429 429
     new->maxiconspe = CLI_DEFAULT_MAXICONSPE;
430 430
 
431
+    /* PCRE matching limitations */
432
+#if HAVE_PCRE
433
+    cli_pcre_init();
434
+#endif
435
+    new->pcre_match_limit = CLI_DEFAULT_PCRE_MATCH_LIMIT;
436
+    new->pcre_recmatch_limit = CLI_DEFAULT_PCRE_RECMATCH_LIMIT;
437
+    new->pcre_max_filesize = CLI_DEFAULT_PCRE_MAX_FILESIZE;
438
+
431 439
     cli_dbgmsg("Initialized %s engine\n", cl_retver());
432 440
     return new;
433 441
 }
... ...
@@ -571,11 +579,20 @@ int cl_engine_set_num(struct cl_engine *engine, enum cl_engine_field field, long
571 571
 	    engine->maxpartitions = (uint32_t)num;
572 572
 	    break;
573 573
 	case CL_ENGINE_MAX_ICONSPE:
574
-	   engine->maxiconspe = (uint32_t)num;
575
-	   break;
574
+	    engine->maxiconspe = (uint32_t)num;
575
+	    break;
576 576
 	case CL_ENGINE_TIME_LIMIT:
577 577
             engine->time_limit = (uint32_t)num;
578 578
             break;
579
+	case CL_ENGINE_PCRE_MATCH_LIMIT:
580
+	    engine->pcre_match_limit = (uint64_t)num;
581
+	    break;
582
+	case CL_ENGINE_PCRE_RECMATCH_LIMIT:
583
+	    engine->pcre_recmatch_limit = (uint64_t)num;
584
+	    break;
585
+	case CL_ENGINE_PCRE_MAX_FILESIZE:
586
+	    engine->pcre_max_filesize = (uint64_t)num;
587
+	    break;
579 588
 	default:
580 589
 	    cli_errmsg("cl_engine_set_num: Incorrect field number\n");
581 590
 	    return CL_EARG;
... ...
@@ -651,6 +668,12 @@ long long cl_engine_get_num(const struct cl_engine *engine, enum cl_engine_field
651 651
 	    return engine->maxiconspe;
652 652
 	case CL_ENGINE_TIME_LIMIT:
653 653
             return engine->time_limit;
654
+	case CL_ENGINE_PCRE_MATCH_LIMIT:
655
+	    return engine->pcre_match_limit;
656
+	case CL_ENGINE_PCRE_RECMATCH_LIMIT:
657
+	    return engine->pcre_recmatch_limit;
658
+	case CL_ENGINE_PCRE_MAX_FILESIZE:
659
+	    return engine->pcre_max_filesize;
654 660
 	default:
655 661
 	    cli_errmsg("cl_engine_get: Incorrect field number\n");
656 662
 	    if(err)
... ...
@@ -763,6 +786,10 @@ struct cl_settings *cl_engine_settings_copy(const struct cl_engine *engine)
763 763
 
764 764
     settings->maxiconspe = engine->maxiconspe;
765 765
 
766
+    settings->pcre_match_limit = engine->pcre_match_limit;
767
+    settings->pcre_recmatch_limit = engine->pcre_recmatch_limit;
768
+    settings->pcre_max_filesize = engine->pcre_max_filesize;
769
+
766 770
     return settings;
767 771
 }
768 772
 
... ...
@@ -831,6 +858,10 @@ int cl_engine_settings_apply(struct cl_engine *engine, const struct cl_settings
831 831
 
832 832
     engine->maxiconspe = settings->maxiconspe;
833 833
 
834
+    engine->pcre_match_limit = settings->pcre_match_limit;
835
+    engine->pcre_recmatch_limit = settings->pcre_recmatch_limit;
836
+    engine->pcre_max_filesize = settings->pcre_max_filesize;
837
+
834 838
     return CL_SUCCESS;
835 839
 }
836 840
 
... ...
@@ -67,7 +67,7 @@
67 67
  * in re-enabling affected modules.
68 68
  */
69 69
 
70
-#define CL_FLEVEL 79
70
+#define CL_FLEVEL 80
71 71
 #define CL_FLEVEL_DCONF	CL_FLEVEL
72 72
 #define CL_FLEVEL_SIGTOOL CL_FLEVEL
73 73
 
... ...
@@ -352,6 +352,11 @@ struct cl_engine {
352 352
 
353 353
     /* millisecond time limit for preclassification scanning */
354 354
     uint32_t time_limit;
355
+
356
+    /* PCRE matching limitations */
357
+    uint64_t pcre_match_limit;
358
+    uint64_t pcre_recmatch_limit;
359
+    uint64_t pcre_max_filesize;
355 360
 };
356 361
 
357 362
 struct cl_settings {
... ...
@@ -411,6 +416,11 @@ struct cl_settings {
411 411
 
412 412
     /* Engine max settings */
413 413
     uint32_t maxiconspe; /* max number of icons to scan for PE */
414
+
415
+    /* PCRE matching limitations */
416
+    uint64_t pcre_match_limit;
417
+    uint64_t pcre_recmatch_limit;
418
+    uint64_t pcre_max_filesize;
414 419
 };
415 420
 
416 421
 extern int (*cli_unrar_open)(int fd, const char *dirname, unrar_state_t *state);
... ...
@@ -84,7 +84,7 @@ static pthread_mutex_t cli_ref_mutex = PTHREAD_MUTEX_INITIALIZER;
84 84
 #include "yara_clam.h"
85 85
 #endif
86 86
 
87
-char *cli_virname(char *virname, unsigned int official)
87
+char *cli_virname(const char *virname, unsigned int official)
88 88
 {
89 89
 	char *newname, *pt;
90 90
 
... ...
@@ -112,6 +112,7 @@ char *cli_virname(char *virname, unsigned int official)
112 112
     return newname;
113 113
 }
114 114
 
115
+#define PCRE_TOKENS 4
115 116
 int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, uint16_t rtype, uint16_t type, const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options)
116 117
 {
117 118
     struct cli_bm_patt *bm_new;
... ...
@@ -173,8 +174,54 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
173 173
 
174 174
         return CL_SUCCESS;
175 175
     }
176
+    if (strchr(hexsig, '/')) {
177
+#if HAVE_PCRE
178
+        /* expected format => ^offset:trigger/regex/[cflags]$ */
179
+	const char *trigger, *pattern, *cflags;
180
+        char *start, *end;
176 181
 
177
-    if((wild = strchr(hexsig, '{'))) {
182
+        /* reject a subsig that starts with '/', i.e. one with no logical trigger */
183
+        if (hexsig[0] == '/') {
184
+            cli_errmsg("cli_parseadd(): PCRE subsig must contain logical trigger\n");
185
+            return CL_EMALFDB;
186
+        }
187
+
188
+        /* work on a private, writable copy of the subsig */
189
+        hexcpy = cli_calloc(hexlen+1, sizeof(char));
190
+        if(!hexcpy)
191
+            return CL_EMEM;
192
+        strncpy(hexcpy, hexsig, hexlen);
193
+
194
+        /* get delimiters-ed */
195
+        start = strchr(hexcpy, '/');
196
+        end = strrchr(hexcpy, '/');
197
+        if (start == end) {
198
+            cli_errmsg("cli_parseadd(): PCRE expression must be delimited by '/'\n");
199
+            free(hexcpy);
200
+            return CL_EMALFDB;
201
+        }
202
+
203
+        /* split in place: NUL-terminate the trigger and the pattern */
204
+        *start = '\0';
205
+        *end = '\0';
206
+
207
+        /* point trigger, pattern and cflags at their pieces of the copy */
208
+        trigger = hexcpy;
209
+        pattern = start+1;
210
+        cflags = end+1;
211
+        if (*cflags == '\0') /* get compat-ed */
212
+            cflags = NULL;
213
+
214
+        /* normal trigger, get added */
215
+        ret = cli_pcre_addpatt(root, virname, trigger, pattern, cflags, offset, lsigid, options);
216
+        free(hexcpy);
217
+        return ret;
218
+#else
219
+        cli_errmsg("cli_parseadd(): cannot parse PCRE subsig without PCRE support\n");
220
+        return CL_EPARSE;
221
+#endif
222
+    }
223
+    else if((wild = strchr(hexsig, '{'))) {
178 224
         if(sscanf(wild, "%c%u%c", &l, &range, &r) == 3 && l == '{' && r == '}' && range > 0 && range < 128) {
179 225
             hexcpy = cli_calloc(hexlen + 2 * range, sizeof(char));
180 226
             if(!hexcpy)
... ...
@@ -1308,10 +1355,17 @@ static int load_oneldb(char *buffer, int chkpua, struct cl_engine *engine, unsig
1308 1308
     }
1309 1309
 
1310 1310
     subsigs++;
1311
-    if(subsigs > 64) {
1312
-        cli_errmsg("cli_loadldb: Broken logical expression or too many subsignatures\n");
1313
-        return CL_EMALFDB;
1311
+
1312
+#if !HAVE_PCRE
1313
+    /* Regex Usage and Support Check */
1314
+    for (i = 0; i < subsigs; ++i) {
1315
+        if (strchr(tokens[i+3], '/')) {
1316
+            cli_dbgmsg("cli_loadldb: logical signature for %s uses PCREs but support is disabled, skipping\n", virname);
1317
+            (*sigs)--;
1318
+            return CL_SUCCESS;
1319
+        }
1314 1320
     }
1321
+#endif
1315 1322
 
1316 1323
     if (!line) {
1317 1324
         /* This is a logical signature from the bytecode, we need all
... ...
@@ -1327,6 +1381,12 @@ static int load_oneldb(char *buffer, int chkpua, struct cl_engine *engine, unsig
1327 1327
         return CL_EMALFDB;
1328 1328
     }
1329 1329
 
1330
+    /* enforce 64 subsig cap */
1331
+    if(subsigs > 64) {
1332
+	cli_errmsg("cli_loadldb: Broken logical expression or too many subsignatures\n");
1333
+	return CL_EMALFDB;
1334
+    }
1335
+
1330 1336
     /* TDB */
1331 1337
     memset(&tdb, 0, sizeof(tdb));
1332 1338
 #ifdef USE_MPOOL
... ...
@@ -3562,6 +3622,9 @@ int cl_engine_free(struct cl_engine *engine)
3562 3562
 		    }
3563 3563
 		    mpool_free(engine->mempool, root->ac_lsigtable);
3564 3564
 		}
3565
+#if HAVE_PCRE
3566
+                cli_pcre_freetable(root);
3567
+#endif /* HAVE_PCRE */
3565 3568
 		mpool_free(engine->mempool, root);
3566 3569
 	    }
3567 3570
 	}
... ...
@@ -3675,7 +3738,6 @@ int cl_engine_compile(struct cl_engine *engine)
3675 3675
 	int ret;
3676 3676
 	struct cli_matcher *root;
3677 3677
 
3678
-
3679 3678
     if(!engine)
3680 3679
 	return CL_ENULLARG;
3681 3680
 
... ...
@@ -3687,7 +3749,14 @@ int cl_engine_compile(struct cl_engine *engine)
3687 3687
 	if((root = engine->root[i])) {
3688 3688
 	    if((ret = cli_ac_buildtrie(root)))
3689 3689
 		return ret;
3690
-	    cli_dbgmsg("Matcher[%u]: %s: AC sigs: %u (reloff: %u, absoff: %u) BM sigs: %u (reloff: %u, absoff: %u) maxpatlen %u %s\n", i, cli_mtargets[i].name, root->ac_patterns, root->ac_reloff_num, root->ac_absoff_num, root->bm_patterns, root->bm_reloff_num, root->bm_absoff_num, root->maxpatlen, root->ac_only ? "(ac_only mode)" : "");
3690
+#if HAVE_PCRE
3691
+            if((ret = cli_pcre_build(root, engine->pcre_match_limit, engine->pcre_recmatch_limit, engine->dconf)))
3692
+                return ret;
3693
+
3694
+	    cli_dbgmsg("Matcher[%u]: %s: AC sigs: %u (reloff: %u, absoff: %u) BM sigs: %u (reloff: %u, absoff: %u) PCREs: %u (reloff: %u, absoff: %u) maxpatlen %u %s\n", i, cli_mtargets[i].name, root->ac_patterns, root->ac_reloff_num, root->ac_absoff_num, root->bm_patterns, root->bm_reloff_num, root->bm_absoff_num, root->pcre_metas, root->pcre_reloff_num, root->pcre_absoff_num, root->maxpatlen, root->ac_only ? "(ac_only mode)" : "");
3695
+#else
3696
+	    cli_dbgmsg("Matcher[%u]: %s: AC sigs: %u (reloff: %u, absoff: %u) BM sigs: %u (reloff: %u, absoff: %u) maxpatlen %u PCREs: 0 (disabled) %s\n", i, cli_mtargets[i].name, root->ac_patterns, root->ac_reloff_num, root->ac_absoff_num, root->bm_patterns, root->bm_reloff_num, root->bm_absoff_num, root->maxpatlen, root->ac_only ? "(ac_only mode)" : "");
3697
+#endif
3691 3698
 	}
3692 3699
     }
3693 3700
     if(engine->hm_hdb)
... ...
@@ -67,7 +67,7 @@
67 67
 	cli_strbcasestr(ext, ".yara")		\
68 68
     )
69 69
 
70
-char *cli_virname(char *virname, unsigned int official);
70
+char *cli_virname(const char *virname, unsigned int official);
71 71
 
72 72
 int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, uint16_t rtype, uint16_t type, const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options);
73 73
 
74 74
new file mode 100644
... ...
@@ -0,0 +1,269 @@
0
+/*
1
+ *  Support for PCRE regex variant
2
+ *
3
+ *  Copyright (C) 2007-2013 Sourcefire, Inc.
4
+ *  Copyright (C) 2014 Cisco Systems, Inc.
5
+ *  All Rights Reserved.
6
+ *
7
+ *  Authors: Kevin Lin
8
+ *
9
+ *  This program is free software; you can redistribute it and/or modify
10
+ *  it under the terms of the GNU General Public License version 2 as
11
+ *  published by the Free Software Foundation.
12
+ *
13
+ *  This program is distributed in the hope that it will be useful,
14
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+ *  GNU General Public License for more details.
17
+ *
18
+ *  You should have received a copy of the GNU General Public License
19
+ *  along with this program; if not, write to the Free Software
20
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21
+ *  MA 02110-1301, USA.
22
+ */
23
+
24
+#if HAVE_CONFIG_H
25
+#include "clamav-config.h"
26
+#endif
27
+
28
+#if HAVE_PCRE
29
+#include <pcre.h>
30
+
31
+#include "clamav.h"
32
+#include "cltypes.h"
33
+#include "others.h"
34
+#include "regex_pcre.h"
35
+
36
+/* TODO: cli_pcre_init: redefine pcre_malloc and pcre_free, setup callback function? */
37
+int cli_pcre_init()
38
+{
39
+    pcre_malloc = cli_malloc;
40
+    pcre_free = free;
41
+    pcre_stack_malloc = cli_malloc;
42
+    pcre_stack_free = free;
43
+
44
+    return CL_SUCCESS;
45
+}
46
+
47
+int cli_pcre_addoptions(struct cli_pcre_data *pd, const char **opt, int errout)
48
+{
49
+    if (!pd || !opt || !(*opt))
50
+        return CL_ENULLARG;
51
+
52
+    while (**opt != '\0') {
53
+        switch(**opt) {
54
+        case 'i':  pd->options |= PCRE_CASELESS;            break;
55
+        case 's':  pd->options |= PCRE_DOTALL;              break;
56
+        case 'm':  pd->options |= PCRE_MULTILINE;           break;
57
+        case 'x':  pd->options |= PCRE_EXTENDED;            break;
58
+
59
+            /* these are pcre specific... don't work with perl */
60
+        case 'A':  pd->options |= PCRE_ANCHORED;            break;
61
+        case 'E':  pd->options |= PCRE_DOLLAR_ENDONLY;      break;
62
+        case 'G':  pd->options |= PCRE_UNGREEDY;            break;
63
+
64
+        default:
65
+            if (errout) {
66
+                cli_errmsg("cli_pcre_addoptions: unknown/extra pcre option encountered %c\n", **opt);
67
+                return CL_EMALFDB;
68
+            }
69
+            else
70
+                return CL_EPARSE; /* passed to caller to handle */
71
+        }
72
+        (*opt)++;
73
+    }
74
+
75
+    return CL_SUCCESS;
76
+}
77
+
78
+int cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
79
+{
80
+    const char *error;
81
+    int erroffset;
82
+
83
+    if (!pd || !pd->expression) {
84
+        cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
85
+        return CL_ENULLARG;
86
+    }
87
+
88
+    /* compile the pcre regex last arg is charset, allow for options override */
89
+    if (opt_override)
90
+        pd->re = pcre_compile(pd->expression, options, &error, &erroffset, NULL); /* pd->re handled by libpcre -> call pcre_free() -> calls free() */
91
+    else
92
+        pd->re = pcre_compile(pd->expression, pd->options, &error, &erroffset, NULL); /* pd->re handled by libpcre -> call pcre_free() -> calls free() */
93
+    if (pd->re == NULL) {
94
+        cli_errmsg("cli_pcre_parse: PCRE compilation failed at offset %d: %s\n", erroffset, error);
95
+        return CL_EMALFDB;
96
+    }
97
+
98
+    /* now study it... (section totally not from snort) */
99
+    pd->ex = pcre_study(pd->re, 0, &error);
100
+    if (!(pd->ex)) {
101
+        pd->ex = (pcre_extra *)cli_calloc(1, sizeof(*(pd->ex)));
102
+        if (!(pd->ex)) {
103
+            cli_errmsg("cli_pcre_parse: Unable to allocate memory\n");
104
+            return CL_EMEM;
105
+        }
106
+    }
107
+
108
+    /* set the match limits */
109
+    if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT) {
110
+        pd->ex->match_limit = match_limit;
111
+    }
112
+    else {
113
+        pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT;
114
+        pd->ex->match_limit = match_limit;
115
+    }
116
+
117
+    /* set the recursion match limits */
118
+#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
119
+    if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) {
120
+        pd->ex->match_limit_recursion = match_limit_recursion;
121
+    }
122
+    else {
123
+        pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
124
+        pd->ex->match_limit_recursion = match_limit_recursion;
125
+    }
126
+#endif /* PCRE_EXTRA_MATCH_LIMIT_RECURSION */
127
+
128
+    /* non-dynamic allocated fields set by caller */
129
+    return CL_SUCCESS;
130
+}
131
+
132
+int cli_pcre_match(struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int override_offset, int options, int *ovector, size_t ovlen)
133
+{
134
+    int startoffset;
135
+
136
+    if (ovlen % 3) {
137
+        cli_dbgmsg("cli_pcre_match: ovector length is not a multiple of 3\n");
138
+        return CL_EARG;
139
+    }
140
+
141
+    /* set the startoffset, override if a value is specified */
142
+    startoffset = pd->search_offset;
143
+    if (override_offset >= 0)
144
+        startoffset = override_offset;
145
+
146
+    /* execute the pcre and return */
147
+    return pcre_exec(pd->re, pd->ex, buffer, buflen, startoffset, options, ovector, ovlen);
148
+}
149
+
150
+#define DISABLE_PCRE_REPORT 0
151
+#define MATCH_MAXLEN 1028 /*because lolz*/
152
+
153
+/* TODO: audit this function */
154
+static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, int *ovector, size_t ovlen)
155
+{
156
+    int i, j, length, namecount, trunc;
157
+    unsigned char *tabptr;
158
+    int name_entry_size;
159
+    unsigned char *name_table;
160
+    const char *start;
161
+    char outstr[2*MATCH_MAXLEN+1];
162
+
163
+    /* determine if there are named substrings */
164
+    (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount);
165
+    if (namecount <= 0) {
166
+        cli_dbgmsg("cli_pcre_report: no named substrings\n");
167
+    }
168
+    else {
169
+        cli_dbgmsg("cli_pcre_report: named substrings\n");
170
+
171
+        /* extract named substring translation table */
172
+        (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table);
173
+        (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
174
+
175
+        /* print named substring information */
176
+        tabptr = name_table;
177
+        for (i = 0; i < namecount; i++) {
178
+            int n = (tabptr[0] << 8) | tabptr[1];
179
+
180
+            start = buffer + ovector[2*n];
181
+            length = ovector[2*n+1] - ovector[2*n];
182
+
183
+            trunc = 0;
184
+            if (length > MATCH_MAXLEN) {
185
+                trunc = 1;
186
+                length = MATCH_MAXLEN;
187
+            }
188
+
189
+            for (j = 0; j < length; ++j)
190
+                snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)*(start+j));
191
+
192
+            cli_dbgmsg("cli_pcre_report: (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2,
193
+                       outstr, trunc ? " (trunc)":"");
194
+            /*
195
+            cli_dbgmsg("named_substr:  (%d) %*s: %.*s%s\n", n, name_entry_size - 3, tabptr + 2,
196
+                       length, start, trunc ? " (trunc)":"");
197
+            */
198
+            tabptr += name_entry_size;
199
+        }
200
+    }
201
+}
202
+
203
+/* TODO: audit this function */
204
+void cli_pcre_report(const struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int rc, int *ovector, size_t ovlen)
205
+{
206
+    int i, j, length, trunc;
207
+    const char *start;
208
+    char outstr[2*MATCH_MAXLEN+1];
209
+
210
+    /* print out additional diagnostics if cli_debug_flag is set */
211
+    if (!DISABLE_PCRE_REPORT) {
212
+        cli_dbgmsg("\n");
213
+        cli_dbgmsg("cli_pcre_report: PCRE Execution Report:\n");
214
+        cli_dbgmsg("cli_pcre_report: running regex /%s/ returns %d\n", pd->expression, rc);
215
+        if (rc > 0) {
216
+            /* print out full-match and capture groups */
217
+            for (i = 0; i < rc; ++i) {
218
+                start = buffer + ovector[2*i];
219
+                length = ovector[2*i+1] - ovector[2*i];
220
+
221
+                if (ovector[2*i+1] > buflen) {
222
+                    cli_warnmsg("cli_pcre_report: reported match goes outside buffer\n");
223
+                    continue;
224
+                }
225
+
226
+                trunc = 0;
227
+                if (length > MATCH_MAXLEN) {
228
+                    trunc = 1;
229
+                    length = MATCH_MAXLEN;
230
+                }
231
+
232
+                for (j = 0; j < length; ++j) 
233
+                    snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)*(start+j));
234
+
235
+                cli_dbgmsg("cli_pcre_report:  %d: %s%s\n", i, outstr, trunc ? " (trunc)":"");
236
+                //cli_dbgmsg("cli_pcre_report:  %d: %.*s%s\n", i, length, start, trunc ? " (trunc)":"");
237
+            }
238
+
239
+            named_substr_print(pd, buffer, ovector, ovlen);
240
+        }
241
+        else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) {
242
+            cli_dbgmsg("cli_pcre_report: no match found\n");
243
+        }
244
+        else {
245
+            cli_dbgmsg("cli_pcre_report: error occurred in pcre_match: %d\n", rc);
246
+            /* error handled by caller */
247
+        }
248
+        cli_dbgmsg("cli_pcre_report: PCRE Execution Report End\n");
249
+        cli_dbgmsg("\n");
250
+    }
251
+}
252
+
253
+void cli_pcre_free_single(struct cli_pcre_data *pd)
254
+{
255
+    if (pd->re) {
256
+        pcre_free(pd->re);
257
+        pd->re = NULL;
258
+    }
259
+    if (pd->ex) {
260
+        free(pd->ex);
261
+        pd->ex = NULL;
262
+    }
263
+    if (pd->expression) {
264
+        free(pd->expression);
265
+        pd->expression = NULL;
266
+    }
267
+}
268
+#endif /* HAVE_PCRE */
0 269
new file mode 100644
... ...
@@ -0,0 +1,58 @@
0
+/*
1
+ *  Support for PCRE regex variant
2
+ *
3
+ *  Copyright (C) 2007-2013 Sourcefire, Inc.
4
+ *  Copyright (C) 2014 Cisco Systems, Inc.
5
+ *  All Rights Reserved.
6
+ *
7
+ *  Authors: Kevin Lin
8
+ *
9
+ *  This program is free software; you can redistribute it and/or modify
10
+ *  it under the terms of the GNU General Public License version 2 as
11
+ *  published by the Free Software Foundation.
12
+ *
13
+ *  This program is distributed in the hope that it will be useful,
14
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+ *  GNU General Public License for more details.
17
+ *
18
+ *  You should have received a copy of the GNU General Public License
19
+ *  along with this program; if not, write to the Free Software
20
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21
+ *  MA 02110-1301, USA.
22
+ */
23
+
24
#ifndef _REGEX_PCRE_H_
#define _REGEX_PCRE_H_

/* The configuration header has to be processed before HAVE_PCRE is tested;
 * otherwise this header silently expands to nothing whenever it is the first
 * include of a translation unit. (Presumably clamav-config.h is where
 * HAVE_PCRE gets defined - regex_pcre.c includes it before its own test.) */
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif

#if HAVE_PCRE
#include <pcre.h>

#include "cltypes.h"
#include "mpool.h"

/* override_offset value for cli_pcre_match() meaning "use pd->search_offset" */
#define CLI_PCREMATCH_NOOFFSETOVERRIDE -1
/* length, in ints, of a pcre_exec output vector; must be multiple of 3 */
#define OVECCOUNT 300

/* One compiled PCRE expression together with its execution settings. */
struct cli_pcre_data {
    pcre *re;               /* compiled pcre regex */
    pcre_extra *ex;         /* pcre extra data - limits */
    int options;            /* pcre compile options (PCRE_* bits) */
    char *expression;       /* copied regular expression */
    uint32_t search_offset; /* start offset to search at for pcre_exec */
};

/* Installs the allocator hooks used by libpcre; returns CL_SUCCESS. */
int cli_pcre_init(void);

/* ORs the PCRE option bits named by the modifier letters at *opt into
 * pd->options, advancing *opt past every accepted letter. An unknown letter
 * yields CL_EMALFDB (errout != 0, logged) or CL_EPARSE (errout == 0). */
int cli_pcre_addoptions(struct cli_pcre_data *pd, const char **opt, int errout);

/* Compiles and studies pd->expression and stores the match limits in pd->ex.
 * When opt_override is non-zero, options replaces pd->options for the
 * compilation. Returns CL_SUCCESS or a CL_E* code. */
int cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override);

/* Executes the regex on buffer. A non-negative override_offset replaces
 * pd->search_offset as the start offset. ovlen must be a multiple of 3,
 * otherwise CL_EARG is returned; in all other cases the pcre_exec result is
 * returned unchanged. */
int cli_pcre_match(struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int override_offset, int options, int *ovector, size_t ovlen);

/* Writes a debug report describing the outcome rc of a match, including hex
 * dumps of the matched substrings. */
void cli_pcre_report(const struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int rc, int *ovector, size_t ovlen);

/* Releases re, ex and expression of pd and resets them to NULL; pd itself is
 * not freed. */
void cli_pcre_free_single(struct cli_pcre_data *pd);
#endif /* HAVE_PCRE */
#endif /*_REGEX_PCRE_H_*/
... ...
@@ -7,37 +7,36 @@ AC_ARG_WITH([pcre],
7 7
                           /usr/local or /usr if not found in /usr/local)],
8 8
 [
9 9
   AC_MSG_CHECKING([for libpcre installation])
10
-dnl --with-pcre=no
11
-  if test "X$withval" = "Xno"; then
10
+  case "$withval" in
11
+  no)
12 12
     AC_MSG_RESULT([no])
13
-  else
14
-dnl --with-pcre=yes
15
-    if test "X$withval" = "Xyes"; then
16
-      PCRE_HOME=/usr/local
17
-      if test ! -x "$PCRE_HOME/bin/pcre-config"; then
18
-        PCRE_HOME=/usr
19
-        if test ! -x "$PCRE_HOME/bin/pcre-config"; then
20
-          PCRE_HOME=""
21
-        fi
22
-      fi
23
-dnl --with-pcre=something
24
-    elif test "$withval"; then
25
-      PCRE_HOME="$withval"
13
+    ;;
14
+  yes)
15
+    PCRE_HOME=/usr/local
16
+    if test ! -x "$PCRE_HOME/bin/pcre-config"; then
17
+      PCRE_HOME=/usr
26 18
       if test ! -x "$PCRE_HOME/bin/pcre-config"; then
27 19
         PCRE_HOME=""
28
-        AC_MSG_ERROR([cannot locate libpcre at $withval])
20
+        AC_MSG_ERROR([cannot locate libpcre at /usr/local or /usr])
29 21
       fi
30
-dnl --with-pcre=""
31
-    else
32
-      AC_MSG_ERROR([cannot assign blank value to --with-pcre])
33 22
     fi
34
-
35
-    if test "x$PCRE_HOME" != "x"; then
36
-      AC_MSG_RESULT([using $PCRE_HOME])
37
-    else
38
-      AC_MSG_RESULT([not found])
39
-      AC_MSG_WARN([cannot locate libpcre at /usr/local or /usr])
23
+    ;;
24
+  "")
25
+    AC_MSG_ERROR([cannot assign blank value to --with-pcre])
26
+    ;;
27
+  *)
28
+    PCRE_HOME="$withval"
29
+    if test ! -x "$PCRE_HOME/bin/pcre-config"; then
30
+      PCRE_HOME=""
31
+      AC_MSG_ERROR([cannot locate libpcre at $withval])
40 32
     fi
33
+    ;;
34
+  esac
35
+
36
+  if test "x$PCRE_HOME" != "x"; then
37
+    AC_MSG_RESULT([using $PCRE_HOME])
38
+  else
39
+    AC_MSG_RESULT([not found])
41 40
   fi
42 41
 ],[
43 42
 dnl --with-pcre not specified
... ...
@@ -48,7 +48,7 @@
48 48
 
49 49
 #include "getopt.h"
50 50
 
51
-#define MAXCMDOPTS  120
51
+#define MAXCMDOPTS  150
52 52
 
53 53
 #define MATCH_NUMBER "^[0-9]+$"
54 54
 #define MATCH_SIZE "^[0-9]+[KM]?$"
... ...
@@ -297,7 +297,7 @@ const struct clam_option __clam_options[] = {
297 297
     { "BytecodeMode", "bytecode-mode", 0, CLOPT_TYPE_STRING, "^(Auto|ForceJIT|ForceInterpreter|Test)$", -1, "Auto", FLAG_REQUIRED, OPT_CLAMD | OPT_CLAMSCAN,
298 298
 	"Set bytecode execution mode.\nPossible values:\n\tAuto - automatically choose JIT if possible, fallback to interpreter\nForceJIT - always choose JIT, fail if not possible\nForceIntepreter - always choose interpreter\nTest - run with both JIT and interpreter and compare results. Make all failures fatal.","Auto"},
299 299
 
300
-    { "BytecodeStatistics", "bytecode-statistics", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMSCAN | OPT_CLAMBC, "Collect and print bytecode execution statistics.", "no" },
300
+    { "Statistics", "statistics", 0, CLOPT_TYPE_STRING, "^(none|None|bytecode|Bytecode|pcre|PCRE)$", -1, NULL, FLAG_MULTIPLE, OPT_CLAMSCAN | OPT_CLAMBC, "Collect and print execution statistics.\nPossible values:\n\tBytecode - reports bytecode statistics\nPCRE - reports PCRE execution statistics\nNone - reports no statistics", "None" },
301 301
 
302 302
    { "DetectPUA", "detect-pua", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Detect Potentially Unwanted Applications.", "yes" },
303 303
 
... ...
@@ -380,6 +380,12 @@ const struct clam_option __clam_options[] = {
380 380
 
381 381
     { "TimeLimit", "timelimit", 0, CLOPT_TYPE_NUMBER, MATCH_NUMBER, 0, NULL, 0, OPT_CLAMSCAN, "This clamscan option is currently for testing only. It sets the engine parameter CL_ENGINE_TIME_LIMIT. The value is in milliseconds.", "0" },
382 382
 
383
+    { "PCREMatchLimit", "pcre-match-limit", 0, CLOPT_TYPE_NUMBER, MATCH_NUMBER, CLI_DEFAULT_PCRE_MATCH_LIMIT, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option sets the maximum calls to the PCRE match function during an instance of regex matching.\nInstances using more than this limit will be terminated and alert the user but the scan will continue.\nFor more information on match_limit, see the PCRE documentation.\nNegative values are not allowed.\nWARNING: setting this limit too high may severely impact performance.", "10000" },
384
+
385
+    { "PCRERecMatchLimit", "pcre-recmatch-limit", 0, CLOPT_TYPE_NUMBER, MATCH_NUMBER, CLI_DEFAULT_PCRE_RECMATCH_LIMIT, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option sets the maximum recursive calls to the PCRE match function during an instance of regex matching.\nInstances using more than this limit will be terminated and alert the user but the scan will continue.\nFor more information on match_limit_recursion, see the PCRE documentation.\nNegative values are not allowed and values > PCREMatchLimit are superfluous.\nWARNING: setting this limit too high may severely impact performance.", "5000" },
386
+
387
+    { "PCREMaxFileSize", "pcre-max-filesize", 0, CLOPT_TYPE_NUMBER, MATCH_NUMBER, CLI_DEFAULT_PCRE_MAX_FILESIZE, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option sets the maximum filesize for which PCRE subsigs will be executed.\nFiles exceeding this limit will not have PCRE subsigs executed unless a subsig is encompassed to a smaller buffer.\nNegative values are not allowed.\nSetting this value to zero disables the limit.\nWARNING: setting this limit too high or disabling it may severely impact performance.", "25M" },
388
+
383 389
     /* OnAccess settings */
384 390
     { "ScanOnAccess", NULL, 0, CLOPT_TYPE_BOOL, MATCH_BOOL, -1, NULL, 0, OPT_CLAMD, "This option enables on-access scanning (Linux only)", "no" },
385 391
 
... ...
@@ -1984,6 +1984,8 @@ static void matchsig(const char *sig, const char *offset, int fd)
1984 1984
 	cli_ctx ctx;
1985 1985
 	int ret;
1986 1986
 
1987
+    mprintf("SUBSIG: %s\n", sig);
1988
+
1987 1989
     if(!(engine = cl_engine_new())) {
1988 1990
 	mprintf("!matchsig: Can't create new engine\n");
1989 1991
 	return;
... ...
@@ -2278,14 +2280,81 @@ static char *decodehexspecial(const char *hex, unsigned int *dlen)
2278 2278
 
2279 2279
 static int decodehex(const char *hexsig)
2280 2280
 {
2281
-	char *pt, *hexcpy, *start, *n, *decoded;
2281
+	char *pt, *hexcpy, *start, *n, *decoded, *wild;
2282 2282
 	int asterisk = 0;
2283 2283
 	unsigned int i, j, hexlen, dlen, parts = 0, bw;
2284 2284
 	int mindist = 0, maxdist = 0, error = 0;
2285 2285
 
2286 2286
 
2287 2287
     hexlen = strlen(hexsig);
2288
-    if(strchr(hexsig, '{') || strchr(hexsig, '[')) {
2288
+    if ((wild = strchr(hexsig, '/'))) {
2289
+	/* ^offset:trigger-logic/regex/options$ */
2290
+	char *trigger, *regex, *regex_end, *cflags;
2291
+	size_t tlen = wild-hexsig, rlen, clen;
2292
+
2293
+	/* check for trigger */
2294
+	if (!tlen) {
2295
+	    mprintf("!pcre without logical trigger\n");
2296
+	    return -1;
2297
+	}
2298
+
2299
+	/* locate end of regex for options start, locate options length */
2300
+	if ((regex_end = strchr(wild+1, '/')) == NULL) {
2301
+	    mprintf("!missing regex expression terminator /\n");
2302
+	    return -1;
2303
+	}
2304
+	rlen = regex_end-wild-1;
2305
+	clen = hexlen-tlen-rlen-2; /* 2 from regex boundaries '/' */
2306
+
2307
+	/* get the trigger statement */
2308
+	trigger = cli_calloc(tlen+1, sizeof(char));
2309
+	if (!trigger) {
2310
+	    mprintf("!cannot allocate memory for trigger string\n");
2311
+	    return -1;
2312
+	}
2313
+	strncpy(trigger, hexsig, tlen);
2314
+	trigger[tlen] = '\0';
2315
+
2316
+	/* get the regex expression */
2317
+	regex = cli_calloc(rlen+1, sizeof(char));
2318
+	if (!regex) {
2319
+	    mprintf("!cannot allocate memory for regex expression\n");
2320
+	    return -1;
2321
+	}
2322
+	strncpy(regex, hexsig+tlen+1, rlen);
2323
+	regex[rlen] = '\0';
2324
+
2325
+	/* get the compile flags */
2326
+	if (clen) {
2327
+	    cflags = cli_calloc(clen+1, sizeof(char));
2328
+	    if (!cflags) {
2329
+		mprintf("!cannot allocate memory for compile flags\n");
2330
+		return -1;
2331
+	    }
2332
+	    strncpy(cflags, hexsig+tlen+rlen+2, clen);
2333
+	    cflags[clen] = '\0';
2334
+	}
2335
+	else {
2336
+	    cflags = NULL;
2337
+	}
2338
+
2339
+	/* print components of regex subsig */
2340
+	mprintf("     +-> TRIGGER: %s\n", trigger);
2341
+	mprintf("     +-> REGEX: %s\n", regex);
2342
+	mprintf("     +-> CFLAGS: %s\n", cflags);
2343
+
2344
+	free(trigger);
2345
+	free(regex);
2346
+	if (cflags)
2347
+	    free(cflags);
2348
+#if HAVE_PCRE
2349
+	return 0;
2350
+#else
2351
+	mprintf("!PCRE subsig cannot be loaded without PCRE support\n");
2352
+	return -1;
2353
+#endif
2354
+    }
2355
+    else if(strchr(hexsig, '{') || strchr(hexsig, '[')) {
2289 2356
 	if(!(hexcpy = strdup(hexsig)))
2290 2357
 	    return -1;
2291 2358
 
... ...
@@ -2481,8 +2550,8 @@ static int decodesig(char *sig, int fd)
2481 2481
 		mprintf(" +-> OFFSET: ANY\n");
2482 2482
 	    }
2483 2483
 	    if(fd == -1) {
2484
-		mprintf(" +-> DECODED SUBSIGNATURE:\n");
2485
-		decodehex(pt ? pt : tokens[3 + i]);
2484
+                mprintf(" +-> DECODED SUBSIGNATURE:\n");
2485
+                decodehex(pt ? pt : tokens[3 + i]);
2486 2486
 	    } else {
2487 2487
 		mprintf(" +-> ");
2488 2488
 		matchsig(pt ? pt : tokens[3 + i], pt ? tokens[3 + i] : NULL, fd);
... ...
@@ -166,6 +166,8 @@ EXPORTS cli_checkfp_pe @44369 NONAME
166 166
 EXPORTS cli_bytefunc_describe @44370 NONAME
167 167
 EXPORTS cli_bytetype_describe @44371 NONAME
168 168
 EXPORTS cli_bytevalue_describe @44372 NONAME
169
+EXPORTS cli_pcre_freeoff @44373 NONAME
170
+EXPORTS cli_pcre_recaloff @44374 NONAME
169 171
 
170 172
 ; compatibility layer, tommath, zlib
171 173
 EXPORTS w32_srand @44269 NONAME
... ...
@@ -1,4 +1,4 @@
1
-<?xml version="1.0" encoding="utf-8"?>
1
+<?xml version="1.0" encoding="utf-8"?>
2 2
 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 3
   <ItemGroup Label="ProjectConfigurations">
4 4
     <ProjectConfiguration Include="Debug|Win32">
... ...
@@ -348,6 +348,7 @@
348 348
     <ClCompile Include="..\libclamav\macho.c" />
349 349
     <ClCompile Include="..\libclamav\matcher-ac.c" />
350 350
     <ClCompile Include="..\libclamav\matcher-bm.c" />
351
+    <ClCompile Include="..\libclamav\matcher-pcre.c" />
351 352
     <ClCompile Include="..\libclamav\matcher.c" />
352 353
     <ClCompile Include="..\libclamav\mbox.c" />
353 354
     <ClCompile Include="..\libclamav\mbr.c" />
... ...
@@ -383,6 +384,7 @@
383 383
     <ClCompile Include="..\libclamav\regex\regexec.c" />
384 384
     <ClCompile Include="..\libclamav\regex\regfree.c" />
385 385
     <ClCompile Include="..\libclamav\regex\strlcpy.c" />
386
+    <ClCompile Include="..\libclamav\regex_pcre.c" />
386 387
     <ClCompile Include="..\libclamav\sis.c" />
387 388
     <ClCompile Include="..\libclamav\special.c" />
388 389
     <ClCompile Include="..\libclamav\spin.c" />
... ...
@@ -520,4 +522,4 @@
520 520
     <Library Include="libeay32.lib" />
521 521
     <Library Include="ssleay32.lib" />
522 522
   </ItemGroup>
523
-</Project>
524 523
\ No newline at end of file
524
+</Project>
... ...
@@ -1,4 +1,4 @@
1
-<?xml version="1.0" encoding="utf-8"?>
1
+<?xml version="1.0" encoding="utf-8"?>
2 2
 <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 3
   <ItemGroup>
4 4
     <Filter Include="Source Files">
... ...
@@ -177,6 +177,9 @@
177 177
     <ClCompile Include="..\libclamav\matcher-bm.c">
178 178
       <Filter>Source Files</Filter>
179 179
     </ClCompile>
180
+    <ClCompile Include="..\libclamav\matcher-pcre.c">
181
+      <Filter>Source Files</Filter>
182
+    </ClCompile>
180 183
     <ClCompile Include="..\libclamav\mbox.c">
181 184
       <Filter>Source Files</Filter>
182 185
     </ClCompile>
... ...
@@ -357,6 +360,9 @@
357 357
     <ClCompile Include="..\libclamav\regex\regfree.c">
358 358
       <Filter>Source Files\regex</Filter>
359 359
     </ClCompile>
360
+    <ClCompile Include="..\libclamav\regex_pcre.c">
361
+      <Filter>Source Files\regex</Filter>
362
+    </ClCompile>
360 363
     <ClCompile Include="..\libclamav\nsis\infblock.c">
361 364
       <Filter>Source Files\nsis</Filter>
362 365
     </ClCompile>
... ...
@@ -937,4 +943,4 @@
937 937
     <Library Include="libeay32.lib" />
938 938
     <Library Include="ssleay32.lib" />
939 939
   </ItemGroup>
940
-</Project>
941 940
\ No newline at end of file
941
+</Project>