unit_tests/check_regex.c
82167b0a
 /*
  *  Unit tests for regular expression processing.
  *
  *  Copyright (C) 2008 Sourcefire, Inc.
  *
  *  Authors: Török Edvin
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <stdio.h>
 #ifdef HAVE_CHECK
 #include <stdlib.h>
 #include <limits.h>
 #include <string.h>
 #include <check.h>
 #include "../libclamav/clamav.h"
 #include "../libclamav/others.h"
3dcc2d78
 #include "../libclamav/mbox.h"
 #include "../libclamav/message.h"
a497dce5
 #include "../libclamav/htmlnorm.h"
3dcc2d78
 #include "../libclamav/phishcheck.h"
a497dce5
 #include "../libclamav/regex_suffix.h"
 #include "../libclamav/regex_list.h"
 #include "../libclamav/phish_domaincheck_db.h"
 #include "../libclamav/phish_whitelist.h"
82167b0a
 #include "checks.h"
 
a497dce5
 static size_t cb_called = 0;
82167b0a
 
a497dce5
 static int cb_fail(void *cbdata, const char *suffix, size_t len, const struct regex_list *regex)
82167b0a
 {
 	fail("this pattern is not supposed to have a suffix");
 	return -1;
 }
 
a497dce5
 static int cb_expect_single(void *cbdata, const char *suffix, size_t len, const struct regex_list *regex)
82167b0a
 {
 	const char *expected = cbdata;
 	cb_called++;
 	fail_unless(suffix && strcmp(suffix, expected) == 0,
 			"suffix mismatch, was: %s, expected: %s\n", suffix, expected);
a497dce5
 	return 0;
82167b0a
 }
 
 static struct regex_list regex;
 START_TEST (empty)
 {
 	const char pattern[] = "";
 	int rc;
a497dce5
 	regex_t *preg;
 
82167b0a
 	errmsg_expected();
a497dce5
 	preg = malloc(sizeof(*regex.preg));
 	fail_unless(!!preg, "malloc");
 	rc = cli_regex2suffix(pattern, preg, cb_fail, NULL);
 	free(preg);
82167b0a
 	fail_unless(rc == REG_EMPTY, "empty pattern");
 	fail_unless(cb_called == 0, "callback shouldn't be called");
 }
 END_TEST
 
 START_TEST (one)
 {
a497dce5
 	char pattern[] = "a";
82167b0a
 	int rc;
a497dce5
 	regex_t *preg;
 
 	preg = malloc(sizeof(*regex.preg));
 	fail_unless(!!preg, "malloc");
 	rc = cli_regex2suffix(pattern, preg, cb_expect_single, pattern);
82167b0a
 	fail_unless(rc == 0, "single character pattern");
a497dce5
 	cli_regfree(preg);
 	free(preg);
82167b0a
 	fail_unless(cb_called == 1, "callback should be called once");
 }
 END_TEST
 
 
a497dce5
 static const char *ex1[] =
82167b0a
  {"com|de","moc","ed",NULL};
a497dce5
 static const char *ex2[] =
82167b0a
  {"xd|(a|e)bc","dx","cba","cbe",NULL};
 
 static const char **tests[] = {
a497dce5
 	ex1,
 	ex2
82167b0a
 };
 
 
a497dce5
 static int cb_expect_multi(void *cbdata, const char *suffix, size_t len, const struct regex_list *r)
82167b0a
 {
 	const char **exp = cbdata;
 	fail_unless(!!exp, "expected data");
 	exp++;
 	fail_unless(!!*exp, "expected no suffix, got: %s\n",suffix);
 	fail_unless(!!exp[cb_called], "expected less suffixes, but already got: %d\n", cb_called);
 	fail_unless(strcmp(exp[cb_called], suffix) == 0,
 			"suffix mismatch, was: %s, expected: %s\n",suffix, exp[cb_called]);
 	fail_unless(strlen(suffix) == len, "incorrect suffix len, expected: %d, got: %d\n", strlen(suffix), len);
 	cb_called++;
a497dce5
 	return 0;
82167b0a
 }
 
99f74adc
 #ifdef CHECK_HAVE_LOOPS
82167b0a
 START_TEST (test_suffix)
 {
 	int rc;
a497dce5
 	regex_t *preg;
82167b0a
 	const char *pattern = tests[_i][0];
 	size_t n=0;
 	const char **p=tests[_i];
 
 	fail_unless(!!pattern, "test pattern");
a497dce5
 	preg = malloc(sizeof(*regex.preg));
 	fail_unless(!!preg, "malloc");
 	rc = cli_regex2suffix(pattern, preg, cb_expect_multi, tests[_i]);
82167b0a
 	fail_unless(rc == 0, "single character pattern");
a497dce5
 	cli_regfree(preg);
 	free(preg);
82167b0a
 	p++;
 	while(*p++) n++;
 	fail_unless(cb_called == n,
 			"suffix number mismatch, expected: %d, was: %d\n", n, cb_called);
 }
 END_TEST
61b295f4
 #endif /* CHECK_HAVE_LOOPS */
82167b0a
 
 static void setup(void)
 {
 	cb_called = 0;
 }
 
 static void teardown(void)
 {
 }
 
3dcc2d78
 static struct regex_matcher matcher;
 
 static void rsetup(void)
 {
 	int rc = init_regex_list(&matcher);
 	fail_unless(rc == 0, "init_regex_list");
 }
 
 static void rteardown(void)
 {
 	regex_list_done(&matcher);
 }
 
 static const struct rtest {
 	const char *pattern;/* NULL if not meant for whitelist testing */
 	const char *realurl;
 	const char *displayurl;
1126559f
 	int result;/* 0 - phish, 1 - whitelisted, 2 - clean, 
dfc0c031
 		      3 - blacklisted if 2nd db is loaded,
 		      4 - invalid regex*/
3dcc2d78
 } rtests[] = {
dfc0c031
 	{NULL,"http://fake.example.com","&#61;&#61;&#61;&#61;&#61;key.com",0},
 	{NULL,"http://key.com","&#61;&#61;&#61;&#61;&#61;key.com",2},
 	{NULL,"http://key.com@fake.example.com","key.com",0},
3dcc2d78
 	/* entry taken from .wdb with a / appended */
 	{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
 		"http://srx.main.ebayrtm.com",
 		"pages.ebay.de",
9ee053fe
 		1 /* should be whitelisted */},
 	{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
 		"http://srx.main.ebayrtm.com.evil.example.com",
 		"pages.ebay.de",
a2d14e06
 		0},
 	{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
 		"www.www.ebayrtm.com?somecgi",
 		"www.ebay.com/something",1},
 	{NULL,
 		"http://key.com","go to key.com",2
888973cc
 	},
 	{NULL, "http://somefakeurl.example.com","someotherdomain-key.com",2},
1126559f
 	{NULL, "http://somefakeurl.example.com","someotherdomain.key.com",0},
 	{NULL, "http://1.test.example.com/something","test",3},
 	{NULL, "http://1.test.example.com/2","test",3},
 	{NULL, "http://user@1.test.example.com/2","test",3},
dfc0c031
 	{NULL, "http://user@1.test.example.com/2/test","test",3},
 	{NULL, "http://user@1.test.example.com/","test",3},
 	{NULL, "http://x.exe","http:///x.exe",2},
 	{".+\\.ebayrtm\\.com([/?].*)?:[^.]+\\.ebay\\.(de|com|co\\.uk)/",
 		"http://srx.main.ebayrtm.com",
 		"pages.ebay.de",
 		1 /* should be whitelisted */},
 	{".+\\.ebayrtm\\.com([/?].*)?:.+[r-t]\\.ebay\\.(de|com|co\\.uk)/",
 		"http://srx.main.ebayrtm.com",
 		"pages.ebay.de",
 		1 /* should be whitelisted */},
 	{".+\\.ebayrtm\\.com([/?].*)?:.+[r-t]\\.ebay\\.(de|com|co\\.uk)/",
 		"http://srx.main.ebayrtm.com",
 		"pages.ebay.de",
 		1 /* should be whitelisted */},
 	{"[t-","","",4},
 	{NULL,"http://co.uk","http:// co.uk",2},
 	{NULL,"http://co.uk","     ",2},
 	{NULL,"127.0.0.1","pages.ebay.de",2},
 	{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
 		"http://pages.ebay.de@fake.example.com","pages.ebay.de",0},
 	{NULL,"http://key.com","https://key.com",0},
 	{NULL,"http://key.com%00fake.example.com","https://key.com",0},
3dcc2d78
 };
 
99f74adc
 #ifdef CHECK_HAVE_LOOPS
3dcc2d78
 START_TEST (regex_list_match_test)
 {
 	const char *info;
 	const struct rtest *rtest = &rtests[_i];
a497dce5
 	char *pattern, *realurl;
 	int rc;
3dcc2d78
 
 	if(!rtest->pattern) {
 		fail_unless(rtest->result != 1,
 				"whitelist test must have pattern set");
 		/* this test entry is not meant for whitelist testing */
 		return;
 	}
 
dfc0c031
 	fail_unless(rtest->result == 0 || rtest->result == 1 || rtest->result==4,
 			"whitelist test result must be either 0 or 1 or 4");
3dcc2d78
 	pattern = cli_strdup(rtest->pattern);
 	fail_unless(!!pattern, "cli_strdup");
 
a497dce5
 	rc = regex_list_add_pattern(&matcher, pattern);
dfc0c031
 	if(rtest->result == 4) {
 		fail_unless(rc, "regex_list_add_pattern should return error");
 		free(pattern);
 		return;
 	} else
 		fail_unless(rc == 0,"regex_list_add_pattern");
10290ba3
 	free(pattern);
3dcc2d78
 
 	matcher.list_loaded = 1;
 
 	rc = cli_build_regex_list(&matcher);
 	fail_unless(rc == 0,"cli_build_regex_list");
 
 	fail_unless(is_regex_ok(&matcher),"is_regex_ok");
 
a497dce5
 	realurl = cli_strdup(rtest->realurl);
 	rc = regex_list_match(&matcher, realurl, rtest->displayurl, NULL, 1, &info, 1);
3dcc2d78
 	fail_unless(rc == rtest->result,"regex_list_match");
a497dce5
 	/* regex_list_match is not supposed to modify realurl in this case */
 	fail_unless(!strcmp(realurl, rtest->realurl), "realurl altered");
 	free(realurl);
3dcc2d78
 }
 END_TEST
61b295f4
 #endif /* CHECK_HAVE_LOOPS */
3dcc2d78
 
 static struct cl_engine *engine;
1126559f
 static int loaded_2 = 0;
 
 static void psetup_impl(int load2)
3dcc2d78
 {
 	FILE *f;
 	struct phishcheck *pchk;
 	int rc;
 	rc = cli_initengine(&engine, 0);
 	fail_unless(rc == 0, "cl_initengine");
 
 	rc = phishing_init(engine);
 	fail_unless(rc == 0,"phishing_init");
 	pchk = engine->phishcheck;
 	fail_unless(!!pchk, "engine->phishcheck");
 
 	rc = init_domainlist(engine);
 	fail_unless(rc == 0,"init_domainlist");
 
 	f = fdopen(open_testfile("input/daily.pdb"),"r");
 	fail_unless(!!f, "fopen daily.pdb");
 
 	rc = load_regex_matcher(engine->domainlist_matcher,  f, 0, 0, NULL);
 	fail_unless(rc == 0, "load_regex_matcher");
 	fclose(f);
 
1126559f
 	if(load2) {
 		f = fdopen(open_testfile("input/daily.pdb2"),"r");
 		fail_unless(!!f, "fopen daily.pdb2");
 
 		rc = load_regex_matcher(engine->domainlist_matcher,  f, 0, 0, NULL);
 		fail_unless(rc == 0, "load_regex_matcher");
 		fclose(f);
 	}
 	loaded_2 = load2;
 
3dcc2d78
 	rc = init_whitelist(engine);
 	fail_unless(rc == 0,"init_whitelist");
 
 	f = fdopen(open_testfile("input/daily.wdb"),"r");
 	rc = load_regex_matcher(engine->whitelist_matcher, f, 0, 1, NULL);
 	fail_unless(rc == 0,"load_regex_matcher");
 	fclose(f);
 
 	rc = cli_build_regex_list(engine->whitelist_matcher);
 	fail_unless(rc == 0,"cli_build_regex_list");
 
 	rc = cli_build_regex_list(engine->domainlist_matcher);
 	fail_unless(rc == 0,"cli_build_regex_list");
 
 	fail_unless(is_regex_ok(engine->whitelist_matcher),"is_regex_ok");
 	fail_unless(is_regex_ok(engine->domainlist_matcher),"is_regex_ok");
 }
 
1126559f
 static void psetup(void)
 {
 	psetup_impl(0);
 }
 
 static void psetup2(void)
 {
 	psetup_impl(1);
 }
 
 
3dcc2d78
 static void pteardown(void)
 {
1126559f
 	if(engine) {
 		phishing_done(engine);
 		cl_free(engine);
 	}
3dcc2d78
 	engine = NULL;
 }
 
a2d14e06
 
 static void do_phishing_test(const struct rtest *rtest)
3dcc2d78
 {
 	char *realurl;
 	cli_ctx ctx;
 	const char *virname;
 	tag_arguments_t hrefs;
 	int rc;
 
 	memset(&ctx, 0, sizeof(ctx));
 
 	realurl = cli_strdup(rtest->realurl);
 	fail_unless(!!realurl, "cli_strdup");
 
 	hrefs.count = 1;
 	hrefs.value = cli_malloc(sizeof(*hrefs.value));
 	fail_unless(!!hrefs.value, "cli_malloc");
a497dce5
 	hrefs.value[0] = (unsigned char*)realurl;
3dcc2d78
 	hrefs.contents = cli_malloc(sizeof(*hrefs.contents));
 	fail_unless(!!hrefs.contents, "cli_malloc");
 	hrefs.contents[0] = blobCreate();
 	hrefs.tag = cli_malloc(sizeof(*hrefs.tag));
 	fail_unless(!!hrefs.tag, "cli_malloc");
a497dce5
 	hrefs.tag[0] = (unsigned char*)cli_strdup("href");
 	blobAddData(hrefs.contents[0], (const unsigned char*) rtest->displayurl, strlen(rtest->displayurl)+1);
3dcc2d78
 
 	ctx.engine = engine;
 	ctx.virname = &virname;
a2d14e06
 
3dcc2d78
 	rc = phishingScan(NULL, NULL, &ctx, &hrefs);
a2d14e06
 
10290ba3
 	html_tag_arg_free(&hrefs);
a2d14e06
 	fail_unless(rc == CL_CLEAN,"phishingScan");
 	switch(rtest->result) {
 		case 0:
 			fail_unless(ctx.found_possibly_unwanted,
 					"this should be phishing, realURL: %s, displayURL: %s",
 					rtest->realurl, rtest->displayurl);
 			break;
 		case 1:
 			fail_unless(!ctx.found_possibly_unwanted,
 					"this should be whitelisted, realURL: %s, displayURL: %s",
 					rtest->realurl, rtest->displayurl);
 			break;
 		case 2:
 			fail_unless(!ctx.found_possibly_unwanted,
 					"this should be clean, realURL: %s, displayURL: %s",
 					rtest->realurl, rtest->displayurl);
 			break;
1126559f
 		case 3:
 			if(!loaded_2)
 				fail_unless(!ctx.found_possibly_unwanted,
 					"this should be clean, realURL: %s, displayURL: %s",
 					rtest->realurl, rtest->displayurl);
 			else {
 				fail_unless(ctx.found_possibly_unwanted,
 					"this should be blacklisted, realURL: %s, displayURL: %s",
 					rtest->realurl, rtest->displayurl);
 				fail_unless(!strstr((const char*)ctx.virname,"Blacklisted"),
 						"should be blacklisted, but is: %s\n", ctx.virname);
 			}
 			break;
a2d14e06
 	}
 }
 
99f74adc
 #ifdef CHECK_HAVE_LOOPS
a2d14e06
 START_TEST (phishingScan_test)
 {
 	do_phishing_test(&rtests[_i]);
3dcc2d78
 }
 END_TEST
99f74adc
 #endif
3dcc2d78
 
a2d14e06
 START_TEST(phishing_fake_test)
 {
 	char buf[4096];
 	FILE *f = fdopen(open_testfile("input/daily.pdb"),"r");
 	fail_unless(!!f,"fopen daily.pdb");
 	while(fgets(buf, sizeof(buf), f)) {
 		struct rtest rtest;
 		const char *pdb = strchr(buf,':');
 		fail_unless(!!pdb, "missing : in pdb");
 		rtest.realurl = pdb;
 		rtest.displayurl = pdb;
 		rtest.result = 2;
 		do_phishing_test(&rtest);
 		rtest.realurl = "http://fake.example.com";
 		rtest.result = 0;
 		do_phishing_test(&rtest);
 	}
 	fclose(f);
 }
 END_TEST
 
82167b0a
 Suite *test_regex_suite(void)
 {
 	Suite *s = suite_create("regex");
1126559f
 	TCase *tc_api, *tc_matching, *tc_phish, *tc_phish2;
82167b0a
 
 	tc_api = tcase_create("cli_regex2suffix");
 	suite_add_tcase(s, tc_api);
3dcc2d78
 	tcase_add_checked_fixture (tc_api, setup, teardown);
82167b0a
 	tcase_add_test(tc_api, empty);
 	tcase_add_test(tc_api, one);
99f74adc
 #ifdef CHECK_HAVE_LOOPS
82167b0a
 	tcase_add_loop_test(tc_api, test_suffix, 0, sizeof(tests)/sizeof(tests[0]));
99f74adc
 #endif
3dcc2d78
 	tc_matching = tcase_create("regex_list");
 	suite_add_tcase(s, tc_matching);
 	tcase_add_checked_fixture (tc_matching, rsetup, rteardown);
99f74adc
 #ifdef CHECK_HAVE_LOOPS
3dcc2d78
 	tcase_add_loop_test(tc_matching, regex_list_match_test, 0, sizeof(rtests)/sizeof(rtests[0]));
99f74adc
 #endif
3dcc2d78
 	tc_phish = tcase_create("phishingScan");
 	suite_add_tcase(s, tc_phish);
 	tcase_add_checked_fixture(tc_phish, psetup, pteardown);
99f74adc
 #ifdef CHECK_HAVE_LOOPS
3dcc2d78
 	tcase_add_loop_test(tc_phish, phishingScan_test, 0, sizeof(rtests)/sizeof(rtests[0]));
99f74adc
 #endif
a2d14e06
 	tcase_add_test(tc_phish, phishing_fake_test);
3dcc2d78
 
1126559f
 
 	tc_phish2 = tcase_create("phishingScan with 2 dbs");
 	suite_add_tcase(s, tc_phish2);
 	tcase_add_checked_fixture(tc_phish2, psetup2, pteardown);
99f74adc
 #ifdef CHECK_HAVE_LOOPS
1126559f
 	tcase_add_loop_test(tc_phish2, phishingScan_test, 0, sizeof(rtests)/sizeof(rtests[0]));
99f74adc
 #endif
1126559f
 	tcase_add_test(tc_phish2, phishing_fake_test);
 
82167b0a
 	return s;
 }
 
 #endif