git-svn: trunk@2256
aCaB authored on 2006/09/14 07:50:56... | ... |
@@ -1,3 +1,12 @@ |
1 |
+Thu Sep 14 00:35:56 CEST 2006 (acab) |
|
2 |
+------------------------------------ |
|
3 |
+ * docs: Added preliminary documentation related to Edvin phishing module. |
|
4 |
+ Big thanks to Google for the SoC, to Edvin for the very good |
|
5 |
+ work and finally to paste.debian.org for helping with the merge! |
|
6 |
+ |
|
7 |
+ * libclamav: Using CL_EXPERIMENTAL instead of CONFIG_EXPERIMENTAL |
|
8 |
+ in suecrypt decryptor |
|
9 |
+ |
|
1 | 10 |
Wed Sep 13 22:38:22 BST 2006 (njh) |
2 | 11 |
---------------------------------- |
3 | 12 |
* libclamav/mbox.c: Committed ACAB's merge of Edvin's Phish code, |
4 | 13 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,943 @@ |
0 |
+#LyX 1.4.2 created this file. For more info see http://www.lyx.org/ |
|
1 |
+\lyxformat 245 |
|
2 |
+\begin_document |
|
3 |
+\begin_header |
|
4 |
+\textclass article |
|
5 |
+\language english |
|
6 |
+\inputencoding auto |
|
7 |
+\fontscheme pslatex |
|
8 |
+\graphics default |
|
9 |
+\paperfontsize default |
|
10 |
+\spacing single |
|
11 |
+\papersize a4paper |
|
12 |
+\use_geometry false |
|
13 |
+\use_amsmath 1 |
|
14 |
+\cite_engine basic |
|
15 |
+\use_bibtopic false |
|
16 |
+\paperorientation portrait |
|
17 |
+\secnumdepth 3 |
|
18 |
+\tocdepth 3 |
|
19 |
+\paragraph_separation indent |
|
20 |
+\defskip medskip |
|
21 |
+\quotes_language english |
|
22 |
+\papercolumns 1 |
|
23 |
+\papersides 1 |
|
24 |
+\paperpagestyle default |
|
25 |
+\tracking_changes false |
|
26 |
+\output_changes false |
|
27 |
+\end_header |
|
28 |
+ |
|
29 |
+\begin_body |
|
30 |
+ |
|
31 |
+\begin_layout Title |
|
32 |
+ |
|
33 |
+\family roman |
|
34 |
+\series medium |
|
35 |
+\shape up |
|
36 |
+\size normal |
|
37 |
+\emph off |
|
38 |
+\bar no |
|
39 |
+\noun off |
|
40 |
+\color none |
|
41 |
+Phishing signatures creation HOWTO |
|
42 |
+\end_layout |
|
43 |
+ |
|
44 |
+\begin_layout Author |
|
45 | ||
46 |
+\end_layout |
|
47 |
+ |
|
48 |
+\begin_layout Section |
|
49 |
+Database file format |
|
50 |
+\end_layout |
|
51 |
+ |
|
52 |
+\begin_layout Standard |
|
53 |
+The database file format is common for the whitelist (.wdb), and domainlist |
|
54 |
+ (.pdb), and it consists of (multiple) lines of form: |
|
55 |
+\end_layout |
|
56 |
+ |
|
57 |
+\begin_layout Standard |
|
58 |
+ |
|
59 |
+\series bold |
|
60 |
+Flags\InsetSpace ~ |
|
61 |
+RealURL\InsetSpace ~ |
|
62 |
+DisplayedURL |
|
63 |
+\end_layout |
|
64 |
+ |
|
65 |
+\begin_layout Itemize |
|
66 |
+Where |
|
67 |
+\noun on |
|
68 |
+Flags |
|
69 |
+\noun default |
|
70 |
+ is: |
|
71 |
+\end_layout |
|
72 |
+ |
|
73 |
+\begin_deeper |
|
74 |
+\begin_layout Itemize |
|
75 |
+an (optional) character : |
|
76 |
+\end_layout |
|
77 |
+ |
|
78 |
+\begin_deeper |
|
79 |
+\begin_layout Description |
|
80 |
+R regex, has to match the entire url, see the section on regular expressions |
|
81 |
+\end_layout |
|
82 |
+ |
|
83 |
+\begin_layout Description |
|
84 |
+H has to match the host part of url only (a simple pattern, i.e. |
|
85 |
+ it is matched literally) |
|
86 |
+\end_layout |
|
87 |
+ |
|
88 |
+\begin_layout Description |
|
89 |
+no\InsetSpace ~ |
|
90 |
+character matches the entire url, but as a simple pattern (non-regex) |
|
91 |
+\end_layout |
|
92 |
+ |
|
93 |
+\end_deeper |
|
94 |
+\begin_layout Itemize |
|
95 |
+followed by an (optional) 3-digit hexadecimal number representing flags |
|
96 |
+ that should be filtered. |
|
97 |
+\end_layout |
|
98 |
+ |
|
99 |
+\begin_deeper |
|
100 |
+\begin_layout Itemize |
|
101 |
+flag filtering only makes sense in .pdb files, (however clamav won't complain |
|
102 |
+ if you put flags in .wdb files, it just won't use them) |
|
103 |
+\end_layout |
|
104 |
+ |
|
105 |
+\begin_layout Itemize |
|
106 |
+for details on how to construct a flag number see section |
|
107 |
+\begin_inset LatexCommand \vref{sec:Flags} |
|
108 |
+ |
|
109 |
+\end_inset |
|
110 |
+ |
|
111 |
+ |
|
112 |
+\end_layout |
|
113 |
+ |
|
114 |
+\end_deeper |
|
115 |
+\end_deeper |
|
116 |
+\begin_layout Itemize |
|
117 |
+ |
|
118 |
+\noun on |
|
119 |
+RealURL |
|
120 |
+\noun default |
|
121 |
+is the URL the user is sent to |
|
122 |
+\end_layout |
|
123 |
+ |
|
124 |
+\begin_layout Itemize |
|
125 |
+ |
|
126 |
+\noun on |
|
127 |
+displayedURL |
|
128 |
+\noun default |
|
129 |
+ is the URL description displayed to the user, that is where it is |
|
130 |
+\emph on |
|
131 |
+claimed |
|
132 |
+\emph default |
|
133 |
+ they are sent, the most obvious example is that of an html anchor (<a> tag): |
|
134 |
+ its href attribute is the |
|
135 |
+\noun on |
|
136 |
+realURL |
|
137 |
+\noun default |
|
138 |
+, and its contents is the |
|
139 |
+\noun on |
|
140 |
+displayedURL |
|
141 |
+\end_layout |
|
142 |
+ |
|
143 |
+\begin_layout Itemize |
|
144 |
+see section |
|
145 |
+\begin_inset LatexCommand \vref{sub:Extraction-of-realURL,} |
|
146 |
+ |
|
147 |
+\end_inset |
|
148 |
+ |
|
149 |
+ for more details on what |
|
150 |
+\noun on |
|
151 |
+realURL/displayedURL |
|
152 |
+\noun default |
|
153 |
+ is |
|
154 |
+\end_layout |
|
155 |
+ |
|
156 |
+\begin_layout Standard |
|
157 |
+Note: The spaces are mandatory, and empty lines are skipped. |
|
158 |
+\end_layout |
|
159 |
+ |
|
160 |
+\begin_layout Standard |
|
161 |
+If any of the lines of daily.wdb/daily.pdb don't conform to the above file |
|
162 |
+ format, the loading of the file shall fail, and whitelist/domainlist feature |
|
163 |
+ will be disabled. |
|
164 |
+ If the loading of the whitelist fails, the phishing checks will be disabled |
|
165 |
+ entirely. |
|
166 |
+\end_layout |
|
167 |
+ |
|
168 |
+\begin_layout Standard |
|
169 |
+Therefore it is important to test the daily.wdb/daily.pdb before packing it |
|
170 |
+ into daily.cvd! |
|
171 |
+\end_layout |
|
172 |
+ |
|
173 |
+\begin_layout Subsection |
|
174 |
+How matching works |
|
175 |
+\end_layout |
|
176 |
+ |
|
177 |
+\begin_layout Subsubsection |
|
178 |
+RealURL, displayedURL concatenation |
|
179 |
+\begin_inset LatexCommand \label{sub:RealURL,-displayedURL-concatenation} |
|
180 |
+ |
|
181 |
+\end_inset |
|
182 |
+ |
|
183 |
+ |
|
184 |
+\end_layout |
|
185 |
+ |
|
186 |
+\begin_layout Standard |
|
187 |
+The phishing detection module processes pairs of realURL/displayedURL, and |
|
188 |
+ the matching against daily.wdb/daily.pdb is done as follows: the realURL |
|
189 |
+ is concatenated with a space, and with the displayedURL, then that |
|
190 |
+\emph on |
|
191 |
+line |
|
192 |
+\emph default |
|
193 |
+is matched against the lines in daily.wdb/daily.pdb |
|
194 |
+\end_layout |
|
195 |
+ |
|
196 |
+\begin_layout Standard |
|
197 |
+So if you have a line like |
|
198 |
+\end_layout |
|
199 |
+ |
|
200 |
+\begin_layout Standard |
|
201 |
+ |
|
202 |
+\shape italic |
|
203 |
+\InsetSpace ~ |
|
204 |
+www.google.ro\InsetSpace ~ |
|
205 |
+www.google.com |
|
206 |
+\end_layout |
|
207 |
+ |
|
208 |
+\begin_layout Standard |
|
209 |
+and a href like: |
|
210 |
+\emph on |
|
211 |
+<a href= |
|
212 |
+\begin_inset Quotes erd |
|
213 |
+\end_inset |
|
214 |
+ |
|
215 |
+http://www.google.ro |
|
216 |
+\begin_inset Quotes erd |
|
217 |
+\end_inset |
|
218 |
+ |
|
219 |
+>www.google.com</a>, |
|
220 |
+\emph default |
|
221 |
+then it will match, but: |
|
222 |
+\emph on |
|
223 |
+<a href= |
|
224 |
+\begin_inset Quotes erd |
|
225 |
+\end_inset |
|
226 |
+ |
|
227 |
+http://images.google.com |
|
228 |
+\begin_inset Quotes erd |
|
229 |
+\end_inset |
|
230 |
+ |
|
231 |
+>www.google.com</a> |
|
232 |
+\emph default |
|
233 |
+ will not match. |
|
234 |
+\end_layout |
|
235 |
+ |
|
236 |
+\begin_layout Standard |
|
237 |
+If you use the |
|
238 |
+\series bold |
|
239 |
+\noun on |
|
240 |
+H |
|
241 |
+\noun default |
|
242 |
+ |
|
243 |
+\series default |
|
244 |
+flag, then the 2nd href will match too. |
|
245 |
+\end_layout |
|
246 |
+ |
|
247 |
+\begin_layout Subsubsection |
|
248 |
+What happens when a match is found |
|
249 |
+\end_layout |
|
250 |
+ |
|
251 |
+\begin_layout Standard |
|
252 |
+In the case of the whitelist, a match means that the realURL/displayedURL |
|
253 |
+ combination is considered |
|
254 |
+\noun on |
|
255 |
+clean |
|
256 |
+\noun default |
|
257 |
+, and no further checks are performed on it. |
|
258 |
+\end_layout |
|
259 |
+ |
|
260 |
+\begin_layout Standard |
|
261 |
+In the case of the domainlist, a match means that the realURL/displayedURL |
|
262 |
+ is going to be checked for phishing attempts. |
|
263 |
+ This is only done if you don't run clamav with the |
|
264 |
+\emph on |
|
265 |
+alldomains |
|
266 |
+\emph default |
|
267 |
+ option (since then all urls are checked). |
|
268 |
+ Furthermore you can restrict what checks are to be performed by specifying |
|
269 |
+ the 3-digit hexnumber. |
|
270 |
+\end_layout |
|
271 |
+ |
|
272 |
+\begin_layout Subsubsection |
|
273 |
+Extraction of |
|
274 |
+\noun on |
|
275 |
+realURL |
|
276 |
+\noun default |
|
277 |
+, |
|
278 |
+\noun on |
|
279 |
+displayedURL |
|
280 |
+\noun default |
|
281 |
+ from HTML tags |
|
282 |
+\begin_inset LatexCommand \label{sub:Extraction-of-realURL,} |
|
283 |
+ |
|
284 |
+\end_inset |
|
285 |
+ |
|
286 |
+ |
|
287 |
+\end_layout |
|
288 |
+ |
|
289 |
+\begin_layout Standard |
|
290 |
+The html parser extracts pairs of |
|
291 |
+\noun on |
|
292 |
+realURL |
|
293 |
+\noun default |
|
294 |
+/ |
|
295 |
+\noun on |
|
296 |
+displayedURL |
|
297 |
+\noun default |
|
298 |
+ based on the following rules: |
|
299 |
+\end_layout |
|
300 |
+ |
|
301 |
+\begin_layout Description |
|
302 |
+a (anchor) the |
|
303 |
+\emph on |
|
304 |
+href |
|
305 |
+\emph default |
|
306 |
+ is the |
|
307 |
+\noun on |
|
308 |
+realURL |
|
309 |
+\noun default |
|
310 |
+, its |
|
311 |
+\emph on |
|
312 |
+contents |
|
313 |
+\emph default |
|
314 |
+ is the |
|
315 |
+\noun on |
|
316 |
+displayedURL |
|
317 |
+\end_layout |
|
318 |
+ |
|
319 |
+\begin_deeper |
|
320 |
+\begin_layout Description |
|
321 |
+contents is the tag-stripped contents of the <a> tags, so for example <b> |
|
322 |
+ tags are stripped (but not their contents) |
|
323 |
+\end_layout |
|
324 |
+ |
|
325 |
+\begin_layout Standard |
|
326 |
+nesting another <a> tag within an <a> tag (besides being invalid html) |
|
327 |
+ is treated as a </a><a.. |
|
328 |
+\end_layout |
|
329 |
+ |
|
330 |
+\end_deeper |
|
331 |
+\begin_layout Description |
|
332 |
+form the |
|
333 |
+\emph on |
|
334 |
+action |
|
335 |
+\emph default |
|
336 |
+attribute is the |
|
337 |
+\noun on |
|
338 |
+realURL |
|
339 |
+\noun default |
|
340 |
+, and a nested <a> tag is the |
|
341 |
+\noun on |
|
342 |
+displayedURL |
|
343 |
+\end_layout |
|
344 |
+ |
|
345 |
+\begin_layout Description |
|
346 |
+img/area if nested within an |
|
347 |
+\emph on |
|
348 |
+ <a> |
|
349 |
+\emph default |
|
350 |
+ tag, the |
|
351 |
+\noun on |
|
352 |
+realURL |
|
353 |
+\noun default |
|
354 |
+ is the |
|
355 |
+\emph on |
|
356 |
+href |
|
357 |
+\emph default |
|
358 |
+ of the a tag, and the |
|
359 |
+\emph on |
|
360 |
+src/dynsrc/area |
|
361 |
+\emph default |
|
362 |
+ is the |
|
363 |
+\noun on |
|
364 |
+displayedURL |
|
365 |
+\noun default |
|
366 |
+ of the img |
|
367 |
+\end_layout |
|
368 |
+ |
|
369 |
+\begin_deeper |
|
370 |
+\begin_layout Standard |
|
371 |
+if nested within a |
|
372 |
+\emph on |
|
373 |
+form |
|
374 |
+\emph default |
|
375 |
+ tag, then the action attribute of the |
|
376 |
+\emph on |
|
377 |
+form |
|
378 |
+\emph default |
|
379 |
+ tag is the |
|
380 |
+\noun on |
|
381 |
+realURL |
|
382 |
+\noun default |
|
383 |
+ |
|
384 |
+\end_layout |
|
385 |
+ |
|
386 |
+\end_deeper |
|
387 |
+\begin_layout Description |
|
388 |
+iframe if nested within an |
|
389 |
+\emph on |
|
390 |
+<a> |
|
391 |
+\emph default |
|
392 |
+ tag the |
|
393 |
+\emph on |
|
394 |
+src |
|
395 |
+\emph default |
|
396 |
+ attribute is the displayedURL, and the |
|
397 |
+\emph on |
|
398 |
+href |
|
399 |
+\emph default |
|
400 |
+ of its parent |
|
401 |
+\emph on |
|
402 |
+ a |
|
403 |
+\emph default |
|
404 |
+ tag is the |
|
405 |
+\noun on |
|
406 |
+realURL |
|
407 |
+\end_layout |
|
408 |
+ |
|
409 |
+\begin_deeper |
|
410 |
+\begin_layout Standard |
|
411 |
+if nested within a |
|
412 |
+\emph on |
|
413 |
+form |
|
414 |
+\emph default |
|
415 |
+ tag, then the action attribute of the |
|
416 |
+\emph on |
|
417 |
+form |
|
418 |
+\emph default |
|
419 |
+ tag is the |
|
420 |
+\noun on |
|
421 |
+realURL |
|
422 |
+\end_layout |
|
423 |
+ |
|
424 |
+\end_deeper |
|
425 |
+\begin_layout Subsection |
|
426 |
+Simple patterns |
|
427 |
+\begin_inset LatexCommand \label{sec:Simple-patterns} |
|
428 |
+ |
|
429 |
+\end_inset |
|
430 |
+ |
|
431 |
+ |
|
432 |
+\end_layout |
|
433 |
+ |
|
434 |
+\begin_layout Standard |
|
435 |
+Simple patterns are matched literally, i.e. |
|
436 |
+ if you say: www.google.com, it is going to match www.google.com, and only that. |
|
437 |
+ The . |
|
438 |
+ character has no special meaning (see the section on regexes |
|
439 |
+\begin_inset LatexCommand \vref{sec:Regular-expressions} |
|
440 |
+ |
|
441 |
+\end_inset |
|
442 |
+ |
|
443 |
+ for how the . |
|
444 |
+ character behaves there) |
|
445 |
+\end_layout |
|
446 |
+ |
|
447 |
+\begin_layout Subsection |
|
448 |
+Regular expressions |
|
449 |
+\begin_inset LatexCommand \label{sec:Regular-expressions} |
|
450 |
+ |
|
451 |
+\end_inset |
|
452 |
+ |
|
453 |
+ |
|
454 |
+\end_layout |
|
455 |
+ |
|
456 |
+\begin_layout Standard |
|
457 |
+POSIX regular expressions are supported, and you can consider that internally |
|
458 |
+ it is wrapped by |
|
459 |
+\emph on |
|
460 |
+^ |
|
461 |
+\emph default |
|
462 |
+, and |
|
463 |
+\emph on |
|
464 |
+$. |
|
465 |
+ |
|
466 |
+\emph default |
|
467 |
+In other words, this means that the regular expression has to match the |
|
468 |
+ entire concatenated (see section |
|
469 |
+\begin_inset LatexCommand \vref{sub:RealURL,-displayedURL-concatenation} |
|
470 |
+ |
|
471 |
+\end_inset |
|
472 |
+ |
|
473 |
+ for details on concatenation) url. |
|
474 |
+\end_layout |
|
475 |
+ |
|
476 |
+\begin_layout Standard |
|
477 |
+It is recommended that you read section |
|
478 |
+\begin_inset LatexCommand \vref{sec:Introduction-to-regular} |
|
479 |
+ |
|
480 |
+\end_inset |
|
481 |
+ |
|
482 |
+ to learn how to write regular expressions, and then come back and read |
|
483 |
+ this for hints. |
|
484 |
+\end_layout |
|
485 |
+ |
|
486 |
+\begin_layout Standard |
|
487 |
+Be advised that clamav contains an internal, very basic regex matcher to |
|
488 |
+ reduce the load on the regex matching core. |
|
489 |
+ Thus it is recommended that you avoid using regex syntax not supported by |
|
490 |
+ it at the very beginning of regexes (at least the first few characters). |
|
491 |
+\end_layout |
|
492 |
+ |
|
493 |
+\begin_layout Standard |
|
494 |
+Currently the clamav regex matcher supports: |
|
495 |
+\end_layout |
|
496 |
+ |
|
497 |
+\begin_layout Itemize |
|
498 |
+. |
|
499 |
+ (dot) character |
|
500 |
+\end_layout |
|
501 |
+ |
|
502 |
+\begin_layout Itemize |
|
503 |
+ |
|
504 |
+\backslash |
|
505 |
+ (escaping special characters) |
|
506 |
+\end_layout |
|
507 |
+ |
|
508 |
+\begin_layout Itemize |
|
509 |
+| (pipe) alternatives |
|
510 |
+\end_layout |
|
511 |
+ |
|
512 |
+\begin_layout Itemize |
|
513 |
+[] (character classes) |
|
514 |
+\end_layout |
|
515 |
+ |
|
516 |
+\begin_layout Itemize |
|
517 |
+() (parentheses for grouping, but no group extraction is performed) |
|
518 |
+\end_layout |
|
519 |
+ |
|
520 |
+\begin_layout Itemize |
|
521 |
+other non-special characters |
|
522 |
+\end_layout |
|
523 |
+ |
|
524 |
+\begin_layout Standard |
|
525 |
+Thus the following are not supported: |
|
526 |
+\end_layout |
|
527 |
+ |
|
528 |
+\begin_layout Itemize |
|
529 |
++ repetition |
|
530 |
+\end_layout |
|
531 |
+ |
|
532 |
+\begin_layout Itemize |
|
533 |
+* repetition |
|
534 |
+\end_layout |
|
535 |
+ |
|
536 |
+\begin_layout Itemize |
|
537 |
+{} repetition |
|
538 |
+\end_layout |
|
539 |
+ |
|
540 |
+\begin_layout Itemize |
|
541 |
+backreferences |
|
542 |
+\end_layout |
|
543 |
+ |
|
544 |
+\begin_layout Itemize |
|
545 |
+lookaround |
|
546 |
+\end_layout |
|
547 |
+ |
|
548 |
+\begin_layout Itemize |
|
549 |
+other |
|
550 |
+\begin_inset Quotes eld |
|
551 |
+\end_inset |
|
552 |
+ |
|
553 |
+advanced |
|
554 |
+\begin_inset Quotes erd |
|
555 |
+\end_inset |
|
556 |
+ |
|
557 |
+ features not listed in the supported list ;) |
|
558 |
+\end_layout |
|
559 |
+ |
|
560 |
+\begin_layout Standard |
|
561 |
+This however shouldn't discourage you from using the |
|
562 |
+\begin_inset Quotes eld |
|
563 |
+\end_inset |
|
564 |
+ |
|
565 |
+not directly supported features |
|
566 |
+\begin_inset Quotes erd |
|
567 |
+\end_inset |
|
568 |
+ |
|
569 |
+, because if the internal engine encounters unsupported syntax, it passes |
|
570 |
+ it on to the POSIX regex core (beginning from the first unsupported token, |
|
571 |
+ everything before that is still processed by the internal matcher). |
|
572 |
+ An example might make this more clear: |
|
573 |
+\end_layout |
|
574 |
+ |
|
575 |
+\begin_layout Standard |
|
576 |
+ |
|
577 |
+\emph on |
|
578 |
+www |
|
579 |
+\backslash |
|
580 |
|
|
581 |
+\backslash |
|
582 |
+.(com|ro|it) www |
|
583 |
+\backslash |
|
584 |
+.([a-zA-Z])+ |
|
585 |
+\backslash |
|
586 |
|
|
587 |
+\backslash |
|
588 |
+.com |
|
589 |
+\end_layout |
|
590 |
+ |
|
591 |
+\begin_layout Standard |
|
592 |
+Everything till |
|
593 |
+\emph on |
|
594 |
+([a-zA-Z])+ |
|
595 |
+\emph default |
|
596 |
+ is processed internally, that parenthesis (and everything beyond) is processed |
|
597 |
+ by the posix core. |
|
598 |
+\end_layout |
|
599 |
+ |
|
600 |
+\begin_layout Subsection |
|
601 |
+Flags |
|
602 |
+\begin_inset LatexCommand \label{sec:Flags} |
|
603 |
+ |
|
604 |
+\end_inset |
|
605 |
+ |
|
606 |
+ |
|
607 |
+\end_layout |
|
608 |
+ |
|
609 |
+\begin_layout Standard |
|
610 |
+Flags are a binary OR of the following numbers: |
|
611 |
+\end_layout |
|
612 |
+ |
|
613 |
+\begin_layout Description |
|
614 |
+HOST_SUFFICIENT 1 |
|
615 |
+\end_layout |
|
616 |
+ |
|
617 |
+\begin_layout Description |
|
618 |
+DOMAIN_SUFFICIENT 2 |
|
619 |
+\end_layout |
|
620 |
+ |
|
621 |
+\begin_layout Description |
|
622 |
+DO_REVERSE_LOOKUP 4 |
|
623 |
+\end_layout |
|
624 |
+ |
|
625 |
+\begin_layout Description |
|
626 |
+CHECK_REDIR 8 |
|
627 |
+\end_layout |
|
628 |
+ |
|
629 |
+\begin_layout Description |
|
630 |
+CHECK_SSL 16 |
|
631 |
+\end_layout |
|
632 |
+ |
|
633 |
+\begin_layout Description |
|
634 |
+CHECK_CLOAKING 32 |
|
635 |
+\end_layout |
|
636 |
+ |
|
637 |
+\begin_layout Description |
|
638 |
+CLEANUP_URL 64 |
|
639 |
+\end_layout |
|
640 |
+ |
|
641 |
+\begin_layout Description |
|
642 |
+CHECK_DOMAIN_REVERSE 128 |
|
643 |
+\end_layout |
|
644 |
+ |
|
645 |
+\begin_layout Description |
|
646 |
+CHECK_IMG_URL 256 |
|
647 |
+\end_layout |
|
648 |
+ |
|
649 |
+\begin_layout Description |
|
650 |
+DOMAINLIST_REQUIRED 512 |
|
651 |
+\end_layout |
|
652 |
+ |
|
653 |
+\begin_layout Standard |
|
654 |
+The names of the constants are self-explanatory. |
|
655 |
+\end_layout |
|
656 |
+ |
|
657 |
+\begin_layout Standard |
|
658 |
+These constants are defined in libclamav/phishcheck.h, you can check there |
|
659 |
+ for the latest flags. |
|
660 |
+\end_layout |
|
661 |
+ |
|
662 |
+\begin_layout Standard |
|
663 |
+There is a default set of flags that are enabled, these are currently: (CLEANUP_ |
|
664 |
+URL|DOMAIN_SUFFICIENT|CHECK_SSL|CHECK_CLOAKING|DOMAINLIST_REQUIRED|CHECK_IMG_URL |
|
665 |
+), ssl checking is performed only for a tags currently. |
|
666 |
+\end_layout |
|
667 |
+ |
|
668 |
+\begin_layout Standard |
|
669 |
+You must decide for each line in the domainlist if you want to filter any |
|
670 |
+ flags (that is you don't want certain checks to be done), and then calculate |
|
671 |
+ the binary OR of those constants, and then convert it into a 3-digit hexnumber. |
|
672 |
+ For example you decide that domain_sufficient shouldn't be used for ebay.com, |
|
673 |
+ and you don't want to check images either, so you come up with this flag |
|
674 |
+ number: |
|
675 |
+\begin_inset Formula $2|256\Rightarrow$ |
|
676 |
+\end_inset |
|
677 |
+ |
|
678 |
+258 |
|
679 |
+\begin_inset Formula $(decimal)\Rightarrow102(hexadecimal)$ |
|
680 |
+\end_inset |
|
681 |
+ |
|
682 |
+ |
|
683 |
+\end_layout |
|
684 |
+ |
|
685 |
+\begin_layout Standard |
|
686 |
+So you add this line to daily.pdb: |
|
687 |
+\end_layout |
|
688 |
+ |
|
689 |
+\begin_layout Standard |
|
690 |
+R102\InsetSpace ~ |
|
691 |
+www.ebay.com\InsetSpace ~ |
|
692 |
+.+ |
|
693 |
+\end_layout |
|
694 |
+ |
|
695 |
+\begin_layout Section |
|
696 |
+Introduction to regular expressions |
|
697 |
+\begin_inset LatexCommand \label{sec:Introduction-to-regular} |
|
698 |
+ |
|
699 |
+\end_inset |
|
700 |
+ |
|
701 |
+ |
|
702 |
+\end_layout |
|
703 |
+ |
|
704 |
+\begin_layout Standard |
|
705 |
+Recommended reading: |
|
706 |
+\end_layout |
|
707 |
+ |
|
708 |
+\begin_layout Itemize |
|
709 |
+http://www.regular-expressions.info/quickstart.html |
|
710 |
+\end_layout |
|
711 |
+ |
|
712 |
+\begin_layout Itemize |
|
713 |
+http://www.regular-expressions.info/tutorial.html |
|
714 |
+\end_layout |
|
715 |
+ |
|
716 |
+\begin_layout Itemize |
|
717 |
+regex(7) man-page: http://www.tin.org/bin/man.cgi?section=7&topic=regex |
|
718 |
+\end_layout |
|
719 |
+ |
|
720 |
+\begin_layout Subsection |
|
721 |
+Special characters |
|
722 |
+\end_layout |
|
723 |
+ |
|
724 |
+\begin_layout Description |
|
725 |
+[ the opening square bracket - it marks the beginning of a character class, |
|
726 |
+ see section |
|
727 |
+\begin_inset LatexCommand \vref{sub:Character-classes} |
|
728 |
+ |
|
729 |
+\end_inset |
|
730 |
+ |
|
731 |
+ |
|
732 |
+\end_layout |
|
733 |
+ |
|
734 |
+\begin_layout Description |
|
735 |
+ |
|
736 |
+\backslash |
|
737 |
+ the backslash - escapes special characters, see section |
|
738 |
+\begin_inset LatexCommand \vref{sub:Escaping} |
|
739 |
+ |
|
740 |
+\end_inset |
|
741 |
+ |
|
742 |
+ |
|
743 |
+\end_layout |
|
744 |
+ |
|
745 |
+\begin_layout Description |
|
746 |
+\i \^{ } |
|
747 |
+ the caret - matches the beginning of a line (not needed in clamav regexes, |
|
748 |
+ this is implied) |
|
749 |
+\end_layout |
|
750 |
+ |
|
751 |
+\begin_layout Description |
|
752 |
+$ the dollar sign - matches the end of a line (not needed in clamav regexes, |
|
753 |
+ this is implied) |
|
754 |
+\end_layout |
|
755 |
+ |
|
756 |
+\begin_layout Description |
|
757 |
+\i \.{ } |
|
758 |
+ the period or dot - matches |
|
759 |
+\emph on |
|
760 |
+any |
|
761 |
+\emph default |
|
762 |
+ character |
|
763 |
+\end_layout |
|
764 |
+ |
|
765 |
+\begin_layout Description |
|
766 |
+| the vertical bar or pipe symbol - matches either of the tokens on its left |
|
767 |
+ and right side, see section |
|
768 |
+\begin_inset LatexCommand \vref{sub:Alternation} |
|
769 |
+ |
|
770 |
+\end_inset |
|
771 |
+ |
|
772 |
+ |
|
773 |
+\end_layout |
|
774 |
+ |
|
775 |
+\begin_layout Description |
|
776 |
+? the question mark - matches optionally the left-side token, see section |
|
777 |
+\begin_inset LatexCommand \vref{sub:Optional-matching,-and} |
|
778 |
+ |
|
779 |
+\end_inset |
|
780 |
+ |
|
781 |
+ |
|
782 |
+\end_layout |
|
783 |
+ |
|
784 |
+\begin_layout Description |
|
785 |
+* the asterisk or star - matches 0 or more occurrences of the left-side token, |
|
786 |
+ see section |
|
787 |
+\begin_inset LatexCommand \vref{sub:Optional-matching,-and} |
|
788 |
+ |
|
789 |
+\end_inset |
|
790 |
+ |
|
791 |
+ |
|
792 |
+\end_layout |
|
793 |
+ |
|
794 |
+\begin_layout Description |
|
795 |
++ the plus sign - matches 1 or more occurrences of the left-side token, see |
|
796 |
+ section |
|
797 |
+\begin_inset LatexCommand \vref{sub:Optional-matching,-and} |
|
798 |
+ |
|
799 |
+\end_inset |
|
800 |
+ |
|
801 |
+ |
|
802 |
+\end_layout |
|
803 |
+ |
|
804 |
+\begin_layout Description |
|
805 |
+( the opening round bracket - marks |
|
806 |
+ beginning of a group, see section |
|
807 |
+\begin_inset LatexCommand \vref{sub:Groups} |
|
808 |
+ |
|
809 |
+\end_inset |
|
810 |
+ |
|
811 |
+ |
|
812 |
+\end_layout |
|
813 |
+ |
|
814 |
+\begin_layout Description |
|
815 |
+) the closing round bracket - marks end of a group, see section |
|
816 |
+\begin_inset LatexCommand \vref{sub:Groups} |
|
817 |
+ |
|
818 |
+\end_inset |
|
819 |
+ |
|
820 |
+ |
|
821 |
+\end_layout |
|
822 |
+ |
|
823 |
+\begin_layout Subsection |
|
824 |
+Character classes |
|
825 |
+\begin_inset LatexCommand \label{sub:Character-classes} |
|
826 |
+ |
|
827 |
+\end_inset |
|
828 |
+ |
|
829 |
+ |
|
830 |
+\end_layout |
|
831 |
+ |
|
832 |
+\begin_layout Subsection |
|
833 |
+Escaping |
|
834 |
+\begin_inset LatexCommand \label{sub:Escaping} |
|
835 |
+ |
|
836 |
+\end_inset |
|
837 |
+ |
|
838 |
+ |
|
839 |
+\end_layout |
|
840 |
+ |
|
841 |
+\begin_layout Standard |
|
842 |
+Escaping has two purposes: |
|
843 |
+\end_layout |
|
844 |
+ |
|
845 |
+\begin_layout Itemize |
|
846 |
+it allows you to actually match the special characters themselves, for example |
|
847 |
+ to match the literal |
|
848 |
+\emph on |
|
849 |
++ |
|
850 |
+\emph default |
|
851 |
+, you would write |
|
852 |
+\emph on |
|
853 |
+ |
|
854 |
+\backslash |
|
855 |
++ |
|
856 |
+\end_layout |
|
857 |
+ |
|
858 |
+\begin_layout Itemize |
|
859 |
+it also allows you to match non-printable characters, such as the tab ( |
|
860 |
+\emph on |
|
861 |
+ |
|
862 |
+\backslash |
|
863 |
+t |
|
864 |
+\emph default |
|
865 |
+), newline ( |
|
866 |
+\emph on |
|
867 |
+ |
|
868 |
+\backslash |
|
869 |
+n |
|
870 |
+\emph default |
|
871 |
+), .. |
|
872 |
+\end_layout |
|
873 |
+ |
|
874 |
+\begin_layout Standard |
|
875 |
+However since non-printable characters are not valid inside an url, you |
|
876 |
+ won't have a reason to use them. |
|
877 |
+\end_layout |
|
878 |
+ |
|
879 |
+\begin_layout Subsection |
|
880 |
+Alternation |
|
881 |
+\begin_inset LatexCommand \label{sub:Alternation} |
|
882 |
+ |
|
883 |
+\end_inset |
|
884 |
+ |
|
885 |
+ |
|
886 |
+\end_layout |
|
887 |
+ |
|
888 |
+\begin_layout Subsection |
|
889 |
+Optional matching, and repetition |
|
890 |
+\begin_inset LatexCommand \label{sub:Optional-matching,-and} |
|
891 |
+ |
|
892 |
+\end_inset |
|
893 |
+ |
|
894 |
+ |
|
895 |
+\end_layout |
|
896 |
+ |
|
897 |
+\begin_layout Subsection |
|
898 |
+Groups |
|
899 |
+\begin_inset LatexCommand \label{sub:Groups} |
|
900 |
+ |
|
901 |
+\end_inset |
|
902 |
+ |
|
903 |
+ |
|
904 |
+\end_layout |
|
905 |
+ |
|
906 |
+\begin_layout Standard |
|
907 |
+Groups are usually used together with repetition, or alternation. |
|
908 |
+ For example: |
|
909 |
+\emph on |
|
910 |
+(com|it)+ |
|
911 |
+\emph default |
|
912 |
+ means: match 1 or more repetitions of |
|
913 |
+\emph on |
|
914 |
+com |
|
915 |
+\emph default |
|
916 |
+ or |
|
917 |
+\emph on |
|
918 |
+it, |
|
919 |
+\emph default |
|
920 |
+ that is it matches: com, it, comcom, comcomcom, comit, itit, ititcom,... |
|
921 |
+ you get the idea. |
|
922 |
+\end_layout |
|
923 |
+ |
|
924 |
+\begin_layout Standard |
|
925 |
+Groups can also be used to extract substrings, but this is not supported |
|
926 |
+ by the clam engine, and not needed either in this case. |
|
927 |
+\end_layout |
|
928 |
+ |
|
929 |
+\begin_layout Section |
|
930 |
+Hints and recommendations |
|
931 |
+\end_layout |
|
932 |
+ |
|
933 |
+\begin_layout Section |
|
934 |
+Examples |
|
935 |
+\end_layout |
|
936 |
+ |
|
937 |
+\begin_layout Standard |
|
938 |
+ |
|
939 |
+\end_layout |
|
940 |
+ |
|
941 |
+\end_body |
|
942 |
+\end_document |
... | ... |
@@ -819,7 +819,7 @@ int cli_scanpe(int desc, cli_ctx *ctx) |
819 | 819 |
} |
820 | 820 |
|
821 | 821 |
|
822 |
-#ifdef CONFIG_EXPERIMENTAL |
|
822 |
+#ifdef CL_EXPERIMENTAL |
|
823 | 823 |
/* SUE */ |
824 | 824 |
|
825 | 825 |
if(nsections > 2 && EC32(optional_hdr32.AddressOfEntryPoint) == EC32(section_hdr[nsections - 1].VirtualAddress) && EC32(section_hdr[nsections - 1].SizeOfRawData) > 0x350 && EC32(section_hdr[nsections - 1].SizeOfRawData) < 0x292+0x350+1000) { |