git-svn: trunk@3584
Török Edvin authored on 2008/02/05 06:38:34... | ... |
@@ -1,3 +1,8 @@ |
1 |
+Mon Feb 4 23:20:12 EET 2008 (edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * libclamav/scanners, filetypes, dconf: |
|
4 |
+ support for generic text normalizer (CL_TYPE_SCRIPT) |
|
5 |
+ |
|
1 | 6 |
Mon Feb 4 23:06:34 EET 2008 (edwin) |
2 | 7 |
--------------------------------- |
3 | 8 |
* libclamav/textnorm.[ch]: generic text normalizer (bb #241) |
... | ... |
@@ -86,6 +86,7 @@ static struct dconf_module modules[] = { |
86 | 86 |
{ "DOCUMENT", "HTML", DOC_CONF_HTML, 1 }, |
87 | 87 |
{ "DOCUMENT", "RTF", DOC_CONF_RTF, 1 }, |
88 | 88 |
{ "DOCUMENT", "PDF", DOC_CONF_PDF, 1 }, |
89 |
+ { "DOCUMENT", "SCRIPT", DOC_CONF_SCRIPT, 1 }, |
|
89 | 90 |
|
90 | 91 |
{ "MAIL", "MBOX", MAIL_CONF_MBOX, 1 }, |
91 | 92 |
{ "MAIL", "TNEF", MAIL_CONF_TNEF, 1 }, |
... | ... |
@@ -76,6 +76,7 @@ static const struct ftmap_s { |
76 | 76 |
{ "CL_TYPE_PDF", CL_TYPE_PDF }, |
77 | 77 |
{ "CL_TYPE_UUENCODED", CL_TYPE_UUENCODED }, |
78 | 78 |
{ "CL_TYPE_HTML_UTF16", CL_TYPE_HTML_UTF16 }, |
79 |
+ { "CL_TYPE_SCRIPT", CL_TYPE_SCRIPT }, |
|
79 | 80 |
{ "CL_TYPE_RTF", CL_TYPE_RTF }, |
80 | 81 |
{ "CL_TYPE_HTML", CL_TYPE_HTML }, |
81 | 82 |
{ "CL_TYPE_MAIL", CL_TYPE_MAIL }, |
... | ... |
@@ -85,6 +85,7 @@ |
85 | 85 |
#include "unarj.h" |
86 | 86 |
#include "nulsft.h" |
87 | 87 |
#include "autoit.h" |
88 |
+#include "textnorm.h" |
|
88 | 89 |
#include <zlib.h> |
89 | 90 |
#include "unzip.h" |
90 | 91 |
|
... | ... |
@@ -1064,6 +1065,73 @@ static int cli_scanhtml(int desc, cli_ctx *ctx) |
1064 | 1064 |
return ret; |
1065 | 1065 |
} |
1066 | 1066 |
|
1067 |
+static int cli_scanscript(int desc, cli_ctx *ctx) |
|
1068 |
+{ |
|
1069 |
+ unsigned char buff[FILEBUFF]; |
|
1070 |
+ unsigned char normalized[SCANBUFF]; |
|
1071 |
+ struct text_norm_state state; |
|
1072 |
+ struct stat sb; |
|
1073 |
+ char *tmpname = NULL; |
|
1074 |
+ int ofd = -1, ret; |
|
1075 |
+ ssize_t nread; |
|
1076 |
+ |
|
1077 |
+ cli_dbgmsg("in cli_scantext()\n"); |
|
1078 |
+ |
|
1079 |
+ if(fstat(desc, &sb) == -1) { |
|
1080 |
+ cli_errmsg("cli_scanscript: fstat() failed for descriptor %d\n", desc); |
|
1081 |
+ return CL_EIO; |
|
1082 |
+ } |
|
1083 |
+ |
|
1084 |
+ /* don't normalize files that are too large */ |
|
1085 |
+ if(sb.st_size > 10485760) { |
|
1086 |
+ cli_dbgmsg("cli_scanscript: exiting (file larger than 10 MB)\n"); |
|
1087 |
+ return CL_CLEAN; |
|
1088 |
+ } |
|
1089 |
+ |
|
1090 |
+ /* dump to disk only if explicitly asked to, |
|
1091 |
+ * otherwise we can process just in-memory */ |
|
1092 |
+ if(cli_leavetemps_flag) { |
|
1093 |
+ if((ret = cli_gentempfd(NULL, &tmpname, &ofd))) { |
|
1094 |
+ cli_dbgmsg("cli_scanscript: Can't generate temporary file/descriptor\n"); |
|
1095 |
+ return ret; |
|
1096 |
+ } |
|
1097 |
+ } |
|
1098 |
+ |
|
1099 |
+ text_normalize_init(&state, normalized, sizeof(normalized)); |
|
1100 |
+ ret = CL_CLEAN; |
|
1101 |
+ |
|
1102 |
+ do { |
|
1103 |
+ nread = cli_readn(desc, buff, sizeof(buff)); |
|
1104 |
+ if(nread <= 0 || state.out_pos + nread > state.out_len) { |
|
1105 |
+ /* flush if error/EOF, or too little buffer space left */ |
|
1106 |
+ if((ofd != -1) && (write(ofd, state.out, state.out_pos) == -1)) { |
|
1107 |
+ cli_errmsg("cli_scanscript: can't write to file %s\n",tmpname); |
|
1108 |
+ close(ofd); |
|
1109 |
+ ofd = -1; |
|
1110 |
+ /* we can continue to scan in memory */ |
|
1111 |
+ } |
|
1112 |
+ /* when we flush the buffer also scan */ |
|
1113 |
+ if(cli_scanbuff(state.out, state.out_pos, ctx->virname, ctx->engine, CL_TYPE_TEXT_ASCII) == CL_VIRUS) { |
|
1114 |
+ ret = CL_VIRUS; |
|
1115 |
+ break; |
|
1116 |
+ } |
|
1117 |
+ text_normalize_reset(&state); |
|
1118 |
+ } |
|
1119 |
+ if(nread > 0 && (text_normalize_buffer(&state, buff, nread)) != nread) { |
|
1120 |
+ cli_dbgmsg("cli_scanscript: short read during normalizing\n"); |
|
1121 |
+ } |
|
1122 |
+ /* used a do {}while() here, since we need to flush our buffers at the end, |
|
1123 |
+ * and using while(){} loop would mean code duplication */ |
|
1124 |
+ } while (nread > 0); |
|
1125 |
+ |
|
1126 |
+ if(cli_leavetemps_flag) { |
|
1127 |
+ free(tmpname); |
|
1128 |
+ close(ofd); |
|
1129 |
+ } |
|
1130 |
+ |
|
1131 |
+ return ret; |
|
1132 |
+} |
|
1133 |
+ |
|
1067 | 1134 |
static int cli_scanhtml_utf16(int desc, cli_ctx *ctx) |
1068 | 1135 |
{ |
1069 | 1136 |
char *tempname, buff[512], *decoded; |
... | ... |
@@ -1838,6 +1906,11 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx) |
1838 | 1838 |
ret = cli_scanhtml_utf16(desc, ctx); |
1839 | 1839 |
break; |
1840 | 1840 |
|
1841 |
+ case CL_TYPE_SCRIPT: |
|
1842 |
+ if(DCONF_DOC & DOC_CONF_SCRIPT) |
|
1843 |
+ ret = cli_scanscript(desc, ctx); |
|
1844 |
+ break; |
|
1845 |
+ |
|
1841 | 1846 |
case CL_TYPE_RTF: |
1842 | 1847 |
if(DCONF_DOC & DOC_CONF_RTF) |
1843 | 1848 |
ret = cli_scanrtf(desc, ctx); |