GitList

Browse code

support for generic text normalizer (CL_TYPE_SCRIPT)

git-svn: trunk@3584

Török Edvin authored on 2008/02/05 06:38:34
Showing 6 changed files

ChangeLog index ed72e4a..4b54b28 100644
libclamav/dconf.c index a6ba277..c5bf2bc 100644
libclamav/dconf.h index e059fe5..dd8494e 100644
libclamav/filetypes.c index 45ddfc8..554a401 100644
libclamav/filetypes.h index 68ee154..57a6a7b 100644
libclamav/scanners.c index 749e555..0c928d4 100644

@@ -1,3 +1,8 @@
                     +Mon Feb  4 23:20:12 EET 2008 (edwin)
                     +------------------------------------
                     +  * libclamav/scanners, filetypes, dconf:
                     +	support for generic text normalizer (CL_TYPE_SCRIPT)
+                    +
                      Mon Feb  4 23:06:34 EET 2008 (edwin)
                      ---------------------------------
                        * libclamav/textnorm.[ch]: generic text normalizer (bb #241)

libclamav/dconf.c

History View file @ 015ce4a

@@ -86,6 +86,7 @@ static struct dconf_module modules[] = {
                          { "DOCUMENT",   "HTML",	    DOC_CONF_HTML,	    1 },
                          { "DOCUMENT",   "RTF",	    DOC_CONF_RTF,	    1 },
                          { "DOCUMENT",   "PDF",	    DOC_CONF_PDF,	    1 },
                     +    { "DOCUMENT",   "SCRIPT",	    DOC_CONF_SCRIPT,	    1 },
                          { "MAIL",	    "MBOX",	    MAIL_CONF_MBOX,	    1 },
                          { "MAIL",	    "TNEF",	    MAIL_CONF_TNEF,	    1 },

libclamav/dconf.h

History View file @ 015ce4a

@@ -74,6 +74,7 @@ struct cli_dconf {
                      #define DOC_CONF_HTML	    0x1
                      #define DOC_CONF_RTF	    0x2
                      #define DOC_CONF_PDF	    0x4
                     +#define DOC_CONF_SCRIPT	    0x8
                      /* Mail flags */
                      #define MAIL_CONF_MBOX	    0x1

libclamav/filetypes.c

History View file @ 015ce4a

@@ -76,6 +76,7 @@ static const struct ftmap_s {
                          { "CL_TYPE_PDF",		CL_TYPE_PDF		},
                          { "CL_TYPE_UUENCODED",	CL_TYPE_UUENCODED	},
                          { "CL_TYPE_HTML_UTF16",	CL_TYPE_HTML_UTF16	},
                     +    { "CL_TYPE_SCRIPT",         CL_TYPE_SCRIPT          },
                          { "CL_TYPE_RTF",		CL_TYPE_RTF		},
                          { "CL_TYPE_HTML",		CL_TYPE_HTML		},
                          { "CL_TYPE_MAIL",		CL_TYPE_MAIL		},

libclamav/filetypes.h

History View file @ 015ce4a

@@ -63,6 +63,7 @@ typedef enum {
                          CL_TYPE_CRYPTFF,
                          CL_TYPE_PDF,
                          CL_TYPE_UUENCODED,
                     +    CL_TYPE_SCRIPT,
                          CL_TYPE_HTML_UTF16,
                          CL_TYPE_RTF,

libclamav/scanners.c

History View file @ 015ce4a

@@ -85,6 +85,7 @@
                      #include "unarj.h"
                      #include "nulsft.h"
                      #include "autoit.h"
                     +#include "textnorm.h"
                      #include <zlib.h>
                      #include "unzip.h"
@@ -1064,6 +1065,73 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
                          return ret;
+                     }
                     /*
                      * cli_scanscript: read descriptor `desc` in chunks of sizeof(buff)
                      * bytes, pass each chunk through the text normalizer, and scan the
                      * normalized output with cli_scanbuff() as CL_TYPE_TEXT_ASCII.
                      *
                      * The normalizer state is initialised with the local `normalized`
                      * buffer (SCANBUFF bytes). Whenever the next chunk might not fit, and
                      * once more at EOF/read error, the accumulated output is flushed: it is
                      * written to a temporary file (only when cli_leavetemps_flag is set),
                      * scanned, and the normalizer state is reset.
                      *
                      * Returns:
                      *   CL_EIO    if fstat() on `desc` fails;
                      *   CL_CLEAN  if the file is larger than 10485760 bytes (it is not
                      *             scanned at all), or if no flush reported CL_VIRUS;
                      *   CL_VIRUS  as soon as one cli_scanbuff() call returns CL_VIRUS;
                      *   the non-zero result of cli_gentempfd() if the temporary file
                      *   cannot be created.
                      */
                     +static int cli_scanscript(int desc, cli_ctx *ctx)
                     +{
                     +	unsigned char buff[FILEBUFF];
                     +	unsigned char normalized[SCANBUFF];
                     +	struct text_norm_state state;
                     +	struct stat sb;
                     +	char *tmpname = NULL;
                     +	int ofd = -1, ret;
                     +	ssize_t nread;
+                    +
                      	/* NOTE(review): the message below names cli_scantext(), not
                      	 * cli_scanscript() -- looks like a leftover; confirm before relying
                      	 * on it when reading debug logs */
                     +	cli_dbgmsg("in cli_scantext()\n");
+                    +
                     +	if(fstat(desc, &sb) == -1) {
                     +		cli_errmsg("cli_scanscript: fstat() failed for descriptor %d\n", desc);
                     +		return CL_EIO;
                     +	}
+                    +
                     +	/* don't normalize files that are too large */
                     +	if(sb.st_size > 10485760) {
                     +		cli_dbgmsg("cli_scanscript: exiting (file larger than 10 MB)\n");
                     +		return CL_CLEAN;
                     +	}
+                    +
                     +	/* dump to disk only if explicitly asked to,
                     +	 * otherwise we can process just in-memory */
                     +	if(cli_leavetemps_flag) {
                     +		if((ret = cli_gentempfd(NULL, &tmpname, &ofd))) {
                     +			cli_dbgmsg("cli_scanscript: Can't generate temporary file/descriptor\n");
                     +			return ret;
                     +		}
                     +	}
+                    +
                     +	text_normalize_init(&state, normalized, sizeof(normalized));
                     +	ret = CL_CLEAN;
+                    +
                     +	do {
                     +		nread = cli_readn(desc, buff, sizeof(buff));
                      		/* The space check compares the raw chunk size against the room
                      		 * left in the output buffer; presumably the normalizer never emits
                      		 * more bytes than it consumes -- TODO confirm against textnorm.c.
                      		 * A read error (nread < 0) takes the same path as EOF. */
                     +		if(nread <= 0 || state.out_pos + nread > state.out_len) {
                     +			/* flush if error/EOF, or too little buffer space left */
                      			/* NOTE(review): only a -1 result from write() is treated as
                      			 * failure; a short write is not detected here */
                     +			if((ofd != -1) && (write(ofd, state.out, state.out_pos) == -1)) {
                     +				cli_errmsg("cli_scanscript: can't write to file %s\n",tmpname);
                     +				close(ofd);
                     +				ofd = -1;
                     +				/* we can continue to scan in memory */
                     +			}
                     +			/* when we flush the buffer also scan */
                      			/* only CL_VIRUS is acted upon; any other cli_scanbuff() result
                      			 * is ignored and scanning continues */
                     +			if(cli_scanbuff(state.out, state.out_pos, ctx->virname, ctx->engine, CL_TYPE_TEXT_ASCII) == CL_VIRUS) {
                     +				ret = CL_VIRUS;
                     +				break;
                     +			}
                     +			text_normalize_reset(&state);
                     +		}
                      		/* a mismatch between the normalizer's return value and nread is
                      		 * only logged; the loop keeps going */
                     +		if(nread > 0 && (text_normalize_buffer(&state, buff, nread)) != nread) {
                     +			cli_dbgmsg("cli_scanscript: short read during normalizing\n");
                     +		}
                     +		/* used a do {}while() here, since we need to flush our buffers at the end,
                     +		 * and using while(){} loop would mean code duplication */
                     +	} while (nread > 0);
+                    +
                      	/* NOTE(review): if the write above failed, ofd was already closed
                      	 * and set to -1, so close() is called with -1 here */
                     +	if(cli_leavetemps_flag) {
                     +		free(tmpname);
                     +		close(ofd);
                     +	}
+                    +
                     +	return ret;
                     +}
+                    +
                      static int cli_scanhtml_utf16(int desc, cli_ctx *ctx)
+                     {
                      	char *tempname, buff[512], *decoded;
@@ -1838,6 +1906,11 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
                      		ret = cli_scanhtml_utf16(desc, ctx);
                      	    break;
                     +	case CL_TYPE_SCRIPT:
                     +	    if(DCONF_DOC & DOC_CONF_SCRIPT)
                     +	        ret = cli_scanscript(desc, ctx);
                     +	    break;
+                    +
                      	case CL_TYPE_RTF:
                      	    if(DCONF_DOC & DOC_CONF_RTF)
                      		ret = cli_scanrtf(desc, ctx);