libclamav/text.c
e3aaff8e
 /*
  *  Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
d21556c8
  *
  * $Log: text.c,v $
95e11e5a
  * Revision 1.25  2007/02/12 20:46:09  njh
  * Various tidy
  *
04995b8c
  * Revision 1.24  2006/09/13 20:53:50  njh
  * Added debug
  *
011d1ffc
  * Revision 1.23  2006/07/14 12:13:08  njh
  * Typo
  *
46d546c0
  * Revision 1.22  2006/07/01 21:03:36  njh
  * Better use of destroy mode
  *
2673dc74
  * Revision 1.21  2006/07/01 16:17:35  njh
  * Added destroy flag
  *
826864d6
  * Revision 1.20  2006/07/01 03:47:50  njh
  * Don't loop if binhex runs out of memory
  *
d4a7dd82
  * Revision 1.19  2006/05/19 11:02:12  njh
  * Just include mbox.h
  *
01c99f53
  * Revision 1.18  2006/05/04 10:37:03  nigelhorne
  * Speed up scanning of clean files
  *
0f7f7682
  * Revision 1.17  2006/05/03 09:36:40  nigelhorne
  * Pass full ctx into the mbox code
  *
48b7b4a7
  * Revision 1.16  2006/04/09 19:59:28  kojm
  * update GPL headers with new address for FSF
  *
9c107190
  * Revision 1.15  2005/03/10 08:50:49  nigelhorne
  * Tidy
  *
324c8ced
  * Revision 1.14  2005/01/19 05:31:55  nigelhorne
  * Added textIterate
  *
1d619765
  * Revision 1.13  2004/12/08 19:03:41  nigelhorne
  * Fix compilation error on Solaris
  *
8a892c3b
  * Revision 1.12  2004/12/04 16:03:55  nigelhorne
  * Text/plain now handled as no encoding
  *
496e1116
  * Revision 1.11  2004/11/27 21:54:26  nigelhorne
  * Tidy
  *
0e5a0129
  * Revision 1.10  2004/08/22 10:34:24  nigelhorne
  * Use fileblob
  *
b2223aad
  * Revision 1.9  2004/08/21 11:57:57  nigelhorne
  * Use line.[ch]
  *
0c0894b8
  * Revision 1.8  2004/07/20 14:35:29  nigelhorne
  * Some MYDOOM.I were getting through
  *
02927896
  * Revision 1.7  2004/06/22 04:08:02  nigelhorne
  * Optimise empty lines
  *
b6ba5281
  * Revision 1.6  2004/05/05 09:37:52  nigelhorne
  * Removed textClean - not needed in clamAV
  *
c81143fc
  * Revision 1.5  2004/03/25 22:40:46  nigelhorne
  * Removed even more calls to realloc and some duplicated code
  *
d21556c8
  * Revision 1.4  2004/02/26 13:26:34  nigelhorne
  * Handle spaces at the end of uuencoded lines
  *
e3aaff8e
  */
 
95e11e5a
 /* Revision identification string ($Id$ keyword) for this file */
 static	char	const	rcsid[] = "$Id: text.c,v 1.25 2007/02/12 20:46:09 njh Exp $";
d21556c8
 
6d6e8271
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
1d619765
 #ifndef	CL_DEBUG
 #define	NDEBUG	/* map CLAMAV debug onto standard */
 #endif
 
e3aaff8e
 #include <stdlib.h>
8a892c3b
 #ifdef	C_DARWIN
e3aaff8e
 #include <sys/types.h>
 #include <sys/malloc.h>
 #else
 #ifdef HAVE_MALLOC_H /* tk: FreeBSD-CURRENT doesn't support malloc.h */
b2223aad
 #ifndef	C_BSD	/* BSD now uses stdlib.h */
e3aaff8e
 #include <malloc.h>
 #endif
 #endif
b2223aad
 #endif
e3aaff8e
 #include <string.h>
 #include <ctype.h>
 #include <assert.h>
0e5a0129
 #include <stdio.h>
e3aaff8e
 
0f7f7682
 #include "others.h"
d4a7dd82
 
e3aaff8e
 #include "mbox.h"
 
8a892c3b
 /*
  * Forward declarations of the helpers that are private to this file.
  * addToFileblob, getLength and addToBlob share the callback signature
  * expected by textIterate.
  */
 static	text	*textCopy(const text *t_head);
324c8ced
 static	void	addToFileblob(const line_t *line, void *arg);
 static	void	getLength(const line_t *line, void *arg);
 static	void	addToBlob(const line_t *line, void *arg);
2673dc74
 static	void	*textIterate(text *t_text, void (*cb)(const line_t *line, void *arg), void *arg, int destroy);
8a892c3b
 
e3aaff8e
 /*
  * Release every node of the list that starts at t_head.
  * A node's line, when it has one, is passed to lineUnlink() before the
  * node itself is freed. Calling this with NULL does nothing.
  */
 void
 textDestroy(text *t_head)
 {
 	text *node, *following;
 
 	for(node = t_head; node != NULL; node = following) {
 		/* fetch the successor now: node is about to be freed */
 		following = node->t_next;
 
 		if(node->t_line != NULL)
 			(void)lineUnlink(node->t_line);
 
 		free(node);
 	}
 }
 
 /*
  * textClean: currently a no-op that hands back the list it was given.
  *
  * It is the place where trailing spaces, trailing blank lines, empty lines
  * etc. could be stripped, but that probably isn't worth the time taken
  * given that it wouldn't reclaim much memory.
  *
  * Returns t_head, unmodified.
  */
 text *
 textClean(text *t_head)
 {
 	return t_head;
 }
 
 /* Clone the current object */
8a892c3b
 static text *
e3aaff8e
 textCopy(const text *t_head)
 {
 	text *first = NULL, *last = NULL;
 
 	while(t_head) {
 		if(first == NULL)
 			last = first = (text *)cli_malloc(sizeof(text));
 		else {
 			last->t_next = (text *)cli_malloc(sizeof(text));
 			last = last->t_next;
 		}
 
9c107190
 		if(last == NULL) {
 			if(first)
 				textDestroy(first);
 			return NULL;
 		}
e3aaff8e
 
b2223aad
 		if(t_head->t_line)
 			last->t_line = lineLink(t_head->t_line);
 		else
 			last->t_line = NULL;
e3aaff8e
 
 		t_head = t_head->t_next;
 	}
 
 	if(first)
 		last->t_next = NULL;
 
 	return first;
 }
 
c81143fc
 /* Add a copy of a text to the end of the current object */
e3aaff8e
 text *
 textAdd(text *t_head, const text *t)
 {
 	text *ret;
324c8ced
 	int count;
e3aaff8e
 
 	if(t_head == NULL)
 		return textCopy(t);
 
d4d14218
 	if(t == NULL)
 		return t_head;
e3aaff8e
 
d4d14218
 	ret = t_head;
e3aaff8e
 
324c8ced
 	count = 0;
 	while(t_head->t_next) {
 		count++;
e3aaff8e
 		t_head = t_head->t_next;
324c8ced
 	}
 
 	cli_dbgmsg("textAdd: count = %d\n", count);
e3aaff8e
 
 	while(t) {
 		t_head->t_next = (text *)cli_malloc(sizeof(text));
 		t_head = t_head->t_next;
 
 		assert(t_head != NULL);
 
b2223aad
 		if(t->t_line)
 			t_head->t_line = lineLink(t->t_line);
 		else
 			t_head->t_line = NULL;
e3aaff8e
 
 		t = t->t_next;
 	}
 
 	t_head->t_next = NULL;
 
 	return ret;
 }
 
 /*
  * Add a message's content to the end of the current object
  */
 text *
0c0894b8
 textAddMessage(text *aText, message *aMessage)
e3aaff8e
 {
d4d14218
 	assert(aMessage != NULL);
 
e3aaff8e
 	if(messageGetEncoding(aMessage) == NOENCODING)
 		return textAdd(aText, messageGetBody(aMessage));
 	else {
 		text *anotherText = messageToText(aMessage);
 
 		if(aText) {
 			aText = textAdd(aText, anotherText);
 			textDestroy(anotherText);
 			return aText;
 		}
 		return anotherText;
 	}
 }
c81143fc
 
 /*
  * Transfer the contents of the text into a blob
  * The caller must free the returned blob if b is NULL
  */
 blob *
2673dc74
 textToBlob(text *t, blob *b, int destroy)
c81143fc
 {
324c8ced
 	size_t s;
826864d6
 	blob *bin;
c81143fc
 
324c8ced
 	if(t == NULL)
 		return NULL;
 
 	s = 0;
 
2673dc74
 	(void)textIterate(t, getLength, &s, 0);
324c8ced
 
 	if(s == 0)
 		return b;
c81143fc
 
2673dc74
 	/*
 	 * copy b. If b is NULL and an error occurs we know we need to free
 	 *	before returning
 	 */
 	bin = b;
c81143fc
 	if(b == NULL) {
 		b = blobCreate();
 
 		if(b == NULL)
 			return NULL;
 	}
 
826864d6
 	if(blobGrow(b, s) != CL_SUCCESS) {
011d1ffc
 		cli_warnmsg("Couldn't grow the blob: we may be low on memory\n");
2673dc74
 #if	0
 		if(!destroy) {
 			if(bin == NULL)
 				blobDestroy(b);
 			return NULL;
 		}
 		/*
 		 * We may be able to recover enough memory as we destroy to
 		 * create the blob
 		 */
 #else
826864d6
 		if(bin == NULL)
 			blobDestroy(b);
 		return NULL;
2673dc74
 #endif
826864d6
 	}
c81143fc
 
2673dc74
 	(void)textIterate(t, addToBlob, b, destroy);
c81143fc
 
46d546c0
 	if(destroy && t->t_next) {
 		textDestroy(t->t_next);
 		t->t_next = NULL;
 	}
 
c81143fc
 	blobClose(b);
 
b6ba5281
 	return b;
c81143fc
 }
0e5a0129
 
 fileblob *
2673dc74
 textToFileblob(text *t, fileblob *fb, int destroy)
0e5a0129
 {
 	assert(fb != NULL);
 	assert(t != NULL);
 
 	if(fb == NULL) {
04995b8c
 		cli_dbgmsg("textToFileBlob, destroy = %d\n", destroy);
0e5a0129
 		fb = fileblobCreate();
 
 		if(fb == NULL)
 			return NULL;
04995b8c
 	} else {
 		cli_dbgmsg("textToFileBlob to %s, destroy = %d\n",
 			fileblobGetFilename(fb), destroy);
 
01c99f53
 		fb->ctx = NULL;	/* no need to scan */
04995b8c
 	}
0e5a0129
 
46d546c0
 	fb = textIterate(t, addToFileblob, fb, destroy);
 	if(destroy && t->t_next) {
 		textDestroy(t->t_next);
 		t->t_next = NULL;
 	}
 	return fb;
324c8ced
 }
0e5a0129
 
324c8ced
 /*
  * textIterate callback: accumulate the number of bytes a line will occupy.
  * arg points to a size_t running total. Every line, including a NULL one,
  * counts for one byte on top of the length of its data.
  */
 static void
 getLength(const line_t *line, void *arg)
 {
 	size_t *total = (size_t *)arg;
 	size_t bytes = 1;	/* the one extra byte counted for every line */
 
 	if(line != NULL)
 		bytes += strlen(lineGetData(line));
 
 	*total += bytes;
 }
 
 /*
  * textIterate callback: append a line followed by a newline to a blob.
  * arg is the destination blob. A NULL line contributes only the newline.
  */
 static void
 addToBlob(const line_t *line, void *arg)
 {
 	blob *dest = (blob *)arg;
 	const char *data;
 
 	if(line != NULL) {
 		data = lineGetData(line);
 		blobAddData(dest, (const unsigned char *)data, strlen(data));
 	}
 
 	/* every line, empty or not, is terminated */
 	blobAddData(dest, (const unsigned char *)"\n", 1);
 }
 
 /*
  * textIterate callback: append a line followed by a newline to a fileblob.
  * arg is the destination fileblob. A NULL line contributes only the
  * newline.
  */
 static void
 addToFileblob(const line_t *line, void *arg)
 {
 	fileblob *dest = (fileblob *)arg;
 	const char *data;
 
 	if(line != NULL) {
 		data = lineGetData(line);
 		fileblobAddData(dest, (const unsigned char *)data, strlen(data));
 	}
 
 	/* every line, empty or not, is terminated */
 	fileblobAddData(dest, (const unsigned char *)"\n", 1);
 }
 
 static void *
2673dc74
 textIterate(text *t_text, void (*cb)(const line_t *item, void *arg), void *arg, int destroy)
324c8ced
 {
 	while(t_text) {
 		(*cb)(t_text->t_line, arg);
2673dc74
 
 		if(destroy && t_text->t_line) {
 			lineUnlink(t_text->t_line);
 			t_text->t_line = NULL;
 		}
 
324c8ced
 		t_text = t_text->t_next;
 	}
 	return arg;
0e5a0129
 }