libclamav/text.c
b151ef55
 /*
  *  Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
30738099
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
7f8589d3
  *
  * $Log: text.c,v $
dbca666a
  * Revision 1.25  2007/02/12 20:46:09  njh
  * Various tidy
  *
74820396
  * Revision 1.24  2006/09/13 20:53:50  njh
  * Added debug
  *
e9d7da84
  * Revision 1.23  2006/07/14 12:13:08  njh
  * Typo
  *
6c7ddbc8
  * Revision 1.22  2006/07/01 21:03:36  njh
  * Better use of destroy mode
  *
985cc85e
  * Revision 1.21  2006/07/01 16:17:35  njh
  * Added destroy flag
  *
9b2be218
  * Revision 1.20  2006/07/01 03:47:50  njh
  * Don't loop if binhex runs out of memory
  *
ea916d6a
  * Revision 1.19  2006/05/19 11:02:12  njh
  * Just include mbox.h
  *
a4b13e7a
  * Revision 1.18  2006/05/04 10:37:03  nigelhorne
  * Speed up scanning of clean files
  *
5c86c162
  * Revision 1.17  2006/05/03 09:36:40  nigelhorne
  * Pass full ctx into the mbox code
  *
30738099
  * Revision 1.16  2006/04/09 19:59:28  kojm
  * update GPL headers with new address for FSF
  *
f4ff13a5
  * Revision 1.15  2005/03/10 08:50:49  nigelhorne
  * Tidy
  *
0a6eed64
  * Revision 1.14  2005/01/19 05:31:55  nigelhorne
  * Added textIterate
  *
36155fca
  * Revision 1.13  2004/12/08 19:03:41  nigelhorne
  * Fix compilation error on Solaris
  *
3497daca
  * Revision 1.12  2004/12/04 16:03:55  nigelhorne
  * Text/plain now handled as no encoding
  *
aedb0336
  * Revision 1.11  2004/11/27 21:54:26  nigelhorne
  * Tidy
  *
1e06e1ab
  * Revision 1.10  2004/08/22 10:34:24  nigelhorne
  * Use fileblob
  *
de617e3e
  * Revision 1.9  2004/08/21 11:57:57  nigelhorne
  * Use line.[ch]
  *
285a69b4
  * Revision 1.8  2004/07/20 14:35:29  nigelhorne
  * Some MYDOOM.I were getting through
  *
98685ac1
  * Revision 1.7  2004/06/22 04:08:02  nigelhorne
  * Optimise empty lines
  *
1839b497
  * Revision 1.6  2004/05/05 09:37:52  nigelhorne
  * Removed textClean - not needed in clamAV
  *
02c9dc2a
  * Revision 1.5  2004/03/25 22:40:46  nigelhorne
  * Removed even more calls to realloc and some duplicated code
  *
7f8589d3
  * Revision 1.4  2004/02/26 13:26:34  nigelhorne
  * Handle spaces at the end of uuencoded lines
  *
b151ef55
  */
 
dbca666a
 static	char	const	rcsid[] = "$Id: text.c,v 1.25 2007/02/12 20:46:09 njh Exp $";
7f8589d3
 
8b242bb9
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
36155fca
 #ifndef	CL_DEBUG
 #define	NDEBUG	/* map CLAMAV debug onto standard */
 #endif
 
b151ef55
 #include <stdlib.h>
3497daca
 #ifdef	C_DARWIN
b151ef55
 #include <sys/types.h>
 #include <sys/malloc.h>
 #else
 #ifdef HAVE_MALLOC_H /* tk: FreeBSD-CURRENT doesn't support malloc.h */
de617e3e
 #ifndef	C_BSD	/* BSD now uses stdlib.h */
b151ef55
 #include <malloc.h>
 #endif
 #endif
de617e3e
 #endif
b151ef55
 #include <string.h>
 #include <ctype.h>
 #include <assert.h>
1e06e1ab
 #include <stdio.h>
b151ef55
 
5c86c162
 #include "others.h"
ea916d6a
 
b151ef55
 #include "mbox.h"
 
3497daca
 static	text	*textCopy(const text *t_head);
0a6eed64
 static	void	addToFileblob(const line_t *line, void *arg);
 static	void	getLength(const line_t *line, void *arg);
 static	void	addToBlob(const line_t *line, void *arg);
985cc85e
 static	void	*textIterate(text *t_text, void (*cb)(const line_t *line, void *arg), void *arg, int destroy);
3497daca
 
b151ef55
 /*
  * Free every node of a text list, calling lineUnlink() on each node's
  * line when it has one.
  * t_head may be NULL, in which case nothing is done.
  */
 void
 textDestroy(text *t_head)
 {
 	text *node, *following;

 	for(node = t_head; node != NULL; node = following) {
 		/* fetch the successor before the node itself is freed */
 		following = node->t_next;

 		if(node->t_line != NULL)
 			(void)lineUnlink(node->t_line);

 		free(node);
 	}
 }
 
 /*
  * textClean: currently a no-op that hands the list straight back.
  *
  * It could be used to remove trailing spaces from the lines, trailing
  * blank lines, empty lines etc., but it probably isn't worth the time
  * taken given that it won't reclaim much memory.
  *
  * t_head:	head of the text list (never dereferenced here, so NULL is
  *		accepted)
  * Returns:	t_head, unchanged
  */
 text *
 textClean(text *t_head)
 {
1839b497
 	return t_head;
b151ef55
 }
 
 /* Clone the current object */
3497daca
 static text *
b151ef55
 textCopy(const text *t_head)
 {
 	text *first = NULL, *last = NULL;
 
 	while(t_head) {
 		if(first == NULL)
 			last = first = (text *)cli_malloc(sizeof(text));
 		else {
 			last->t_next = (text *)cli_malloc(sizeof(text));
 			last = last->t_next;
 		}
 
f4ff13a5
 		if(last == NULL) {
 			if(first)
 				textDestroy(first);
 			return NULL;
 		}
b151ef55
 
de617e3e
 		if(t_head->t_line)
 			last->t_line = lineLink(t_head->t_line);
 		else
 			last->t_line = NULL;
b151ef55
 
 		t_head = t_head->t_next;
 	}
 
 	if(first)
 		last->t_next = NULL;
 
 	return first;
 }
 
02c9dc2a
 /* Add a copy of a text to the end of the current object */
b151ef55
 text *
 textAdd(text *t_head, const text *t)
 {
 	text *ret;
0a6eed64
 	int count;
b151ef55
 
 	if(t_head == NULL)
 		return textCopy(t);
 
0bcad2b1
 	if(t == NULL)
 		return t_head;
b151ef55
 
0bcad2b1
 	ret = t_head;
b151ef55
 
0a6eed64
 	count = 0;
 	while(t_head->t_next) {
 		count++;
b151ef55
 		t_head = t_head->t_next;
0a6eed64
 	}
 
 	cli_dbgmsg("textAdd: count = %d\n", count);
b151ef55
 
 	while(t) {
 		t_head->t_next = (text *)cli_malloc(sizeof(text));
 		t_head = t_head->t_next;
 
 		assert(t_head != NULL);
 
de617e3e
 		if(t->t_line)
 			t_head->t_line = lineLink(t->t_line);
 		else
 			t_head->t_line = NULL;
b151ef55
 
 		t = t->t_next;
 	}
 
 	t_head->t_next = NULL;
 
 	return ret;
 }
 
 /*
  * Add a message's content to the end of the current object
  */
 text *
285a69b4
 textAddMessage(text *aText, message *aMessage)
b151ef55
 {
0bcad2b1
 	assert(aMessage != NULL);
 
b151ef55
 	if(messageGetEncoding(aMessage) == NOENCODING)
 		return textAdd(aText, messageGetBody(aMessage));
 	else {
 		text *anotherText = messageToText(aMessage);
 
 		if(aText) {
 			aText = textAdd(aText, anotherText);
 			textDestroy(anotherText);
 			return aText;
 		}
 		return anotherText;
 	}
 }
02c9dc2a
 
 /*
  * Transfer the contents of the text into a blob
  * The caller must free the returned blob if b is NULL
  */
 blob *
985cc85e
 textToBlob(text *t, blob *b, int destroy)
02c9dc2a
 {
0a6eed64
 	size_t s;
9b2be218
 	blob *bin;
02c9dc2a
 
0a6eed64
 	if(t == NULL)
 		return NULL;
 
 	s = 0;
 
985cc85e
 	(void)textIterate(t, getLength, &s, 0);
0a6eed64
 
 	if(s == 0)
 		return b;
02c9dc2a
 
985cc85e
 	/*
 	 * copy b. If b is NULL and an error occurs we know we need to free
 	 *	before returning
 	 */
 	bin = b;
02c9dc2a
 	if(b == NULL) {
 		b = blobCreate();
 
 		if(b == NULL)
 			return NULL;
 	}
 
9b2be218
 	if(blobGrow(b, s) != CL_SUCCESS) {
e9d7da84
 		cli_warnmsg("Couldn't grow the blob: we may be low on memory\n");
985cc85e
 #if	0
 		if(!destroy) {
 			if(bin == NULL)
 				blobDestroy(b);
 			return NULL;
 		}
 		/*
 		 * We may be able to recover enough memory as we destroy to
 		 * create the blob
 		 */
 #else
9b2be218
 		if(bin == NULL)
 			blobDestroy(b);
 		return NULL;
985cc85e
 #endif
9b2be218
 	}
02c9dc2a
 
985cc85e
 	(void)textIterate(t, addToBlob, b, destroy);
02c9dc2a
 
6c7ddbc8
 	if(destroy && t->t_next) {
 		textDestroy(t->t_next);
 		t->t_next = NULL;
 	}
 
02c9dc2a
 	blobClose(b);
 
1839b497
 	return b;
02c9dc2a
 }
1e06e1ab
 
 fileblob *
985cc85e
 textToFileblob(text *t, fileblob *fb, int destroy)
1e06e1ab
 {
 	assert(fb != NULL);
 	assert(t != NULL);
 
 	if(fb == NULL) {
74820396
 		cli_dbgmsg("textToFileBlob, destroy = %d\n", destroy);
1e06e1ab
 		fb = fileblobCreate();
 
 		if(fb == NULL)
 			return NULL;
74820396
 	} else {
 		cli_dbgmsg("textToFileBlob to %s, destroy = %d\n",
 			fileblobGetFilename(fb), destroy);
 
a4b13e7a
 		fb->ctx = NULL;	/* no need to scan */
74820396
 	}
1e06e1ab
 
6c7ddbc8
 	fb = textIterate(t, addToFileblob, fb, destroy);
 	if(destroy && t->t_next) {
 		textDestroy(t->t_next);
 		t->t_next = NULL;
 	}
 	return fb;
0a6eed64
 }
1e06e1ab
 
0a6eed64
 /*
  * textIterate() callback: add the space needed for one line to the size_t
  * that arg points to. That is the length of the line's data, if there is
  * a line, plus one byte for the newline that addToBlob() appends.
  */
 static void
 getLength(const line_t *line, void *arg)
 {
 	size_t *total = (size_t *)arg;
 	size_t needed = 1;

 	if(line != NULL)
 		needed += strlen(lineGetData(line));

 	*total += needed;
 }
 
 /*
  * textIterate() callback: append one line to the blob that arg points to.
  * The line's data is added when there is a line; a newline is added in
  * every case.
  */
 static void
 addToBlob(const line_t *line, void *arg)
 {
 	blob *dest = (blob *)arg;

 	if(line != NULL) {
 		const char *data = lineGetData(line);
 		const size_t len = strlen(data);

 		blobAddData(dest, (const unsigned char *)data, len);
 	}

 	blobAddData(dest, (const unsigned char *)"\n", 1);
 }
 
 /*
  * textIterate() callback: append one line to the fileblob that arg points
  * to. The line's data is added when there is a line; a newline is added
  * in every case.
  */
 static void
 addToFileblob(const line_t *line, void *arg)
 {
 	fileblob *dest = (fileblob *)arg;

 	if(line != NULL) {
 		const char *data = lineGetData(line);
 		const size_t len = strlen(data);

 		fileblobAddData(dest, (const unsigned char *)data, len);
 	}

 	fileblobAddData(dest, (const unsigned char *)"\n", 1);
 }
 
 static void *
985cc85e
 textIterate(text *t_text, void (*cb)(const line_t *item, void *arg), void *arg, int destroy)
0a6eed64
 {
 	while(t_text) {
 		(*cb)(t_text->t_line, arg);
985cc85e
 
 		if(destroy && t_text->t_line) {
 			lineUnlink(t_text->t_line);
 			t_text->t_line = NULL;
 		}
 
0a6eed64
 		t_text = t_text->t_next;
 	}
 	return arg;
1e06e1ab
 }