libclamav/text.c
e3aaff8e
 /*
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5a36f2b9
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
2023340a
  *
  *  Authors: Nigel Horne
e3aaff8e
  *
  *  This program is free software; you can redistribute it and/or modify
2023340a
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
e3aaff8e
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
48b7b4a7
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
d21556c8
  *
  * $Log: text.c,v $
95e11e5a
  * Revision 1.25  2007/02/12 20:46:09  njh
  * Various tidy
  *
04995b8c
  * Revision 1.24  2006/09/13 20:53:50  njh
  * Added debug
  *
011d1ffc
  * Revision 1.23  2006/07/14 12:13:08  njh
  * Typo
  *
46d546c0
  * Revision 1.22  2006/07/01 21:03:36  njh
  * Better use of destroy mode
  *
2673dc74
  * Revision 1.21  2006/07/01 16:17:35  njh
  * Added destroy flag
  *
826864d6
  * Revision 1.20  2006/07/01 03:47:50  njh
  * Don't loop if binhex runs out of memory
  *
d4a7dd82
  * Revision 1.19  2006/05/19 11:02:12  njh
  * Just include mbox.h
  *
01c99f53
  * Revision 1.18  2006/05/04 10:37:03  nigelhorne
  * Speed up scanning of clean files
  *
0f7f7682
  * Revision 1.17  2006/05/03 09:36:40  nigelhorne
  * Pass full ctx into the mbox code
  *
48b7b4a7
  * Revision 1.16  2006/04/09 19:59:28  kojm
  * update GPL headers with new address for FSF
  *
9c107190
  * Revision 1.15  2005/03/10 08:50:49  nigelhorne
  * Tidy
  *
324c8ced
  * Revision 1.14  2005/01/19 05:31:55  nigelhorne
  * Added textIterate
  *
1d619765
  * Revision 1.13  2004/12/08 19:03:41  nigelhorne
  * Fix compilation error on Solaris
  *
8a892c3b
  * Revision 1.12  2004/12/04 16:03:55  nigelhorne
  * Text/plain now handled as no encoding
  *
496e1116
  * Revision 1.11  2004/11/27 21:54:26  nigelhorne
  * Tidy
  *
0e5a0129
  * Revision 1.10  2004/08/22 10:34:24  nigelhorne
  * Use fileblob
  *
b2223aad
  * Revision 1.9  2004/08/21 11:57:57  nigelhorne
  * Use line.[ch]
  *
0c0894b8
  * Revision 1.8  2004/07/20 14:35:29  nigelhorne
  * Some MYDOOM.I were getting through
  *
02927896
  * Revision 1.7  2004/06/22 04:08:02  nigelhorne
  * Optimise empty lines
  *
b6ba5281
  * Revision 1.6  2004/05/05 09:37:52  nigelhorne
  * Removed textClean - not needed in clamAV
  *
c81143fc
  * Revision 1.5  2004/03/25 22:40:46  nigelhorne
  * Removed even more calls to realloc and some duplicated code
  *
d21556c8
  * Revision 1.4  2004/02/26 13:26:34  nigelhorne
  * Handle spaces at the end of uuencoded lines
  *
e3aaff8e
  */
 
6d6e8271
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
e3aaff8e
 #include <stdlib.h>
288057e9
 #ifdef C_DARWIN
e3aaff8e
 #include <sys/types.h>
 #include <sys/malloc.h>
 #else
 #ifdef HAVE_MALLOC_H /* tk: FreeBSD-CURRENT doesn't support malloc.h */
288057e9
 #ifndef C_BSD        /* BSD now uses stdlib.h */
e3aaff8e
 #include <malloc.h>
 #endif
 #endif
b2223aad
 #endif
e3aaff8e
 #include <string.h>
 #include <ctype.h>
 #include <assert.h>
0e5a0129
 #include <stdio.h>
e3aaff8e
 
60d8d2c3
 #include "clamav.h"
0f7f7682
 #include "others.h"
d4a7dd82
 
e3aaff8e
 #include "mbox.h"
 
288057e9
 /*
  * Forward declarations for the helpers private to this file.
  * getLength, addToBlob and addToFileblob are the per-line callbacks that
  * textToBlob and textToFileblob hand to textIterate.
  */
 static text *textCopy(const text *t_head);
 static text *textAdd(text *t_head, const text *t);
 static void addToFileblob(const line_t *line, void *arg);
 static void getLength(const line_t *line, void *arg);
 static void addToBlob(const line_t *line, void *arg);
 static void *textIterate(text *t_text, void (*cb)(const line_t *line, void *arg), void *arg, int destroy);
8a892c3b
 
288057e9
 /*
  * Free every node of the list starting at t_head.
  * For each node that still holds a line, lineUnlink() is called on that
  * line before the node itself is freed. A NULL t_head is a no-op.
  */
 void textDestroy(text *t_head)
 {
     text *node, *following;

     for (node = t_head; node != NULL; node = following) {
         /* Remember the successor before the node is released */
         following = node->t_next;

         if (node->t_line != NULL) {
             lineUnlink(node->t_line);
             node->t_line = NULL;
         }
         free(node);
     }
 }
 
 /* Clone the current object */
8a892c3b
 static text *
e3aaff8e
 textCopy(const text *t_head)
 {
288057e9
     text *first = NULL, *last = NULL;
 
     while (t_head) {
         if (first == NULL)
             last = first = (text *)cli_malloc(sizeof(text));
         else {
             last->t_next = (text *)cli_malloc(sizeof(text));
             last         = last->t_next;
         }
e3aaff8e
 
288057e9
         if (last == NULL) {
             cli_errmsg("textCopy: Unable to allocate memory to clone object\n");
             if (first)
                 textDestroy(first);
             return NULL;
         }
e3aaff8e
 
288057e9
         last->t_next = NULL;
7cf2a701
 
288057e9
         if (t_head->t_line)
             last->t_line = lineLink(t_head->t_line);
         else
             last->t_line = NULL;
e3aaff8e
 
288057e9
         t_head = t_head->t_next;
     }
e3aaff8e
 
288057e9
     if (first)
         last->t_next = NULL;
e3aaff8e
 
288057e9
     return first;
e3aaff8e
 }
 
c81143fc
 /* Add a copy of a text to the end of the current object */
30e18caf
 static text *
e3aaff8e
 textAdd(text *t_head, const text *t)
 {
288057e9
     text *ret;
     int count;
e3aaff8e
 
288057e9
     if (t_head == NULL) {
         if (t == NULL) {
             cli_errmsg("textAdd fails sanity check\n");
             return NULL;
         }
         return textCopy(t);
     }
e3aaff8e
 
288057e9
     if (t == NULL)
         return t_head;
e3aaff8e
 
288057e9
     ret = t_head;
e3aaff8e
 
288057e9
     count = 0;
     while (t_head->t_next) {
         count++;
         t_head = t_head->t_next;
     }
324c8ced
 
288057e9
     cli_dbgmsg("textAdd: count = %d\n", count);
e3aaff8e
 
288057e9
     while (t) {
         t_head->t_next = (text *)cli_malloc(sizeof(text));
         t_head         = t_head->t_next;
e3aaff8e
 
288057e9
         assert(t_head != NULL);
e3aaff8e
 
288057e9
         if (t->t_line)
             t_head->t_line = lineLink(t->t_line);
         else
             t_head->t_line = NULL;
e3aaff8e
 
288057e9
         t = t->t_next;
     }
e3aaff8e
 
288057e9
     t_head->t_next = NULL;
e3aaff8e
 
288057e9
     return ret;
e3aaff8e
 }
 
 /*
  * Add a message's content to the end of the current object
  */
 text *
0c0894b8
 textAddMessage(text *aText, message *aMessage)
e3aaff8e
 {
288057e9
     assert(aMessage != NULL);
d4d14218
 
288057e9
     if (messageGetEncoding(aMessage) == NOENCODING)
         return textAdd(aText, messageGetBody(aMessage));
     else {
         text *anotherText = messageToText(aMessage);
e3aaff8e
 
288057e9
         if (aText) {
             text *newHead = textMove(aText, anotherText);
             free(anotherText);
             return newHead;
         }
         return anotherText;
     }
e3aaff8e
 }
c81143fc
 
 /*
  * Put the contents of the given text at the end of the current object.
  *
  * The given text is emptied: its first node is kept but loses its line and
  * its successors. It can be used again if needed, though be warned that it
  * will have an empty line at the start.
  *
  * Returns the head of the resulting list (a newly allocated head when
  * t_head is NULL), t_head unchanged when t is NULL, or NULL when both
  * arguments are NULL or the new node cannot be allocated.
  */
 text *
 textMove(text *t_head, text *t)
 {
     text *tail;
     text *node;

     if (t == NULL) {
         /* Nothing to move; complain only if there is no destination either */
         if (t_head == NULL)
             cli_errmsg("textMove fails sanity check\n");
         return t_head;
     }

     if (t_head == NULL) {
         /* No destination list yet: the new node becomes its head */
         node = (text *)cli_malloc(sizeof(text));
         if (node == NULL) {
             cli_errmsg("textMove: Unable to allocate memory for head\n");
             return NULL;
         }
         t_head = node;
     } else {
         for (tail = t_head; tail->t_next != NULL; tail = tail->t_next)
             ;

         node         = (text *)cli_malloc(sizeof(text));
         tail->t_next = node;
         if (node == NULL) {
             cli_errmsg("textMove: Unable to allocate memory for head->next\n");
             return NULL;
         }
     }

     /*
      * Move the first line manually so that the caller is left clean but
      * empty, the rest is moved by a simple pointer reassignment
      */
     node->t_line = t->t_line;
     node->t_next = t->t_next;
     t->t_line    = NULL;
     t->t_next    = NULL;

     return t_head;
 }
 
 /*
c81143fc
  * Transfer the contents of the text into a blob
  * The caller must free the returned blob if b is NULL
  */
 blob *
2673dc74
 textToBlob(text *t, blob *b, int destroy)
c81143fc
 {
288057e9
     size_t s;
     blob *bin;
c81143fc
 
288057e9
     if (t == NULL)
         return NULL;
324c8ced
 
288057e9
     s = 0;
324c8ced
 
288057e9
     (void)textIterate(t, getLength, &s, 0);
324c8ced
 
288057e9
     if (s == 0)
         return b;
c81143fc
 
288057e9
     /*
2673dc74
 	 * copy b. If b is NULL and an error occurs we know we need to free
 	 *	before returning
 	 */
288057e9
     bin = b;
     if (b == NULL) {
         b = blobCreate();
c81143fc
 
288057e9
         if (b == NULL)
             return NULL;
     }
c81143fc
 
288057e9
     if (blobGrow(b, s) != CL_SUCCESS) {
         cli_warnmsg("Couldn't grow the blob: we may be low on memory\n");
 #if 0
2673dc74
 		if(!destroy) {
 			if(bin == NULL)
 				blobDestroy(b);
 			return NULL;
 		}
 		/*
 		 * We may be able to recover enough memory as we destroy to
 		 * create the blob
 		 */
 #else
288057e9
         if (bin == NULL)
             blobDestroy(b);
         return NULL;
2673dc74
 #endif
288057e9
     }
c81143fc
 
288057e9
     (void)textIterate(t, addToBlob, b, destroy);
c81143fc
 
288057e9
     if (destroy && t->t_next) {
         textDestroy(t->t_next);
         t->t_next = NULL;
     }
46d546c0
 
288057e9
     blobClose(b);
c81143fc
 
288057e9
     return b;
c81143fc
 }
0e5a0129
 
 fileblob *
2673dc74
 textToFileblob(text *t, fileblob *fb, int destroy)
0e5a0129
 {
288057e9
     assert(fb != NULL);
     assert(t != NULL);
0e5a0129
 
288057e9
     if (fb == NULL) {
         cli_dbgmsg("textToFileBlob, destroy = %d\n", destroy);
         fb = fileblobCreate();
0e5a0129
 
288057e9
         if (fb == NULL)
             return NULL;
     } else {
         cli_dbgmsg("textToFileBlob to %s, destroy = %d\n",
                    fileblobGetFilename(fb), destroy);
04995b8c
 
288057e9
         fb->ctx = NULL; /* no need to scan */
     }
0e5a0129
 
288057e9
     fb = textIterate(t, addToFileblob, fb, destroy);
     if (destroy && t->t_next) {
         textDestroy(t->t_next);
         t->t_next = NULL;
     }
     return fb;
324c8ced
 }
0e5a0129
 
324c8ced
 /*
  * textIterate callback: add the number of bytes this line will occupy to
  * the size_t that arg points to. That is the length of the line data (when
  * there is a line) plus one for the newline written after every line.
  */
 static void
 getLength(const line_t *line, void *arg)
 {
     size_t *total = (size_t *)arg;
     size_t bytes  = 1; /* the trailing newline */

     if (line != NULL)
         bytes += strlen(lineGetData(line));

     *total += bytes;
 }
 
 static void
 addToBlob(const line_t *line, void *arg)
 {
288057e9
     blob *b = (blob *)arg;
324c8ced
 
288057e9
     if (line) {
         const char *l = lineGetData(line);
324c8ced
 
288057e9
         blobAddData(b, (const unsigned char *)l, strlen(l));
     }
     blobAddData(b, (const unsigned char *)"\n", 1);
324c8ced
 }
 
 static void
 addToFileblob(const line_t *line, void *arg)
 {
288057e9
     fileblob *fb = (fileblob *)arg;
324c8ced
 
288057e9
     if (line) {
         const char *l = lineGetData(line);
324c8ced
 
288057e9
         fileblobAddData(fb, (const unsigned char *)l, strlen(l));
     }
     fileblobAddData(fb, (const unsigned char *)"\n", 1);
324c8ced
 }
 
 static void *
2673dc74
 textIterate(text *t_text, void (*cb)(const line_t *item, void *arg), void *arg, int destroy)
324c8ced
 {
288057e9
     /*
30e18caf
 	 * Have two loops rather than one, so that we're not checking the
 	 * value of "destroy" lots and lots of times
 	 */
288057e9
 #if 0
324c8ced
 	while(t_text) {
 		(*cb)(t_text->t_line, arg);
2673dc74
 
 		if(destroy && t_text->t_line) {
 			lineUnlink(t_text->t_line);
 			t_text->t_line = NULL;
 		}
 
324c8ced
 		t_text = t_text->t_next;
 	}
30e18caf
 #else
288057e9
     if (destroy)
         while (t_text) {
             (*cb)(t_text->t_line, arg);
30e18caf
 
288057e9
             if (t_text->t_line) {
                 lineUnlink(t_text->t_line);
                 t_text->t_line = NULL;
             }
30e18caf
 
288057e9
             t_text = t_text->t_next;
         }
     else
         while (t_text) {
             (*cb)(t_text->t_line, arg);
30e18caf
 
288057e9
             t_text = t_text->t_next;
         }
30e18caf
 #endif
288057e9
     return arg;
0e5a0129
 }