Browse code

Updated SESSION Code

git-svn-id: file:///var/lib/svn/clamav-devel/trunk/clamav-devel@852 77e5149b-7576-45b1-b177-96237e5ba77b

Nigel Horne authored on 2004/09/13 22:14:34
Showing 2 changed files
... ...
@@ -490,6 +490,7 @@ Changes
490 490
 			information
491 491
 		Added first draft of SESSION code. Do NOT use in a production
492 492
 			environment.
493
+0.75p	13/9/04	Updated SESSION code.
493 494
 
494 495
 INTERNATIONALISATION
495 496
 
... ...
@@ -26,6 +26,9 @@
26 26
  *
27 27
  * Change History:
28 28
  * $Log: clamav-milter.c,v $
29
+ * Revision 1.124  2004/09/13 13:14:34  nigelhorne
30
+ * Updated SESSION Code
31
+ *
29 32
  * Revision 1.123  2004/09/12 14:23:47  nigelhorne
30 33
  * Added SESSION Code
31 34
  *
... ...
@@ -380,9 +383,9 @@
380 380
  * Revision 1.6  2003/09/28 16:37:23  nigelhorne
381 381
  * Added -f flag use MaxThreads if --max-children not set
382 382
  */
383
-static	char	const	rcsid[] = "$Id: clamav-milter.c,v 1.123 2004/09/12 14:23:47 nigelhorne Exp $";
383
+static	char	const	rcsid[] = "$Id: clamav-milter.c,v 1.124 2004/09/13 13:14:34 nigelhorne Exp $";
384 384
 
385
-#define	CM_VERSION	"0.75o"
385
+#define	CM_VERSION	"0.75p"
386 386
 
387 387
 /*#define	CONFDIR	"/usr/local/etc"*/
388 388
 
... ...
@@ -465,12 +468,31 @@ int	deny_severity = LOG_NOTICE;
465 465
 typedef	unsigned short	in_port_t;
466 466
 #endif
467 467
 
468
-/* Do not define SESSION - it puts clamd/acceptloop_th() into a loop */
468
+/*
469
+ * Do not define SESSION in a production environment - it has been known to put
470
+ * clamd into a loop, and sending STREAM often returns EPIPE
471
+ *
472
+ * It is however OK for testing: code is now in place to reopen a session
473
+ * that has gone bad, and it would be useful to find out the set of
474
+ * circumstances that causes clamd to loop, unless it's the SESSION close
475
+ * without END bug
476
+ */
469 477
 /*#define	SESSION	/*
470 478
 		 * Keep one command connection open to clamd, otherwise a new
471 479
 		 * command connection is created for each new email
472 480
 		 */
473 481
 
482
+#ifdef	SESSION
483
+#define	WATCHDOG_SECONDS	300	/*
484
+					 * How often (in seconds) to try to
485
+					 * fix broken clamd sessions.
486
+					 * We may try more often than this
487
+					 * e.g. when we're idle or all
488
+					 * connections are down, so you can
489
+					 * put this figure quite high
490
+					 */
491
+#endif
492
+
474 493
 /*
475 494
  * TODO: optional: xmessage on console when virus stopped (SNMP would be real nice!)
476 495
  *	Having said that, with LogSysLog you can (on Linux) configure the system
... ...
@@ -550,7 +572,11 @@ struct	privdata {
550 550
 	int	serverNumber;	/* Index into serverIPs */
551 551
 };
552 552
 
553
+#ifdef	SESSION
554
+static	int		createSession(int session);
555
+#else
553 556
 static	int		pingServer(int serverNumber);
557
+#endif
554 558
 static	int		findServer(void);
555 559
 static	sfsistat	clamfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr);
556 560
 static	sfsistat	clamfi_envfrom(SMFICTX *ctx, char **argv);
... ...
@@ -577,6 +603,10 @@ static	int	qfile(struct privdata *privdata, const char *virusname);
577 577
 static	void	setsubject(SMFICTX *ctx, const char *virusname);
578 578
 static	int	clamfi_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len);
579 579
 static	int	isLocalAddr(in_addr_t addr);
580
+static	void	clamdIsDown(void);
581
+#ifdef	SESSION
582
+static	void	*watchdog(void *a);
583
+#endif
580 584
 
581 585
 static	char	clamav_version[128];
582 586
 static	int	fflag = 0;	/* force a scan, whatever */
... ...
@@ -678,10 +708,19 @@ static	char	*port = NULL;	/* sendmail->milter comms */
678 678
 
679 679
 static	const	char	*serverHostNames = "127.0.0.1";
680 680
 static	long	*serverIPs;	/* IPv4 only */
681
+static	int	numServers;	/* number of elements in serverIPs/cmdSockets */
682
+
681 683
 #ifdef	SESSION
682 684
 static	int	*cmdSockets;
685
+static	int	*cmdSocketsStatus;
686
+static	pthread_mutex_t sstatus_mutex = PTHREAD_MUTEX_INITIALIZER;
687
+#define	CMDSOCKET_FREE	0
688
+#define	CMDSOCKET_INUSE	1
689
+#define	CMDSOCKET_DOWN	2
690
+
691
+static	pthread_cond_t	watchdog_cond = PTHREAD_COND_INITIALIZER;
692
+
683 693
 #endif	/*SESSION*/
684
-static	int	numServers;	/* number of elements in serverIPs/cmdSockets */
685 694
 
686 695
 static	const	char	*postmaster = "postmaster";
687 696
 static	const	char	*from = "MAILER-DAEMON";
... ...
@@ -747,6 +786,9 @@ main(int argc, char **argv)
747 747
 	struct cfgstruct *cpt;
748 748
 	struct passwd *user;
749 749
 	const char *pidfile = NULL;
750
+#ifdef	SESSION
751
+	pthread_t tid;
752
+#endif
750 753
 	struct smfiDesc smfilter = {
751 754
 		"ClamAv", /* filter name */
752 755
 		SMFI_VERSION,	/* version code -- leave untouched */
... ...
@@ -1136,6 +1178,10 @@ main(int argc, char **argv)
1136 1136
 	 * Get the outgoing socket details - the way to talk to clamd
1137 1137
 	 */
1138 1138
 	if((cpt = cfgopt(copt, "LocalSocket")) != NULL) {
1139
+#ifdef	SESSION
1140
+		struct sockaddr_un server;
1141
+#endif
1142
+
1139 1143
 		if(cfgopt(copt, "TCPSocket") != NULL) {
1140 1144
 			fprintf(stderr, _("%s: You can select one server type only (local/TCP) in %s\n"),
1141 1145
 				argv[0], cfgfile);
... ...
@@ -1145,6 +1191,7 @@ main(int argc, char **argv)
1145 1145
 		 * TODO: check --server hasn't been set
1146 1146
 		 */
1147 1147
 		localSocket = cpt->strarg;
1148
+#ifndef	SESSION
1148 1149
 		if(!pingServer(-1)) {
1149 1150
 			fprintf(stderr, _("Can't talk to clamd server via %s\n"),
1150 1151
 				localSocket);
... ...
@@ -1152,6 +1199,7 @@ main(int argc, char **argv)
1152 1152
 				cfgfile);
1153 1153
 			return EX_CONFIG;
1154 1154
 		}
1155
+#endif
1155 1156
 		/*if(quarantine_dir == NULL)
1156 1157
 			fprintf(stderr, _("When using Localsocket in %s\nyou may improve performance if you use the --quarantine-dir option\n"), cfgfile);*/
1157 1158
 
... ...
@@ -1159,6 +1207,33 @@ main(int argc, char **argv)
1159 1159
 
1160 1160
 		serverIPs = (long *)cli_malloc(sizeof(long));
1161 1161
 		serverIPs[0] = inet_addr("127.0.0.1");
1162
+
1163
+#ifdef	SESSION
1164
+		memset((char *)&server, 0, sizeof(struct sockaddr_un));
1165
+		server.sun_family = AF_UNIX;
1166
+		strncpy(server.sun_path, localSocket, sizeof(server.sun_path));
1167
+
1168
+		cmdSockets = (int *)cli_malloc(sizeof(int));
1169
+		if((cmdSockets[0] = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
1170
+			perror(localSocket);
1171
+			fprintf(stderr, _("Can't talk to clamd server via %s\n"),
1172
+				localSocket);
1173
+			fprintf(stderr, _("Check your entry for LocalSocket in %s\n"),
1174
+				cfgfile);
1175
+			return EX_CONFIG;
1176
+		}
1177
+		if(connect(cmdSockets[0], (struct sockaddr *)&server, sizeof(struct sockaddr_un)) < 0) {
1178
+			perror(localSocket);
1179
+			return EX_UNAVAILABLE;
1180
+		}
1181
+		if(send(cmdSockets[0], "SESSION\n", 7, 0) < 7) {
1182
+			perror("send");
1183
+			if(use_syslog)
1184
+				syslog(LOG_ERR, _("Can't create a clamd session"));
1185
+			return EX_UNAVAILABLE;
1186
+		}
1187
+#endif
1188
+		numServers = 1;
1162 1189
 	} else if((cpt = cfgopt(copt, "TCPSocket")) != NULL) {
1163 1190
 		int activeServers;
1164 1191
 
... ...
@@ -1188,6 +1263,16 @@ main(int argc, char **argv)
1188 1188
 		serverIPs = (long *)cli_malloc(numServers * sizeof(long));
1189 1189
 		activeServers = 0;
1190 1190
 
1191
+#ifdef	SESSION
1192
+		/*
1193
+		 * We need to know how many connections to establish to clamd
1194
+		 */
1195
+		if(max_children == 0) {
1196
+			fprintf(stderr, _("%s: Sessions does not multiplex\n"), argv[0]);
1197
+			return EX_CONFIG;
1198
+		}
1199
+#endif
1200
+
1191 1201
 		for(i = 0; i < numServers; i++) {
1192 1202
 			char *hostname = cli_strtok(serverHostNames, i, ":");
1193 1203
 
... ...
@@ -1207,20 +1292,31 @@ main(int argc, char **argv)
1207 1207
 				memcpy((char *)&serverIPs[i], h->h_addr, sizeof(serverIPs[i]));
1208 1208
 			}
1209 1209
 
1210
+#ifndef	SESSION
1210 1211
 			if(pingServer(i))
1211 1212
 				activeServers++;
1212 1213
 			else {
1213 1214
 				cli_warnmsg(_("Can't talk to clamd server %s on port %d\n"),
1214 1215
 					hostname, tcpSocket);
1215 1216
 			}
1217
+#endif
1216 1218
 			free(hostname);
1217 1219
 		}
1220
+#ifdef	SESSION
1221
+		activeServers = numServers;
1222
+		cmdSockets = (int *)cli_malloc(max_children * sizeof(int));
1223
+		cmdSocketsStatus = (int *)cli_calloc(max_children, sizeof(int));
1224
+		for(i = 0; i < max_children; i++)
1225
+			if(createSession(i) < 0)
1226
+				return EX_UNAVAILABLE;
1227
+#else
1218 1228
 		if(activeServers == 0) {
1219 1229
 			cli_errmsg(_("Can't find any clamd servers\n"));
1220 1230
 			cli_errmsg(_("Check your entry for TCPSocket in %s\n"),
1221 1231
 				cfgfile);
1222 1232
 			return EX_CONFIG;
1223 1233
 		}
1234
+#endif
1224 1235
 	} else {
1225 1236
 		fprintf(stderr, _("%s: You must select server type (local/TCP) in %s\n"),
1226 1237
 			argv[0], cfgfile);
... ...
@@ -1228,70 +1324,7 @@ main(int argc, char **argv)
1228 1228
 	}
1229 1229
 
1230 1230
 #ifdef	SESSION
1231
-	if(localSocket) {
1232
-		struct sockaddr_un server;
1233
-
1234
-		memset((char *)&server, 0, sizeof(struct sockaddr_un));
1235
-		server.sun_family = AF_UNIX;
1236
-		strncpy(server.sun_path, localSocket, sizeof(server.sun_path));
1237
-
1238
-		cmdSockets = (int *)cli_malloc(sizeof(int));
1239
-		if((cmdSockets[0] = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
1240
-			perror(localSocket);
1241
-			return EX_UNAVAILABLE;
1242
-		}
1243
-		if(connect(cmdSockets[0], (struct sockaddr *)&server, sizeof(struct sockaddr_un)) < 0) {
1244
-			perror(localSocket);
1245
-			return EX_UNAVAILABLE;
1246
-		}
1247
-		if(send(cmdSockets[0], "SESSION\n", 7, 0) < 7) {
1248
-			perror("send");
1249
-			if(use_syslog)
1250
-				syslog(LOG_ERR, _("Can't create a clamd session"));
1251
-			return EX_UNAVAILABLE;
1252
-		}
1253
-	} else {
1254
-		/*
1255
-		 * FIXME: Sessions code doesn't allow more than one datastream
1256
-		 * at a time to a server
1257
-		 */
1258
-		if(max_children > numServers) {
1259
-			fprintf(stderr, _("%s: Sessions does not multiplex\n"), argv[0]);
1260
-			return EX_CONFIG;
1261
-		}
1262
-
1263
-		cmdSockets = (int *)cli_malloc(numServers * sizeof(int));
1264
-
1265
-		assert(serverIPs != NULL);
1266
-
1267
-		for(i = 0; i < numServers; i++) {
1268
-			struct sockaddr_in server;
1269
-
1270
-			memset((char *)&server, 0, sizeof(struct sockaddr_in));
1271
-			server.sin_family = AF_INET;
1272
-			server.sin_port = (in_port_t)htons(tcpSocket);
1273
-
1274
-			server.sin_addr.s_addr = serverIPs[i];
1275
-
1276
-			if((cmdSockets[i] = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
1277
-				perror("socket");
1278
-				return EX_UNAVAILABLE;
1279
-			}
1280
-			if(connect(cmdSockets[i], (struct sockaddr *)&server, sizeof(struct sockaddr_in)) < 0) {
1281
-				perror("connect");
1282
-				return EX_UNAVAILABLE;
1283
-			}
1284
-			if(send(cmdSockets[i], "SESSION\n", 7, 0) < 7) {
1285
-				char *hostname = cli_strtok(serverHostNames, i, ":");
1286
-				perror("send");
1287
-				cli_warnmsg(_("Check clamd server %s - it may be down\n"), hostname);
1288
-				free(hostname);
1289
-
1290
-				close(cmdSockets[i]);
1291
-				cmdSockets[i] = -1;
1292
-			}
1293
-		}
1294
-	}
1231
+	pthread_create(&tid, NULL, watchdog, NULL);
1295 1232
 #endif
1296 1233
 
1297 1234
 	if(!cfgopt(copt, "Foreground")) {
... ...
@@ -1412,6 +1445,47 @@ main(int argc, char **argv)
1412 1412
 	return smfi_main();
1413 1413
 }
1414 1414
 
1415
+#ifdef	SESSION
1416
+/*
1417
+ * Use the SESSION command of clamd.
1418
+ * Returns -1 for terminal failure, 0 for OK, 1 for nonterminal failure
1419
+ * The caller must take care of locking the cmdSocketsStatus array
1420
+ */
1421
+static int
1422
+createSession(int session)
1423
+{
1424
+	struct sockaddr_in server;
1425
+	const int serverNumber = session % numServers;
1426
+
1427
+	memset((char *)&server, 0, sizeof(struct sockaddr_in));
1428
+	server.sin_family = AF_INET;
1429
+	server.sin_port = (in_port_t)htons(tcpSocket);
1430
+
1431
+	server.sin_addr.s_addr = serverIPs[serverNumber];
1432
+
1433
+	if((cmdSockets[session] = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
1434
+		perror("socket");
1435
+		return -1;
1436
+	}
1437
+	if(connect(cmdSockets[session], (struct sockaddr *)&server, sizeof(struct sockaddr_in)) < 0) {
1438
+		perror("connect");
1439
+		return -1;
1440
+	}
1441
+	if(send(cmdSockets[session], "SESSION\n", 7, 0) < 7) {
1442
+		char *hostname = cli_strtok(serverHostNames, serverNumber, ":");
1443
+		perror("send");
1444
+		cli_warnmsg(_("Check clamd server %s - it may be down\n"), hostname);
1445
+		free(hostname);
1446
+
1447
+		cmdSocketsStatus[session] = CMDSOCKET_DOWN;
1448
+		return 1;
1449
+	}
1450
+	cli_dbgmsg("cmdSockets[%d] = %d\n", session, cmdSockets[session]);
1451
+	return 0;
1452
+}
1453
+
1454
+#else
1455
+
1415 1456
 /*
1416 1457
  * Verify that the server is where we think it is
1417 1458
  * Returns true or false
... ...
@@ -1513,6 +1587,7 @@ pingServer(int serverNumber)
1513 1513
 
1514 1514
 	return 1;
1515 1515
 }
1516
+#endif
1516 1517
 
1517 1518
 /*
1518 1519
  * Find the best server to connect to. No intelligence to this.
... ...
@@ -1524,6 +1599,47 @@ pingServer(int serverNumber)
1524 1524
  * If the load balancing fails return the first server in the list, not
1525 1525
  * an error, to be on the safe side
1526 1526
  */
1527
+#ifdef	SESSION
1528
+static int
1529
+findServer(void)
1530
+{
1531
+	int i;
1532
+
1533
+	/*
1534
+	 * FIXME: Sessions code isn't flexible at handling servers
1535
+	 *	appearing and disappearing, e.g. cmdSockets[n_children] == -1
1536
+	 */
1537
+	pthread_mutex_lock(&n_children_mutex);
1538
+	assert(n_children > 0);
1539
+	assert(n_children <= max_children);
1540
+	i = n_children - 1;
1541
+	pthread_mutex_unlock(&n_children_mutex);
1542
+
1543
+	pthread_mutex_lock(&sstatus_mutex);
1544
+	for(; i < max_children; i++)
1545
+		if(cmdSocketsStatus[i] == CMDSOCKET_FREE) {
1546
+			cmdSocketsStatus[i] = CMDSOCKET_INUSE;
1547
+			pthread_mutex_unlock(&sstatus_mutex);
1548
+			return i;
1549
+		}
1550
+	pthread_mutex_unlock(&sstatus_mutex);
1551
+
1552
+	pthread_cond_signal(&watchdog_cond);
1553
+
1554
+	pthread_mutex_lock(&sstatus_mutex);
1555
+	for(; i < max_children; i++)
1556
+		if(cmdSocketsStatus[i] == CMDSOCKET_FREE) {
1557
+			cmdSocketsStatus[i] = CMDSOCKET_INUSE;
1558
+			pthread_mutex_unlock(&sstatus_mutex);
1559
+			return i;
1560
+		}
1561
+	pthread_mutex_unlock(&sstatus_mutex);
1562
+
1563
+	cli_warnmsg(_("No free clamd sessions\n"));
1564
+
1565
+	return -1;	/* none available - must fail */
1566
+}
1567
+#else
1527 1568
 static int
1528 1569
 findServer(void)
1529 1570
 {
... ...
@@ -1544,9 +1660,16 @@ findServer(void)
1544 1544
 
1545 1545
 	FD_ZERO(&rfds);
1546 1546
 
1547
-	if(max_children > 0)
1548
-		j = n_children - 1;	/* Don't worry about no lock */
1549
-	else
1547
+	if(max_children > 0) {
1548
+		assert(n_children > 0);
1549
+		assert(n_children <= max_children);
1550
+
1551
+		/*
1552
+		 * Don't worry about no lock - it doesn't matter if it's
1553
+		 * not really accurate
1554
+		 */
1555
+		j = n_children - 1;
1556
+	} else
1550 1557
 		/*
1551 1558
 		 * cli_rndnum returns 0..(max-1) - the max argument is not
1552 1559
 		 * the maximum number you want it to return, it is in fact
... ...
@@ -1557,16 +1680,6 @@ findServer(void)
1557 1557
 	for(i = 0, server = servers; i < numServers; i++, server++) {
1558 1558
 		int sock;
1559 1559
 
1560
-#ifdef	SESSION
1561
-		/*
1562
-		 * FIXME: Sessions code isn't flexible at handling servers
1563
-		 *	appearing and disappearing
1564
-		 * FIXME: ensure we don't try scanning with a server that's
1565
-		 *	already scanning
1566
-		 */
1567
-		if(cmdSockets[i] == -1)
1568
-			continue;
1569
-#endif
1570 1560
 		server->sin_family = AF_INET;
1571 1561
 		server->sin_port = (in_port_t)htons(tcpSocket);
1572 1562
 		server->sin_addr.s_addr = serverIPs[(i + j) % numServers];
... ...
@@ -1575,6 +1688,7 @@ findServer(void)
1575 1575
 			(i + j) % numServers);
1576 1576
 
1577 1577
 		sock = socks[i] = socket(AF_INET, SOCK_STREAM, 0);
1578
+
1578 1579
 		if(sock < 0) {
1579 1580
 			perror("socket");
1580 1581
 			do
... ...
@@ -1625,56 +1739,8 @@ findServer(void)
1625 1625
 			close(socks[i]);
1626 1626
 
1627 1627
 	if(retval == 0) {
1628
-		static time_t lasttime;
1629
-		time_t thistime, diff;
1630
-		static pthread_mutex_t time_mutex = PTHREAD_MUTEX_INITIALIZER;
1631
-
1632
-		/*
1633
-		 * This is serious, we need to inform someone.
1634
-		 * In the absence of SNMP the best way is by e-mail. We
1635
-		 * don't want to flood so there's a need to restrict to
1636
-		 * no more than say one message every 15 minutes
1637
-		 */
1638 1628
 		free(socks);
1639
-		cli_dbgmsg(_("findServer: No response from any server\n"));
1640
-		if(use_syslog)
1641
-			syslog(LOG_WARNING, _("findServer: No response from any server"));
1642
-
1643
-		time(&thistime);
1644
-		pthread_mutex_lock(&time_mutex);
1645
-		diff = thistime - lasttime;
1646
-		pthread_mutex_unlock(&time_mutex);
1647
-
1648
-		if(diff >= (time_t)(15 * 60)) {
1649
-			char cmd[128];
1650
-			FILE *sendmail;
1651
-
1652
-			snprintf(cmd, sizeof(cmd) - 1, "%s -t", SENDMAIL_BIN);
1653
-
1654
-			sendmail = popen(cmd, "w");
1655
-
1656
-			if(sendmail) {
1657
-				fprintf(sendmail, "To: %s\n", postmaster);
1658
-				fprintf(sendmail, "From: %s\n", postmaster);
1659
-				fputs(_("Subject: ClamAV Down\n"), sendmail);
1660
-				fputs("Priority: High\n\n", sendmail);
1661
-
1662
-				fputs(_("This is an automatic message\n\n"), sendmail);
1663
-
1664
-				if(numServers == 1)
1665
-					fputs(_("The clamd program cannot be contacted.\n"), sendmail);
1666
-				else
1667
-					fputs(_("No clamd server can be contacted.\n"), sendmail);
1668
-
1669
-				fputs(_("Emails may not be being scanned, please check your servers.\n"), sendmail);
1670
-
1671
-				if(pclose(sendmail) == 0) {
1672
-					pthread_mutex_lock(&time_mutex);
1673
-					time(&lasttime);
1674
-					pthread_mutex_unlock(&time_mutex);
1675
-				}
1676
-			}
1677
-		}
1629
+		clamdIsDown();
1678 1630
 		return 0;
1679 1631
 	} else if(retval < 0) {
1680 1632
 		free(socks);
... ...
@@ -1698,6 +1764,7 @@ findServer(void)
1698 1698
 		syslog(LOG_WARNING, _("findServer: No response from any server"));
1699 1699
 	return 0;
1700 1700
 }
1701
+#endif
1701 1702
 
1702 1703
 /*
1703 1704
  * Sendmail wants to establish a connection to us
... ...
@@ -1826,7 +1893,7 @@ clamfi_connect(SMFICTX *ctx, char *hostname, _SOCK_ADDR *hostaddr)
1826 1826
 #ifdef	CL_DEBUG
1827 1827
 		if(use_syslog)
1828 1828
 			syslog(LOG_DEBUG, _("clamfi_connect: not scanning local messages"));
1829
-		cli_dbgmsg(_("clamfi_connect: not scanning outgoing messages\n"));
1829
+		cli_dbgmsg(_("clamfi_connect: not scanning local messages\n"));
1830 1830
 #endif
1831 1831
 		return SMFIS_ACCEPT;
1832 1832
 	}
... ...
@@ -1859,11 +1926,16 @@ clamfi_envfrom(SMFICTX *ctx, char **argv)
1859 1859
 			struct timespec timeout;
1860 1860
 			struct timezone tz;
1861 1861
 
1862
+			cli_dbgmsg((dont_wait) ?
1863
+					_("hit max-children limit (%u >= %u)\n") :
1864
+					_("hit max-children limit (%u >= %u): waiting for some to exit\n"),
1865
+				n_children, max_children);
1866
+
1862 1867
 			if(use_syslog)
1863 1868
 				syslog(LOG_NOTICE,
1864
-					((dont_wait) ?
1869
+					(dont_wait) ?
1865 1870
 						_("hit max-children limit (%u >= %u)") :
1866
-						_("hit max-children limit (%u >= %u): waiting for some to exit")),
1871
+						_("hit max-children limit (%u >= %u): waiting for some to exit"),
1867 1872
 					n_children, max_children);
1868 1873
 
1869 1874
 			if(dont_wait) {
... ...
@@ -2240,7 +2312,12 @@ clamfi_eom(SMFICTX *ctx)
2240 2240
 		return cl_error;
2241 2241
 	}
2242 2242
 
2243
-#ifndef	SESSION
2243
+#ifdef	SESSION
2244
+	pthread_mutex_lock(&sstatus_mutex);
2245
+	if(cmdSocketsStatus[privdata->serverNumber] == CMDSOCKET_INUSE)
2246
+		cmdSocketsStatus[privdata->serverNumber] = CMDSOCKET_FREE;
2247
+	pthread_mutex_unlock(&sstatus_mutex);
2248
+#else
2244 2249
 	close(privdata->cmdSocket);
2245 2250
 	privdata->cmdSocket = -1;
2246 2251
 #endif
... ...
@@ -2404,7 +2481,7 @@ clamfi_eom(SMFICTX *ctx)
2404 2404
 			/* Include the sendmail queue ID in the log */
2405 2405
 			syslog(LOG_NOTICE, "%s: %s %s", sendmailId, mess, err);
2406 2406
 #ifdef	CL_DEBUG
2407
-			cli_dbgmsg("%s\n", err);
2407
+			cli_dbgmsg("%s", err);
2408 2408
 #endif
2409 2409
 			free(err);
2410 2410
 		}
... ...
@@ -2640,6 +2717,7 @@ clamfi_cleanup(SMFICTX *ctx)
2640 2640
 static void
2641 2641
 clamfi_free(struct privdata *privdata)
2642 2642
 {
2643
+	cli_dbgmsg("clamfi_free\n");
2643 2644
 	if(privdata) {
2644 2645
 		if(privdata->body)
2645 2646
 			free(privdata->body);
... ...
@@ -2693,15 +2771,23 @@ clamfi_free(struct privdata *privdata)
2693 2693
 		}
2694 2694
 
2695 2695
 #ifdef	SESSION
2696
-		if(readTimeout && (cmdSockets[privdata->serverNumber] >= 0)) for(;;) {
2697
-			char buf[64];
2696
+		pthread_mutex_lock(&sstatus_mutex);
2697
+		if(cmdSocketsStatus[privdata->serverNumber] == CMDSOCKET_INUSE) {
2698
+			pthread_mutex_unlock(&sstatus_mutex);
2699
+			if(readTimeout) {
2700
+				char buf[64];
2701
+				const int fd = cmdSockets[privdata->serverNumber];
2698 2702
 
2699
-			cli_dbgmsg("clamfi_free: Flush cmd server %d (fd %d)\n",
2700
-				privdata->serverNumber, cmdSockets[privdata->serverNumber]);
2703
+				cli_dbgmsg("clamfi_free: flush server %d fd %d\n",
2704
+					privdata->serverNumber, fd);
2701 2705
 
2702
-			while(clamd_recv(cmdSockets[privdata->serverNumber], buf, sizeof(buf)) > 0)
2703
-				puts(buf);
2706
+				while(clamd_recv(fd, buf, sizeof(buf)) > 0)
2707
+					;
2708
+			}
2709
+			pthread_mutex_lock(&sstatus_mutex);
2710
+			cmdSocketsStatus[privdata->serverNumber] = CMDSOCKET_FREE;
2704 2711
 		}
2712
+		pthread_mutex_unlock(&sstatus_mutex);
2705 2713
 #else
2706 2714
 		if(privdata->cmdSocket >= 0) {
2707 2715
 			char buf[64];
... ...
@@ -2730,11 +2816,17 @@ clamfi_free(struct privdata *privdata)
2730 2730
 
2731 2731
 	if(max_children > 0) {
2732 2732
 		pthread_mutex_lock(&n_children_mutex);
2733
+		cli_dbgmsg("clamfi_free: n_children = %d\n", n_children);
2733 2734
 		/*
2734 2735
 		 * Deliberately errs on the side of broadcasting too many times
2735 2736
 		 */
2736
-		if(n_children > 0)
2737
+		if(n_children > 0) {
2737 2738
 			--n_children;
2739
+#ifdef	SESSION
2740
+			if(n_children == 0)
2741
+				pthread_cond_signal(&watchdog_cond);
2742
+#endif
2743
+		}
2738 2744
 #ifdef	CL_DEBUG
2739 2745
 		cli_dbgmsg("pthread_cond_broadcast\n");
2740 2746
 #endif
... ...
@@ -3149,9 +3241,12 @@ connect2clamd(struct privdata *privdata)
3149 3149
 
3150 3150
 #ifdef	SESSION
3151 3151
 		if(send(cmdSockets[freeServer], "STREAM\n", 7, 0) < 7) {
3152
-			cli_dbgmsg("Sending stream to server %d (fd %d)\n",
3153
-				freeServer, cmdSockets[freeServer]);
3154 3152
 			perror("send");
3153
+			pthread_mutex_lock(&sstatus_mutex);
3154
+			cmdSocketsStatus[privdata->serverNumber] = CMDSOCKET_DOWN;
3155
+			pthread_mutex_unlock(&sstatus_mutex);
3156
+			cli_warnmsg("Failed sending stream to server %d (fd %d) errno %d\n",
3157
+				freeServer, cmdSockets[freeServer], errno);
3155 3158
 			if(use_syslog)
3156 3159
 				syslog(LOG_ERR, _("send failed to clamd"));
3157 3160
 			return 0;
... ...
@@ -3270,7 +3365,7 @@ connect2clamd(struct privdata *privdata)
3270 3270
 				return 0;
3271 3271
 	}
3272 3272
 
3273
-	cli_dbgmsg("connect2clamd OK\n");
3273
+	cli_dbgmsg("connect2clamd: serverNumber = %d\n", privdata->serverNumber);
3274 3274
 
3275 3275
 	return 1;
3276 3276
 }
... ...
@@ -3559,3 +3654,141 @@ isLocalAddr(in_addr_t addr)
3559 3559
 
3560 3560
 	return 0;	/* is non-local */
3561 3561
 }
3562
+
3563
+/*
3564
+ * Can't connect to any clamd server. This is serious, we need to inform
3565
+ * someone. In the absence of SNMP the best way is by e-mail. We
3566
+ * don't want to flood so there's a need to restrict to
3567
+ * no more than say one message every 15 minutes
3568
+ */
3569
+static void
3570
+clamdIsDown(void)
3571
+{
3572
+	static time_t lasttime;
3573
+	time_t thistime, diff;
3574
+	static pthread_mutex_t time_mutex = PTHREAD_MUTEX_INITIALIZER;
3575
+
3576
+	cli_errmsg(_("No response from any clamd server - your AV system is not scanning emails\n"));
3577
+
3578
+	if(use_syslog)
3579
+		syslog(LOG_ERR, _("No response from any clamd server - your AV system is not scanning emails"));
3580
+
3581
+	time(&thistime);
3582
+	pthread_mutex_lock(&time_mutex);
3583
+	diff = thistime - lasttime;
3584
+	pthread_mutex_unlock(&time_mutex);
3585
+
3586
+	if(diff >= (time_t)(15 * 60)) {
3587
+		char cmd[128];
3588
+		FILE *sendmail;
3589
+
3590
+		snprintf(cmd, sizeof(cmd) - 1, "%s -t", SENDMAIL_BIN);
3591
+
3592
+		sendmail = popen(cmd, "w");
3593
+
3594
+		if(sendmail) {
3595
+			fprintf(sendmail, "To: %s\n", postmaster);
3596
+			fprintf(sendmail, "From: %s\n", postmaster);
3597
+			fputs(_("Subject: ClamAV Down\n"), sendmail);
3598
+			fputs("Priority: High\n\n", sendmail);
3599
+
3600
+			fputs(_("This is an automatic message\n\n"), sendmail);
3601
+
3602
+			if(numServers == 1)
3603
+				fputs(_("The clamd program cannot be contacted.\n"), sendmail);
3604
+			else
3605
+				fputs(_("No clamd server can be contacted.\n"), sendmail);
3606
+
3607
+			fputs(_("Emails may not be being scanned, please check your servers.\n"), sendmail);
3608
+
3609
+			if(pclose(sendmail) == 0) {
3610
+				pthread_mutex_lock(&time_mutex);
3611
+				time(&lasttime);
3612
+				pthread_mutex_unlock(&time_mutex);
3613
+			}
3614
+		}
3615
+	}
3616
+}
3617
+
3618
+#ifdef	SESSION
3619
+/*
3620
+ * Thread to monitor the links to clamd sessions. Any marked as being in
3621
+ * an error state because of previous I/O errors are restarted, and a heartbeat
3622
+ * is sent to the others
3623
+ */
3624
+static void *
3625
+watchdog(void *a)
3626
+{
3627
+	static pthread_mutex_t watchdog_mutex = PTHREAD_MUTEX_INITIALIZER;
3628
+
3629
+	pthread_mutex_lock(&watchdog_mutex);
3630
+	for(;;) {
3631
+		int i;
3632
+		struct timespec ts;
3633
+		struct timeval tp;
3634
+
3635
+		gettimeofday(&tp, NULL);
3636
+		ts.tv_sec = tp.tv_sec + WATCHDOG_SECONDS;
3637
+		ts.tv_nsec = tp.tv_usec * 1000;
3638
+		cli_dbgmsg("watchdog sleeps\n");
3639
+		if(pthread_cond_timedwait(&watchdog_cond, &watchdog_mutex, &ts) == ETIMEDOUT)
3640
+			pthread_mutex_lock(&watchdog_mutex);
3641
+		cli_dbgmsg("watchdog wakes\n");
3642
+
3643
+		pthread_mutex_lock(&sstatus_mutex);
3644
+		for(i = 0; i < max_children; i++) {
3645
+			const int sock = cmdSockets[i];
3646
+
3647
+			/*
3648
+			 * Check all free sessions are still usable
3649
+			 * This could take some time with many free
3650
+			 * sessions to slow remote servers, so only do this
3651
+			 * when the system is quiet (not 100% accurate when
3652
+			 * determining this since n_children isn't locked but
3653
+			 * that doesn't really matter)
3654
+			 */
3655
+			cli_dbgmsg("watchdog: check server %d\n", i);
3656
+			if((n_children == 0) && (cmdSocketsStatus[i] == CMDSOCKET_FREE)) {
3657
+				if(send(sock, "PING\n", 5, 0) == 5) {
3658
+					char buf[6];
3659
+
3660
+					buf[5] = '\0';
3661
+					if(clamd_recv(sock, buf, 5) != 5)
3662
+						cmdSocketsStatus[i] = CMDSOCKET_DOWN;
3663
+					else if(strcmp(buf, "PONG\n") != 0)
3664
+						cmdSocketsStatus[i] = CMDSOCKET_DOWN;
3665
+				} else
3666
+					cmdSocketsStatus[i] = CMDSOCKET_DOWN;
3667
+
3668
+				if(cmdSocketsStatus[i] == CMDSOCKET_DOWN)
3669
+					cli_warnmsg("Session %d has gone down\n", i);
3670
+			}
3671
+			/*
3672
+			 * Reset all dead sessions
3673
+			 */
3674
+			if(cmdSocketsStatus[i] == CMDSOCKET_DOWN) {
3675
+				/*
3676
+				 * The END command probably won't get through,
3677
+				 * but let's give it a go anyway
3678
+				 */
3679
+				send(sock, "END\n", 4, 0);
3680
+				close(sock);
3681
+
3682
+				cli_dbgmsg("Trying to restart session %d\n", i);
3683
+				if(createSession(i) == 0) {
3684
+					cmdSocketsStatus[i] = CMDSOCKET_FREE;
3685
+					cli_warnmsg("Session %d restarted OK\n", i);
3686
+				}
3687
+			}
3688
+		}
3689
+		for(i = 0; i < max_children; i++)
3690
+			if(cmdSocketsStatus[i] != CMDSOCKET_DOWN)
3691
+				break;
3692
+
3693
+		if(i == max_children)
3694
+			clamdIsDown();
3695
+		pthread_mutex_unlock(&sstatus_mutex);
3696
+	}
3697
+	return NULL;
3698
+}
3699
+#endif