/*
 * The Spread Toolkit.
 *     
 * The contents of this file are subject to the Spread Open-Source
 * License, Version 1.0 (the ``License''); you may not use
 * this file except in compliance with the License.  You may obtain a
 * copy of the License at:
 *
 * http://www.spread.org/license/
 *
 * or in the file ``license.txt'' found in this distribution.
 *
 * Software distributed under the License is distributed on an AS IS basis, 
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 
 * for the specific language governing rights and limitations under the 
 * License.
 *
 * The Creators of Spread are:
 *  Yair Amir, Michal Miskin-Amir, Jonathan Stanton.
 *
 *  Copyright (C) 1993-2001 Spread Concepts LLC <spread@spreadconcepts.com>
 *
 *  All Rights Reserved.
 *
 * Major Contributor(s):
 * ---------------
 *    Dan Schoenblum   dansch@cnds.jhu.edu - Java Interface Developer.
 *    John Schultz     jschultz@cnds.jhu.edu - contribution to process group membership.
 *    Theo Schlossnagle theos@cnds.jhu.edu - Perl library and Skiplists.
 *
 */

/* group communication manager for spread
 * Author: Bettina Kemme
 * Creation Date: 19-July-2001
 
 * make it one group communication manager
 * first form of failure handling if sites leave the group
 */

//#include "sp.h"

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include        <sys/types.h>
#include        <sys/socket.h>
#include 	<pthread.h>
#include "replication/replication.h"
//#include "replication/groupcomm.h"

#include "sp.h"

static	char	User[80];
static  char    Group[80];
static  char    Spread_name[80];
static  service Service_type;

static  char    Private_group[MAX_GROUP_NAME];
static  mailbox Mbox;

static  int     To_exit = 0;

#define MAX_MESSLEN     102400


static  pthread_t	Read_pthread;
static  void    *Read_thread_routine();


/*
*	socket to the replication mgr (blocking)
*/
static int rmgrSock_simple = -1;
static int rmgrSock_to_rmgr_simple = -1;
static int rmgrSock_total = -1;
static int rmgrSock_to_rmgr_total = -1;
static int rmgrSock = -1;

#define INVALID_SOCK (-1)
/*
*	receive and send buffers
*/
static char send_buf[MAX_MESSLEN];
static int  send_msg_length;
static char read_buf[MAX_MESSLEN];
static int  read_msg_length;

/* keep the hostids and the hostnames off all current group members */
static uint32  group_hostids_1[MAX_GROUP_MEMBERS];
static char group_hostnames_1[MAX_GROUP_MEMBERS][MAX_GROUP_NAME];
static uint32  group_hostids_2[MAX_GROUP_MEMBERS];
static char group_hostnames_2[MAX_GROUP_MEMBERS][MAX_GROUP_NAME];
static int  current_group_size =0;
static int  previous_group_size =0;
static uint32  *current_group_hostids;
static uint32  *previous_group_hostids;
static char (*current_group_hostnames)[MAX_GROUP_NAME];
static char (*previous_group_hostnames)[MAX_GROUP_NAME];

static uint32 group_failed_hostids[MAX_GROUP_MEMBERS];


static	void	Read_message();
static  void    Send_message();
static int readData(int sock, int len, char buf[]);
static int writeData(int sock, int len, char buf[]);
static  void	Bye();
int GroupCommMain(int s1, int s2, int s3, int s4);

/* for old code: remove if it works */
int GroupCommMain_basic(int s1, int s2);
int GroupCommMain_total(int s1, int s2);


int GroupCommMain(int s1, int s2, int s3, int s4) 
{
  
 int ret;
   fd_set		active_rsocks;
   fd_set		tmp_rmask;
   int maxSock = -1;

	uint32	myHostId;


#ifdef RMGR_DEBUG
  elog(NOTICE, "GroupComm pid %d", getpid());
#endif
  
  rmgrSock_simple = s1;
  rmgrSock_to_rmgr_simple = s2;
  rmgrSock_total = s3;
  rmgrSock_to_rmgr_total = s4;
  
  current_group_hostids = group_hostids_1;
  current_group_hostnames = group_hostnames_1;
  previous_group_hostids = group_hostids_2;
  previous_group_hostnames = group_hostnames_2;

  
  FD_ZERO(&active_rsocks);
  printf("server port set\n");
  FD_SET(rmgrSock_simple, &active_rsocks);
  if(maxSock < rmgrSock_simple)
    maxSock = rmgrSock_simple;
  FD_SET(rmgrSock_total, &active_rsocks);
  if(maxSock < rmgrSock_total)
    maxSock = rmgrSock_total;
    
  if (getenv ("SPREAD_NAME"))
        strcpy(Spread_name, getenv("SPREAD_NAME"));
  else
        strcpy(Spread_name, "4803@localhost");

  if (getenv ("SPREAD_GROUP"))
        strcpy(Group, getenv("SPREAD_GROUP"));
  else
        strcpy(Group, "replicationGroup");

  myHostId = gethostid();
  sprintf(User, "%x", myHostId);
  elog(NOTICE," hostId = %lu (unsigned long), %ld (signed long) %x (hexa), USER %s", 
  myHostId, myHostId, myHostId, User);
  
 
  ret = SP_connect( Spread_name, User, 0, 1, 
		    &Mbox, Private_group);
	
  if( ret != ACCEPT_SESSION ) 
    {
      SP_error( ret );
      Bye();
    }
    
    elog(NOTICE,"sp_connected %s", Spread_name);

  
  ret = SP_join (Mbox, Group);
   if (ret < 0)
    {
      SP_error(ret);
      Bye();
    }
   elog(NOTICE,"sp_joined %s",Group);


  /* this is the thread which listens continuously to incoming messages 
   * from the spread deamon */ 
  ret = pthread_create( &Read_pthread, NULL, Read_thread_routine, 0 );

 
  //elog(NOTICE,"read thread created");
  
  /* now start listening to the replication manager sockets, to whether
   * whether the replication manager wants to send some messages */
  for(;;)
    {
      /* listen on both the simple and the total socket */
 	  //elog(NOTICE,"I am at the beginning of the for loop");
      memmove((char *) &tmp_rmask, (char *) &active_rsocks, 
	      sizeof(fd_set));
      if(select(maxSock + 1,  &tmp_rmask, (fd_set *) NULL, (fd_set *) NULL,
		(struct timeval *) NULL) < 0)
		{
	  		if (errno == EINTR)
	    		continue;
	  		elog(ERROR, "RmgrMain: select failed (errno=%d)", errno);
	  		return STATUS_ERROR;
		}
		//elog(NOTICE,"got mssg from replicaManager");
		if (rmgrSock_simple != INVALID_SOCK &&
	  		FD_ISSET(rmgrSock_simple, &tmp_rmask))
		{
			//elog(NOTICE,"send reliable mess");
			Service_type = RELIABLE_MESS;
			rmgrSock = rmgrSock_simple;
			Send_message();
		}
		if (rmgrSock_total != INVALID_SOCK &&
			FD_ISSET(rmgrSock_total, &tmp_rmask))
		{
			//elog(NOTICE,"send agreed mess");
			Service_type = AGREED_MESS;
			rmgrSock = rmgrSock_total;
			Send_message();
			//elog(NOTICE,"agree mess sent");
    	}
    }
    return(0);
}

static	
void	*Read_thread_routine()
{
  for(;;)
    {
      //elog(NOTICE,"read thread prepared to read messsage");
      Read_message();
      
    }
  return( 0 );
}

static void Send_message()
{
	int datalen;
	int	ret;
	
      /* read message from replication manager */
      /*
       *	read the message type
       */
    if(readData(rmgrSock, GROUPCOMM_HDR_SIZE, send_buf) <= 0)
	{
	  elog(NOTICE," i read wrong data\n");
	  proc_exit(400);
	}
    datalen = ntohl(*((long *) (send_buf + 12)));
      
    if(datalen != 0)
	{	
	  if(readData(rmgrSock, datalen, send_buf + GROUPCOMM_HDR_SIZE) <= 0)
	    {
	      printf("don't read good data\n");
	      proc_exit(400);
	    }
	}
      send_msg_length = datalen + GROUPCOMM_HDR_SIZE;

	//elog(NOTICE,"before multicast sent");
	ret= SP_multicast( Mbox, Service_type, Group, 
			   1, send_msg_length, send_buf );
			   

	if( ret < 0 ) 
	  {
	    SP_error( ret );
	    Bye();
	  }
}

static	void	Read_message()
{

 char		sender[MAX_GROUP_NAME];
 char		target_groups[MAX_GROUP_MEMBERS][MAX_GROUP_NAME];
 int		num_groups;
 int		service_type;
 int16		mess_type;
 int		endian_mismatch;
 int		ret;
 int		num_bytes;
 
 /* now some variables to capture failure cases */
 int	    i,j;
 int        k=0;
 bool       failed;
 uint32    *h1;
 char   	(*h2)[MAX_GROUP_NAME], *h3;
 uint32 	host1, hostid, txnid, tmp4;
 int datalen;
 
 /* receive the message from the spread deamon */
 read_msg_length = SP_receive( Mbox, &service_type, sender, MAX_GROUP_MEMBERS, 
		   &num_groups, target_groups, 
		   &mess_type, &endian_mismatch, 
		   sizeof(read_buf), read_buf );


 if( ret < 0 ) 
   {
     SP_error( ret );
     Bye();
   }
	//elog(NOTICE,"message received, msg length = %d", read_msg_length);
	
/* forward a regular message to the appropriate total or simple socket to
   replica manager
*/
 if( Is_regular_mess( service_type ) )
   {
 
   	//	elog(NOTICE,"regular message");
     if     (Is_reliable_mess( service_type )) 
       {
      // 		elog(NOTICE,"simple message");
       	  if(writeData(rmgrSock_to_rmgr_simple, read_msg_length, read_buf) <= 0)
       		{
	 			elog(NOTICE,"reliable write data did somehow not work");
	 			Bye();
       		}
       }
     if (Is_agreed_mess (service_type))
     	{
 		//	elog(NOTICE,"agreed message");
 			if(writeData(rmgrSock_to_rmgr_total, read_msg_length, read_buf) <= 0)
       		{
	 			elog(NOTICE,"total write data did somehow not work");
	 			Bye();
       		}
       }
   		
   }


 /* currently, we only forward information if sites have failed */
 /* more stuff should follow */
 if ( Is_membership_mess( service_type ) )
   {
   	if (Is_reg_memb_mess (service_type))
   	{
		/* group change: set current setting to old settings */
		/* take old space for the new settings */
		elog(NOTICE,"received a regular memb message");
		h1 = previous_group_hostids;
		h2 = previous_group_hostnames;
		previous_group_hostids = current_group_hostids;
		previous_group_hostnames = current_group_hostnames;
		current_group_hostids = h1;
		current_group_hostnames = h2;
		previous_group_size = current_group_size;
		current_group_size = num_groups;

		/* get the new memberships right */
		for (i=0;i<num_groups;i++)
		{
			num_bytes = 1;
			current_group_hostids[i] = strtoul(&target_groups[i][num_bytes],&h3,16);
			strcpy(current_group_hostnames[i],h3+1);
			elog(NOTICE, "hostid %u, hostname = %s",
				current_group_hostids[i],current_group_hostnames[i]);
		}
				
		/* figure out the hosts that disappeared */
		for (i=0,k=0;i<previous_group_size;i++)
		{
			failed = TRUE;
			host1 =  previous_group_hostids[i];
			for (j=0;j<current_group_size;j++)
			{
				if (host1 == current_group_hostids[j])
				{
					failed = FALSE;
					elog(NOTICE,"old host %u has not failed", host1);
   					break;
				}
			}
			if (failed == TRUE)
			{
				group_failed_hostids[k] = host1;
				k++;
			}
		}
		
		/* send view change message to replica Manager containing
	       the hostids of all failed nodes  */
	    /* the format is the following: 
	       hostid and txnid are set to 0, so the replica manager will
	       detect that this is a special message
	       the new message type is MSG_SITE_HAS_LEFT_GROUP, more types
	       will probably follow
	       the datalen of the rest of the message is 4 bytes for each failed site
	       then there is a list of uint32 hostids that have failed
	     */
		if (k>0)
		{
			elog(NOTICE,"%d sites have failed",k);
			hostid = 0;
			tmp4 = htonl(hostid);
			memcpy(read_buf, (const char *) &tmp4, 4);
			tmp4 = htonl(txnid);
			memcpy((read_buf+4), (const char *) &tmp4, 4);
			tmp4 = htonl((uint32) MSG_SITE_HAS_LEFT_GROUP);
			memcpy((read_buf+8), (const char *) &tmp4, 4);
			datalen = k * 4;
			tmp4 = htonl(datalen);
			memcpy((read_buf+12), (const char *) &tmp4, 4);
			for (i=0;i<k;i++)
			{
				tmp4 = htonl(group_failed_hostids[i]);
				elog(NOTICE,"inform replica about failed %u",group_failed_hostids[i]);
				memcpy((read_buf+16+i*4), (const char *) &tmp4, 4);
			}
			
			/* write the message to both channels to guarantee virtual
			   synchrony on both simple and total channel */
			if(writeData(rmgrSock_to_rmgr_simple, GROUPCOMM_HDR_SIZE + datalen, read_buf) <= 0)
       		{
	 			elog(NOTICE,"reliable write data did somehow not work");
	 			Bye();
       		}
     	
 		//	elog(NOTICE,"agreed message");
 			if(writeData(rmgrSock_to_rmgr_total, GROUPCOMM_HDR_SIZE + datalen, read_buf) <= 0)
       		{
	 			elog(NOTICE,"total write data did somehow not work");
	 			Bye();
       		}
		}
		/* these are only all the special cases of regular memberships that
		   can occur, maybe we want to use different behavior for
		   different failure cases in the future 
		*/
    	if (Is_caused_disconnect_mess (service_type))
    	{
    		elog(NOTICE,"caused disconnect");
    	}
    	else if (Is_caused_network_mess (service_type))
    	{
    		elog(NOTICE,"caused network");
    	}
    	else if (Is_caused_join_mess(service_type))
    	{
        	elog(NOTICE,"joined");
    	}
    	else if (Is_caused_leave_mess(service_type))
    	{
        	elog(NOTICE,"caused leave");
    	}
    	else
    	{
        	elog(NOTICE,"unknown reg memb mess of type 0x%x", service_type);
    	}
   	}
   	/* transition messages are only of interest if we use SAFE:
   	*  used properly this will guarantee us that even in the case
   	* of failures and network partitions full atomicity in the entire
   	* system: whenever any site commits a transaction all sites that
   	          are up for sufficiently long time in a primary partition will
   	          commit this transaction
   	* we currently use only agreed delivery: here it can happen that
   	* a site commits a transaction locally shortly before it fails or
   	* moves to a minority partition but the write set and or commit
   	* message of this transaction is not delivered at the available sites
   	* and hence, they miss this transaction.
   	*/
   	else if (Is_transition_mess (service_type))
   	{
   		elog(NOTICE,"transition mess");
   	}
   	else if (Is_caused_leave_mess(service_type))
   	{
   		elog(NOTICE,"caused leave 2");
   	}
   	else
   	{
   		elog(NOTICE,"Unknow mess type 0x%x", service_type);
   	}
   }
}


/*
*	readData
*
*	Reads a given number of bytes from a given (blocking) socket
*
*	param:
*		sock - the socket to read from
*		len - the number of bytes to read
*		buf - buffer to read them into
*	return:
*		void
*/

static	int
readData(int sock, int len, char buf[])
{
	int nread = 0; 
	
	for(;;)
	{
		if((nread = read(sock, buf , len)) <= 0)
		{
			if(nread < 0 && (errno == EINTR || errno == EAGAIN))
			{
				continue;
			}
			else
			{
				elog(DEBUG, "pg_transis: closing socket");
				close(sock);
			}
		}
		break;
	}
	return nread;
}

/*
*	writeData
*
*	Writes a given number of bytes to a given (blocking) socket
*
*	param:
*		sock - the socket to write to
*		len - the number of bytes to write
*		buf - buffer containing the data to write
*	return:
*		void
*/
static int
writeData(int sock, int len, char buf[])
{
	int nwritten = 0;
	
	for(;;)
	{
		if((nwritten = write(sock, buf, len)) <= 0)
		{
			if(nwritten < 0 && (errno == EINTR || errno == EAGAIN))
			{
				printf("in the continue loop\n");
				continue;
			}
			else
			{
				printf("in the closing socket thingi \n");
				elog(DEBUG, "pg_transis: closing socket");
				close(sock);
			}
		}
		break;
	}
	return nwritten;
}

static  void	Bye()
{
	To_exit = 1;

	elog(NOTICE,"Bye");

	SP_disconnect( Mbox );
	pthread_join( Read_pthread, NULL );
	exit( 0 );
}


int GroupCommMain_total(int s1, int s2)
{
	elog(NOTICE,"empty total GroupCommMain_total");
	return(0);
}
int GroupCommMain_basic(int s1, int s2)
{
	elog(NOTICE,"empty GroupCommMain_basic");
	return(0);
}
