#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <ctype.h>

#include "udm_config.h"
#include "udm_common.h"
#include "udm_word.h"
#include "udm_db.h"
#include "udm_log.h"
#include "udm_charset.h"
#include "udm_spell.h"
#include "udm_xmalloc.h"
#include "udm_stopwords.h"
#include "udm_agent.h"

/* In non-phrase mode the whole word list is re-sorted once more than */
/* this many words have been appended after the sorted prefix         */
#define RESORT_WORDS	256
/* Initial capacity of the word array and the number of UDM_WORD      */
/* slots it grows by each time it becomes full                        */
#define WSIZE		1024

static int cmpwords(const void * v1, const void * v2){
	return(strcmp(((const UDM_WORD*)v1)->word,((const UDM_WORD*)v2)->word));
}

/*
 * Add one word to the word list kept in Indexer->Word.
 *
 *   Indexer   - agent that owns the word list (Word, nwords, swords,
 *               mwords), the per-language counters and the configuration
 *   Server    - not used by this function
 *   word      - NUL-terminated word; a private copy is stored, the
 *               caller keeps ownership of its own buffer
 *   where     - value merged into the "count" field of the word entry
 *   checkstop - when non-zero the word is first looked up with
 *               UdmIsStopWord() and dropped if it is a stop word
 *
 * Words longer than max_word_len or shorter than min_word_len are
 * silently ignored.
 *
 * Returns 0 in all cases except when the copy of the word cannot be
 * allocated, in which case 1 is returned and nothing is modified.
 */
static int AddOneWord(UDM_AGENT *Indexer,UDM_SERVER *Server,char *word,int where,int checkstop){
	char *s;
	UDM_STOPWORD * stop;
	int l,r,c,res,wlen;

	(void)Server;

	/* Check MaxWordLen and MinWordLen condition */
	wlen=strlen(word);
	if((wlen>Indexer->Conf->max_word_len)||(wlen<Indexer->Conf->min_word_len))
		return(0);

	s=strdup(word);
	/* strdup() returns NULL when out of memory; every use of s */
	/* below would dereference it, so give up on this word      */
	if(!s)
		return(1);

	/* Just to be safe */
	if(wlen>UDM_MAXWORDSIZE)s[UDM_MAXWORDSIZE]=0;

	if(checkstop){
		/* Stopwords checking and language guesser */
		if((stop=UdmIsStopWord(Indexer->Conf,s))){
			int curlang;
			UdmLog(Indexer,UDM_LOG_DEBUG,"stop: '%s'-'%s'", stop->word, stop->lang);
			/* Count the stop word for its own language:           */
			/* remember the current language index, let            */
			/* UdmSelectLang() pick the stop word's one            */
			/* (presumably by updating Indexer->curlang - confirm) */
			/* and restore the saved index afterwards              */
			curlang = Indexer->curlang;
			UdmSelectLang(Indexer, stop->lang);
			Indexer->lang[Indexer->curlang].count++;
			Indexer->curlang = curlang;
			free(s);
			return(0);
		}
	}

	/* Update current language word count */
	Indexer->lang[Indexer->curlang].count++;

	if(Indexer->Conf->use_phrases){
		/* Add word position */
		where+=(Indexer->wordpos<<16);
	}else{
		size_t i;
		
		/* For non-phrase search check */
		/* that word is already exist  */
		/* Find current word in sorted part of word list */
		/* (binary search over the first "swords" entries) */
		l=0;r=Indexer->swords-1;
		while(l<=r){
			c=(l+r)/2;
			res=strcmp(Indexer->Word[c].word,s);
			if(res==0){
				/* Known word: merge "where" into the entry */
				/* and add 0x10000 to its count             */
				Indexer->Word[c].count|=where;
				Indexer->Word[c].count+=0x10000;
				free(s);
				return(0);
			}
			if(res<0)
				l=c+1;
			else
				r=c-1;
		}

		/* Now find in unsorted part (linear scan of the tail) */
		for(i=Indexer->swords;i<Indexer->nwords;i++){
			if(!strcmp(Indexer->Word[i].word,s)){
				Indexer->Word[i].count|=where;
				Indexer->Word[i].count+=0x10000;
				free(s);return(0);
			}
		}
	}
	
	/* Realloc memory when required: grow by WSIZE entries */
	if(Indexer->nwords>=Indexer->mwords){
		if(Indexer->mwords){
			Indexer->mwords+=WSIZE;
			Indexer->Word=(UDM_WORD *)UdmXrealloc(Indexer->Word,Indexer->mwords*sizeof(UDM_WORD));
		}else{
			Indexer->mwords=WSIZE;
			Indexer->Word=(UDM_WORD *)UdmXmalloc(Indexer->mwords*sizeof(UDM_WORD));
		}
	}

	/* Add new word; the list takes ownership of the copy */
	Indexer->Word[Indexer->nwords].word=s;
	
	if(!Indexer->Conf->use_phrases){
		Indexer->Word[Indexer->nwords].count=where+0x10000;
	}else{
		Indexer->Word[Indexer->nwords].count=where;
	}
	Indexer->nwords++;

	/* Sort unsorted part if non-phrase index: once the tail grows  */
	/* past RESORT_WORDS entries the whole list is sorted again and */
	/* becomes the new sorted prefix                                */
	if(!Indexer->Conf->use_phrases){
		if((Indexer->nwords-Indexer->swords)>RESORT_WORDS){
			qsort(Indexer->Word,Indexer->nwords,sizeof(UDM_WORD),cmpwords);
			Indexer->swords=Indexer->nwords;
		}
	}
	return(0);
}

/*
 * This function adds a normalized word form(s) into list using Ispell.
 *
 *   Indexer   - agent that owns the word list; its wordpos counter is
 *               incremented on every call, even when the word is rejected
 *   Server    - supplies number_factor, alnum_factor, correct_factor and
 *               incorrect_factor, which decide whether the word is kept
 *   word      - NUL-terminated word; it is passed to UdmTolower() and
 *               so may be modified in place
 *   where     - forwarded unchanged to AddOneWord()
 *   checkstop - forwarded unchanged to AddOneWord()
 *
 * Words made only of digits are skipped when number_factor is zero;
 * words mixing digits with other characters are skipped when
 * alnum_factor is zero.
 *
 * Always returns 0.
 */
int UdmAddWord(UDM_AGENT *Indexer,UDM_SERVER *Server,char *word,int where,int checkstop){
	char 	** forms, ** saveforms;
	int	have_digit=0;
	int	have_alpha=0;

	Indexer->wordpos++;

	if(Server->number_factor==0||Server->alnum_factor==0){
		const char *s;
		for(s=word;*s;s++){
			/* <ctype.h> functions require a value representable */
			/* as unsigned char; a plain char may be negative for */
			/* non-ASCII bytes, which is undefined behaviour      */
			if(isdigit((unsigned char)*s))
				have_digit=1;
			else	
				have_alpha=1;
			/* Both kinds seen: nothing more to learn */
			if(have_digit&&have_alpha)break;
		}
		if(have_digit){
			if(have_alpha){
				if(!Server->alnum_factor)return(0);
			}else{
				if(!Server->number_factor)return(0);
			}
		}
	}
	UdmTolower(word,Indexer->Conf->local_charset);
	if((saveforms=forms=UdmNormalizeWord(Indexer,word))){
		/* Add all NORMAL forms of the word */
		while(*forms){
			/* Add only if correct words are allowed */
			if(Server->correct_factor){
				AddOneWord(Indexer,Server,*forms,where,checkstop);
			}
			/* Each form is released here whether it was added or not */
			free(*forms);
			forms++;
		}
		/* Release the NULL-terminated array itself */
		free(saveforms);
	}else{
		/* If NORMAL forms has not been found  */
		/*   then we will add the word itself  */
		/* Do it only when incorrect words are */
		/* allowed by configuration            */
		if(Server->incorrect_factor)
			AddOneWord(Indexer,Server,word,where,checkstop);
	}
	return(0);
}

/*
 * Release the string of every entry currently stored in Indexer->Word
 * and mark the list as empty by zeroing nwords and swords.
 * The Word array itself and mwords are not touched here.
 * Always returns 0.
 */
int UdmFreeWords(UDM_AGENT* Indexer) {
	size_t idx=0;

	while(idx<Indexer->nwords){
		free(Indexer->Word[idx].word);
		idx++;
	}
	Indexer->swords=0;
	Indexer->nwords=0;
	return(0);
}
