#include "udm_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "udm_common.h"
#include "udm_spell.h"
#include "udm_db.h"
#include "udm_hrefs.h"
#include "udm_utils.h"

/* All links are stored in the cache of this structure */
/* before actual INSERT into database                  */

/* One cached link waiting to be written to the database */
typedef struct href_struct {
	char	*href;		/* URL text; the cache keeps its own heap copy     */
	int	referrer;	/* Referrer value handed on to UdmAddURL()         */
	int	hops;		/* Hops value handed on to UdmAddURL()             */
	int	stored;		/* Non-zero: entry needs no (further) DB insertion */
} UDM_HREF;

/* Cache size threshold: once more than MAXHSIZE URLs are cached,      */
/* UdmStoreHrefs() drops the whole cache (builds without HAVE_FILES).  */
/* Original estimate: about 4K URLs will use about 200K of RAM.        */
/* This should be a configurable parameter, but it is fixed for now.   */
/* The expansion is parenthesized so that the macro stays one operand  */
/* in any expression (e.g. x % MAXHSIZE).                              */

#define MAXHSIZE	(1023*4)
#define HSIZE		256	/* Number of entries the cache grows by */
#define RESORT_HREFS	256	/* Max length of unsorted part          */

/* File-private state of the link cache */
static UDM_HREF *Href = NULL;	/* Growable array of cached links            */
static int mhrefs = 0;		/* Entries allocated in Href                 */
static int nhrefs = 0;		/* Entries of Href actually in use           */
static int shrefs = 0;		/* Length of the leading sorted part         */
static int dhrefs = 0;		/* Index where UdmStoreHrefs() resumes work  */

/* Function to sort URLs in alphabetic order */
static int cmphrefs(const void * v1, const void * v2){
	return(strcmp(((UDM_HREF*)v1)->href,((UDM_HREF*)v2)->href));
}


/*
 * Add a link to the in-memory cache unless it is already there.
 *
 *   Indexer  - not used by this function
 *   href     - URL text; a private copy is kept, the caller keeps ownership
 *   referrer - value remembered for the later UdmAddURL() call
 *   hops     - value remembered for the later UdmAddURL() call
 *   stored   - non-zero if the URL needs no insertion into the database
 *
 * Returns 1 when a new entry was appended.
 * Returns 0 when the URL was already cached (its "stored" flag is then
 * OR-ed with the given one), and also when nothing could be added
 * because href is NULL or memory ran out; in the latter cases the cache
 * is left exactly as it was.
 */
__INDLIB__ int UdmAddHref(UDM_INDEXER *Indexer,char *href,int referrer,int hops,int stored){
int i,l,r,c,res;
char *copy;

	/* strcmp()/strdup() on NULL would be undefined behaviour */
	if(!href)
		return(0);

	/* Find current URL in sorted part of list (binary search) */
	l=0;r=shrefs-1;
	while(l<=r){
		c=(l+r)/2;
		if(!(res=strcmp(Href[c].href,href))){
			Href[c].stored|=stored;
			return(0);
		}
		if(res<0)
			l=c+1;
		else
			r=c-1;
	}
	/* Find in unsorted part (linear scan of the tail) */
	for(i=shrefs;i<nhrefs;i++){
		if(!strcmp(Href[i].href,href)){
			Href[i].stored|=stored;
			return(0);
		}
	}
	/* Grow the array by HSIZE entries when it is full */
	if(nhrefs>=mhrefs){
		int newsize=mhrefs+HSIZE;
		UDM_HREF *grown;

		if(mhrefs)
			grown=(UDM_HREF *)realloc(Href,newsize*sizeof(UDM_HREF));
		else
			grown=(UDM_HREF *)malloc(newsize*sizeof(UDM_HREF));
		/* On failure keep the old array and counters untouched */
		if(!grown)
			return(0);
		Href=grown;
		mhrefs=newsize;
	}
	/* Copy the URL before touching the slot, so a failed */
	/* allocation never leaves a half-filled entry behind */
	copy=strdup(href);
	if(!copy)
		return(0);
	Href[nhrefs].href=copy;
	Href[nhrefs].referrer=referrer;
	Href[nhrefs].hops=hops;
	Href[nhrefs].stored=stored;
	nhrefs++;

	/* Sort unsorted part once it became longer than RESORT_HREFS */
	if((nhrefs-shrefs)>RESORT_HREFS){
		qsort(Href,nhrefs,sizeof(UDM_HREF),cmphrefs);
		shrefs=nhrefs;	/* Remember count of sorted URLs  */
		/* Sorting moves entries around, so the resume position of  */
		/* UdmStoreHrefs() is reset; the per-entry "stored" flags   */
		/* still tell which URLs were already written.              */
		dhrefs=0;
	}
	return(1);
}
/* Drop the whole link cache: free every URL copy, release the array */
/* itself and reset all bookkeeping counters to zero.                */
extern __INDLIB__ void UdmFreeHrefs(){
int idx=nhrefs;

	/* Release the per-entry URL strings, last entry first */
	while(idx-- > 0)
		free(Href[idx].href);

	/* Release the array, then mark the cache as empty */
	UDM_FREE(Href);
	dhrefs=0;
	shrefs=0;
	mhrefs=0;
	nhrefs=0;
}
/*
 * Pass every cached link that is not yet marked as stored to UdmAddURL().
 *
 *   Indexer - handed to UdmAddURL(); Indexer->db is checked for errors
 *
 * Scanning starts at the position remembered from the previous call.
 * For URLs that contain '@' and '/', the text after the last '/' is
 * passed as message id; otherwise an empty string is passed.
 * URLs longer than UDM_URLSIZE are not passed to UdmAddURL() but are
 * still counted and marked as stored.
 *
 * Returns the number of entries processed by this call. The function
 * returns early when the database reports an error after UdmAddURL()
 * (the failing entry is included in the count and stays unmarked) or
 * when the message id copy cannot be allocated (that entry is not
 * counted and stays unmarked).
 */
extern __INDLIB__ int UdmStoreHrefs(UDM_INDEXER * Indexer){
int i;
int added=0;

	for(i=dhrefs;i<nhrefs;i++){
		char * msg_buf=NULL;	/* heap copy of the message id, if any */
		char * msg_id="";
		char * pos;

		if(Href[i].stored)
			continue;

		/* see if it is a NEWS message */
		if((strchr(Href[i].href,'@'))&&(pos = strrchr(Href[i].href,'/'))){
			msg_buf = (char *)malloc( strlen(pos + 1) +1);
			if(!msg_buf)
				return(added);
			strcpy(msg_buf, (pos + 1));
			msg_id = msg_buf;
		}
		added++;
		/* now add the url with its message id */
		if(strlen(Href[i].href)<=UDM_URLSIZE){
			UdmAddURL(Indexer,Href[i].href,Href[i].referrer,Href[i].hops,msg_id);
			if(UdmDBErrorCode(Indexer->db)){
				/* do not leak the copy on the error path */
				free(msg_buf);
				return(added);
			}
		}
		/* Free the copy whenever one was made; testing the string   */
		/* length instead would leak an empty id (URL ending in '/') */
		free(msg_buf);
		Href[i].stored=1;
	}
	/* Remember last stored URL num   */
	/* Note that it will become 0     */
	/* after next sort in UdmAddHref  */
	dhrefs=nhrefs;

	/* We should not free URL list with own database */
	/* to avoid double indexing of the same document */
	/* So, do it if compiled with SQL only           */

#ifndef HAVE_FILES
	if(nhrefs>MAXHSIZE)
		UdmFreeHrefs();
#endif
	return(added);
}
