/*****************************************************************************
 * MySQL Data Store for ENTROPY
 * By Nightblade
 *
 *	$Id: store_mysql.c,v 1.1.1.1 2005/06/21 14:14:43 pullmoll Exp $
 *****************************************************************************/

#include "osd.h"
#include "config.h"
#include "store.h"
#include "file.h"
#include "logger.h"
#if STORE_TYPE == 2  /* squelch mysql.h warnings from gmake .depend */
#include <mysql.h>
#endif

/* Don't use unix socket/named pipe for the database connection */
#define DBSOCK NULL

/*
 * As long as the store drop FIFO is always filled with entries of
 * the same timeout, we do not need to sort the list before committing
 * the deletes.
 */
#define	STORE_QSORT	0

/* The size of the result of sha1_hexstr - does not include '\0' */
#define HEXSIZE (SHA1SIZE * 2)

/* global variables */
/* database connection; NULL while no connection is open (set by
 * store_connect_db, reset by store_disconnect_db) */
static MYSQL *dbconn = NULL;
/* process id that opened this database connection; store_connect_db
 * compares it with getpid() to detect a connection inherited over fork */
static pid_t dbconnpid = 0;
/* local functions */
/* delete one row and update the statistics; caller holds the store lock */
static int commit(const sha1_digest_t *sha1, size_t size);
/* create / drop the storeXX tables, one per storedepth */
static int store_table_create(void);
static int store_table_drop(void);
/* decode 2*SHA1SIZE hex digits into a digest */
static int hex2sha1(sha1_digest_t *sha1, const char *src);
/* run "optimize table" on every storeXX table */
static void optimize_tables(void);
/* worker behind store_get / store_check / store_del */
static int real_get(const sha1_digest_t *sha1, void *buff, int *ret_length,
	int do_stamp);
/* recount keys and bytes from the database */
static int scan_db(void);
/* qsort comparator used by store_purge */
static int sort_age_dist(const void *p1, const void *p2);
#if STORE_QSORT
/* qsort comparator for the drop key list (only with STORE_QSORT) */
static int sort_drop_time(const void *p1, const void *p2);
#endif
/* set a row's time column to now() */
static void stamp_key(const sha1_digest_t *sha1);

/*
 * Delete the row whose name is derived from the sha1 value and, if exactly
 * one row was removed, update the store statistics via store_sub_key().
 *
 * Important: Caller must set and release lock!!  This is so functions that
 * call commit multiple times can just set one lock at the beginning and one
 * lock at the end of multiple commits instead of doing a single lock in
 * each commit.  (see store_del)
 *
 * Takes sha1 (name), size of the data column (used only for logging and
 * for the statistics update)
 * Returns 0 on success, -1 if sha1 is NULL, the query failed, or the
 * number of affected rows was not exactly 1
 */
#define	MYSQL_COMMIT_QUERY	"delete from store%02x where sha1='%s'"
#define	MYSQL_COMMIT_SIZE	(sizeof(MYSQL_COMMIT_QUERY) + HEXSIZE)
static int commit(const sha1_digest_t *sha1, size_t size)
{
	char query[MYSQL_COMMIT_SIZE];  /* SQL delete query */
	size_t idx, tableno;
	uint8_t fpr = 0;
	FUN("commit");

	if (sha1 == NULL) {
		LOGS(L_STORE,L_ERROR,("sha1 is NULL\n"));
		return -1;
	}

	/* use SHA1 digest bytes 0 and 1 as an index */
	idx = sha1->digest[0] | ((uint32_t)sha1->digest[1] << 8);

	/* table number derived from idx */
	tableno = idx % g_conf->storedepth;

	/*
	 * fingerprint index
	 * NOTE(review): the result is not used in this function; presumably a
	 * leftover -- confirm keyroute() has no side effects before removing.
	 */
	fpr = keyroute(sha1);

	/* %02x consumes an unsigned int, so do not pass the size_t as is */
	pm_snprintf(query, MYSQL_COMMIT_SIZE, MYSQL_COMMIT_QUERY,
		(unsigned)tableno, sha1_hexstr(sha1));
	LOGS(L_STORE,L_DEBUGX,("query: %s\n", query));
	if (mysql_query(dbconn, query) != 0) {
		LOGS(L_STORE,L_ERROR,("failed to delete row for key %s: %s\n",
			sha1_hexstr(sha1), mysql_error(dbconn)));
		return -1;
	}
	if (mysql_affected_rows(dbconn) == 1) {
		LOGS(L_STORE,L_DEBUG,("COMMITTED key %s (size %u)\n",
			sha1_hexstr(sha1), (unsigned)size));
	} else if (mysql_affected_rows(dbconn) == 0) {
		/* nothing was deleted, so the statistics must stay untouched */
		LOGS(L_STORE,L_DEBUG,("NOT FOUND key %s (size %u)\n",
			sha1_hexstr(sha1), (unsigned)size));
		return -1;
	} else {
		LOGS(L_STORE,L_ERROR,("unexpected affected rows: %lu\n",
			(unsigned long)mysql_affected_rows(dbconn)));
		return -1;
	}

	/* update statistics */
	store_sub_key(sha1, size);

	return 0;
}

/*
 * Creates the 'storeXX' table(s), one for every table number from 0 up to
 * g_conf->storedepth - 1.  Stops at the first statement that fails, so
 * tables created before the failure are left in place.
 * Returns 0 on success, -1 on failure
 */
#define	MYSQL_CREATE_QUERY \
	"create table store%02x (" \
	"	sha1 char(%d) binary not null primary key," \
	"	data blob not null," \
	"	size smallint unsigned not null," \
	"	time timestamp" \
	")"
#define	MYSQL_CREATE_SIZE (sizeof(MYSQL_CREATE_QUERY)+6)
static int store_table_create(void)
{
	char query[MYSQL_CREATE_SIZE];
	size_t tableno;
	FUN("store_table_create");

	LOGS(L_STORE,L_NORMAL,("creating store table(s)\n"));
	for (tableno = 0; tableno < g_conf->storedepth; tableno++) {
		/* %02x consumes an unsigned int, so do not pass the size_t as is */
		pm_snprintf(query, MYSQL_CREATE_SIZE, MYSQL_CREATE_QUERY,
			(unsigned)tableno, HEXSIZE);
		if (mysql_query(dbconn, query) != 0) {
			LOGS(L_STORE,L_ERROR,("'%s' failed: %s\n",
				query, mysql_error(dbconn)));
			return -1;
		}
	}

	return 0;
}

/*
 * Drops (deletes) the 'storeXX' table(s), one for every table number from
 * 0 up to g_conf->storedepth - 1.  A table that does not exist (MySQL
 * error 1051) is silently skipped; any other error aborts the loop.
 * Returns 0 on success, -1 on failure
 */
#define	MYSQL_DROP_QUERY	"drop table store%02x"
#define	MYSQL_DROP_SIZE		(sizeof(MYSQL_DROP_QUERY))
static int store_table_drop(void)
{
	char query[MYSQL_DROP_SIZE];
	size_t tableno;
	FUN("store_table_drop");

	LOGS(L_STORE,L_NORMAL,("dropping store table(s)\n"));

	for (tableno = 0; tableno < g_conf->storedepth; tableno++) {
		/* %02x consumes an unsigned int, so do not pass the size_t as is */
		pm_snprintf(query, MYSQL_DROP_SIZE, MYSQL_DROP_QUERY,
			(unsigned)tableno);
		if (mysql_query(dbconn, query) != 0) {
			/* "Unknown table" - the store was already dropped */
			if (mysql_errno(dbconn) == 1051)
				continue;
			LOGS(L_STORE,L_ERROR,("'%s' failed: %s\n",
				query, mysql_error(dbconn)));
			return -1;
		}
	}

	return 0;
}

/*
 *	hex2sha1()
 *	Decode exactly 2*SHA1SIZE hex digits (upper or lower case) from
 *	src into sha1->digest[], two digits per byte, high nibble first.
 *	Returns 0 on success.  Any other character -- including a '\0'
 *	that ends the string too early -- sets errno to EINVAL and
 *	returns -1; the digest then holds only what was decoded so far,
 *	with the remainder zeroed.
 */
static int hex2sha1(sha1_digest_t *sha1, const char *src)
{
	size_t pos;
	int nibble;

	memset(sha1, 0, sizeof(*sha1));

	for (pos = 0; pos < 2 * SHA1SIZE; pos++) {
		const char c = src[pos];

		if (c >= '0' && c <= '9') {
			nibble = c - '0';
		} else if (c >= 'A' && c <= 'F') {
			nibble = c - 'A' + 10;
		} else if (c >= 'a' && c <= 'f') {
			nibble = c - 'a' + 10;
		} else {
			errno = EINVAL;
			return -1;
		}

		/* even positions carry the high nibble of the byte */
		if (0 == (pos % 2))
			sha1->digest[pos / 2] |= nibble << 4;
		else
			sha1->digest[pos / 2] |= nibble;
	}

	return 0;
}

/*
 * Defragment the database table(s): issues "optimize table storeXX" for
 * every table number from 0 up to g_conf->storedepth - 1.  The first
 * statement that fails is logged and ends the run; the remaining tables
 * are then left unoptimized.
 */
#define	MYSQL_OPTIMIZE_QUERY "optimize table store%02x"
#define	MYSQL_OPTIMIZE_SIZE (sizeof(MYSQL_OPTIMIZE_QUERY))
static void optimize_tables(void)
{
	char query[MYSQL_OPTIMIZE_SIZE];
	MYSQL_RES *result;  /* the result of the optimization query */
	size_t tableno;
	FUN("optimize_tables");

	LOGS(L_STORE,L_NORMAL,("optimizing store\n"));
	info("optimizing\b\b\b\b\b\b\b\b\b\b");
	for (tableno = 0; tableno < g_conf->storedepth; tableno++) {
		/* %02x consumes an unsigned int, so do not pass the size_t as is */
		pm_snprintf(query, MYSQL_OPTIMIZE_SIZE, MYSQL_OPTIMIZE_QUERY,
			(unsigned)tableno);
		if (mysql_query(dbconn, query) != 0) {
			/*
			 * The statement itself failed.  (A missing table is assumed
			 * not to end up here but to show in the result rows instead.)
			 */
			LOGS(L_STORE,L_ERROR,("'%s' failed: %s\n",
				query, mysql_error(dbconn)));
			return;
		}
		/* we have to get the result to keep the commands from going
		 * out of sync
		 */
		result = mysql_store_result(dbconn);
		if (result != NULL)
			mysql_free_result(result);
	}
}

/*
 * Sets the store's current size, and keycounts.
 * Reads sha1 and size of every row in every storeXX table while holding
 * the store lock.  g_store->currsize is reset first; g_store->storecount
 * and g_store->keycount[] are only added to.
 * If a table is missing (MySQL error 1146) the tables are created via
 * store_table_create() and the scan ends; the caller is expected to call
 * again.
 * Returns 0 on success, -1 on failure, -2 if a table was created
 */
#define	MYSQL_SELECT_QUERY	"select sha1,size from store%02x"
#define	MYSQL_SELECT_SIZE	(sizeof(MYSQL_SELECT_QUERY))
static int scan_db(void)
{
	char query[MYSQL_SELECT_SIZE];
	MYSQL_RES *result;  /* the result of a SQL select */
	MYSQL_ROW row;  /* a row in the database */
	unsigned long *length;  /* the length of columns in a key */
	sha1_digest_t sha1;  /* a key name */
	size_t tableno, storecount;
	uint8_t fpr;
	int i, pct, newpct;
	int datasize;  /* value of the size column of the current row */
	FUN("scan_db");

	STORE_LOCK();
	g_store->currsize = 0;

	for (tableno = 0; tableno < g_conf->storedepth; tableno++) {
		/* %02x consumes an unsigned int, so do not pass the size_t as is */
		pm_snprintf(query, MYSQL_SELECT_SIZE, MYSQL_SELECT_QUERY,
			(unsigned)tableno);

		if (mysql_query(dbconn, query) != 0) {
			STORE_UNLOCK();
			if (mysql_errno(dbconn) != 1146) {
				/* 1146 == table doesn't exist */
				LOGS(L_STORE,L_ERROR,("'%s' failed: %s\n",
					query, mysql_error(dbconn)));
				return -1;
			}
			if (store_table_create() == 0)
				return -2;
			return -1;
		}

		result = mysql_store_result(dbconn);
		/* don't forget to free result!! */

		if (result == NULL) {
			LOGS(L_STORE,L_ERROR,("failed to store select results: %s\n",
				mysql_error(dbconn)));
			STORE_UNLOCK();
			return -1;
		}

		if (mysql_num_fields(result) != 2) {
			LOGS(L_STORE,L_ERROR,("unexpected number of columns in result: "
				"%u (expected 2)\n",
				mysql_num_fields(result)));
			STORE_UNLOCK();
			mysql_free_result(result);
			return -1;
		}

		/* number of keys in the store */
		storecount = mysql_num_rows(result);
		g_store->storecount += storecount;

		info("  0%%      \b\b\b\b\b\b\b\b\b\b");  /* erase "optimizing" */
		for (i = 1, pct = -1; (row = mysql_fetch_row(result)) != NULL; i++) {
			/* storecount is at least 1 whenever a row was fetched */
			newpct = tableno * 100 / g_conf->storedepth +
					i * 100 / g_conf->storedepth / storecount;
			if (pct != newpct) {
				/* only print if we have a new number */
				pct = newpct;
				info("%3d%%\b\b\b\b", pct);
			}

			length = mysql_fetch_lengths(result);
			if (length == NULL) {
				LOGS(L_STORE,L_ERROR,("unable to determine column lengths\n"));
				continue;
			}

			/* extract routing key */
			if (length[0] == HEXSIZE) {
				/* sha1 is the correct size */
				if (hex2sha1(&sha1, row[0]) == -1) {
					LOGS(L_STORE,L_ERROR,("invalid sha1 detected: %s\n",
						row[0]));
				}
			} else {
				/* sha1 is too big or too small */
				LOGS(L_STORE,L_ERROR,("sha1 column for key %s has"
					" an unexpected length: %lu (expected %d)\n",
					row[0], length[0], HEXSIZE));
				hex2sha1(&sha1, row[0]);
			}
			/* the key is counted even if its name could not be decoded */
			fpr = keyroute(&sha1);
			g_store->keycount[fpr] += 1;

			/* add the length of the data to the current store size */
			datasize = atoi(row[1]);
			if (datasize > CHUNKSIZE || datasize < 1) {
				LOGS(L_STORE,L_ERROR,("data size for key %s has "
					"an unexpected length: %d\n",
					row[0], datasize));  /* we'll add it anyway */
			}
			g_store->currsize += datasize;
		}
		mysql_free_result(result);
	}
	STORE_UNLOCK();

	return 0;
}

/*
 * qsort() comparator for fileage_t entries.  Entries with the larger
 * weight sort first.  If either entry is younger than a week the weight
 * is the plain age; otherwise the fingerprint distance is blended in.
 */
static int sort_age_dist(const void *p1, const void *p2)
{
	const fileage_t *lhs = (const fileage_t *)p1;
	const fileage_t *rhs = (const fileage_t *)p2;
	int w_lhs, w_rhs;

	/* For keys younger than a week, don't consider the distance */
	if (lhs->age < 7 * 86400 || rhs->age < 7 * 86400) {
		w_lhs = lhs->age;
		w_rhs = rhs->age;
		return w_rhs - w_lhs;
	}

	/* else weigh the distance as if 255 were approx. 192 days old */
	w_lhs = (int)(((uint64_t)lhs->dist * 65535 + lhs->age) / 65536);
	w_rhs = (int)(((uint64_t)rhs->dist * 65535 + rhs->age) / 65536);
	return w_rhs - w_lhs;
}

#if	STORE_QSORT
/*
 * qsort() comparator for two drop_key_t entries: orders the drop[] array
 * by ascending 'time' field.
 * Returns -1, 0 or 1.  The fields are compared rather than subtracted, so
 * a large difference cannot overflow or be truncated by the int return
 * type.
 */
static int sort_drop_time(const void *p1, const void *p2)
{
	const drop_key_t *d1 = (const drop_key_t *)p1;
	const drop_key_t *d2 = (const drop_key_t *)p2;

	return (d1->time > d2->time) - (d1->time < d2->time);
}
#endif

/*
 * Updates the timestamp on the specified key (sets its time column to
 * now()).  Failures are only logged; nothing is reported to the caller.
 * Takes sha1
 */
#define	MYSQL_STAMP_QUERY	"update store%02x set time=now() where sha1='%s'"
#define	MYSQL_STAMP_SIZE	(sizeof(MYSQL_STAMP_QUERY) + HEXSIZE)
static void stamp_key(const sha1_digest_t *sha1)
{
	char query[MYSQL_STAMP_SIZE];  /* SQL update query */
	size_t idx, tableno;
	FUN("stamp_key");

	if (sha1 == NULL) {
		LOGS(L_STORE,L_ERROR,("sha1 is NULL\n"));
		return;
	}

	LOGS(L_STORE,L_DEBUG,("updating timestamp for key %s\n", sha1_hexstr(sha1)));
	/* use SHA1 digest bytes 0 and 1 as an index */
	idx = sha1->digest[0] | ((uint32_t)sha1->digest[1] << 8);

	/* table number is derived from the index */
	tableno = idx % g_conf->storedepth;

	/* %02x consumes an unsigned int, so do not pass the size_t as is */
	pm_snprintf(query, MYSQL_STAMP_SIZE, MYSQL_STAMP_QUERY,
		(unsigned)tableno, sha1_hexstr(sha1));

	LOGS(L_STORE,L_DEBUGX,("query: %s\n", query));
	if (mysql_query(dbconn, query) != 0) {
		LOGS(L_STORE,L_ERROR,("failed to update timestamp for key %s: %s\n",
			sha1_hexstr(sha1), mysql_error(dbconn)));
		/* the affected-row count is meaningless after a failed query */
		return;
	}

	if (mysql_affected_rows(dbconn) != 1) {
		/* Sometimes there will be 0 affected rows.  This is not necessarily an
		 * error!  If the time is for example 20:33:29 and stamp_key is run
		 * twice on the same key name in this 29th second, then the second
		 * stamp_key call will update zero rows.  This is because the time is
		 * exactly the same so no changes were made. */
		if (mysql_affected_rows(dbconn) == 0) {
			LOGS(L_STORE,L_MINOR,("affected rows: 0\n"));
		} else {
			LOGS(L_STORE,L_ERROR,("unexpected affected rows: %lu\n",
				(unsigned long)mysql_affected_rows(dbconn)));
		}
	}
}

/*
 * Starts the data store - this is called once when Entropy starts.
 *
 * In order: installs store_exit as handler for several signals, allocates
 * g_store and its semaphore, connects to the database, makes sure the
 * store path exists, reads and rewrites the store version, derives the
 * table count (storedepth) from the configured sizes, then either zaps
 * (drop + create) or optionally optimizes the tables, scans the database
 * to fill the counters, purges if nothing was zapped, and finally closes
 * the database connection again.
 *
 * Returns 0 on success, -1 on failure (several failures terminate the
 * process through die() instead)
 */
int store(void)
{
	char cwd[MAXPATHLEN];
	int zap = 0;
	size_t size;
	int rc;
	FUN("store");

	/* capture signals */
	set_signal_handler(SIGHUP, store_exit);
	set_signal_handler(SIGINT, store_exit);
	set_signal_handler(SIGPIPE, store_exit);
	set_signal_handler(SIGALRM, store_exit);
	set_signal_handler(SIGTERM, store_exit);

	/* initialise g_store */
	size = sizeof(store_t);
	g_store = (store_t *)scalloc(size, 1);
	if (g_store == NULL) {
		/* size is a size_t; convert it to match the %d conversion */
		LOGS(L_STORE,L_ERROR,("failed to scalloc() store; size %d (%s)\n",
			(int)size, strerror(errno)));
		return -1;
	}

	/* prepare lock */
	if (osd_sem_init(&g_store->sem, 1, 1) != 0) {
		LOGS(L_STORE,L_ERROR,("osd_sem_init(0x%x,%d,%d) call failed (%s)\n",
			(unsigned)&g_store->sem, 1, 1, strerror(errno)));
		return -1;
	}

	/* open a connection to the database */
	if (store_connect_db() != 0)
		return -1;

	/* check version and write new version */
	/* BEGIN ---- */
	if (NULL == getcwd(cwd, sizeof(cwd))) {
		LOGS(L_STORE,L_NORMAL,("*** getcwd() failed (%s)\n",
			strerror(errno)));
		die(1,"getcwd() call failed\n");
	}

	/* entering the store path proves it exists; create it if it does not */
	if (0 != chdir(g_conf->storepath)) {
		if (0 != mkdir(g_conf->storepath, 0700)) {
			LOGS(L_STORE,L_NORMAL,("*** mkdir('%s') failed (%s)\n",
				g_conf->storepath, strerror(errno)));
			die(1,"mkdir('%s') call failed\n", g_conf->storepath);
		}
		if (0 != chdir(g_conf->storepath)) {
			LOGS(L_STORE,L_NORMAL,("*** chdir('%s') failed (%s)\n",
				g_conf->storepath, strerror(errno)));
			die(1,"chdir('%s') call failed\n", g_conf->storepath);
		}
	}

	/* go back to where we started */
	if (0 != chdir(cwd)) {
		LOGS(L_STORE,L_NORMAL,("*** chdir('%s') failed (%s)\n",
			cwd, strerror(errno)));
		die(1,"chdir('%s') call failed\n", cwd);
	}

	/* a non-zero result means the tables are dropped and recreated below */
	zap = store_get_version();
	if (0 != (rc = store_put_version(zap))) {
		return -1;
	}

	g_store->storesize = g_conf->storesize;  /* set maximum store size */
	if (g_conf->storedepth < 1)
		g_conf->storedepth = 1;
	/* a table size of zero would divide by zero just below */
	if (g_conf->mysql_tablesize < 1) {
		LOGS(L_STORE,L_ERROR,("mysql_tablesize must be at least 1\n"));
		return -1;
	}
	/* enforce the maximum table size by increasing storedepth */
	size = g_conf->storesize / g_conf->mysql_tablesize;
	if (g_conf->storesize % g_conf->mysql_tablesize)
		size++;
	if (size > g_conf->storedepth) {
		LOGS(L_STORE,L_NORMAL,("storedepth forcefully set to %u\n",
			(unsigned)size));
		g_conf->storedepth = size;
	}
	/* recalc the (rounded to mysql_tablesize) store size */
	g_conf->storesize = g_conf->mysql_tablesize * g_conf->storedepth;

	/* zap the store if we need it */
	if (zap != 0) {
		info("drop\b\b\b\b");
		if (0 != store_table_drop()) {
			LOGS(L_STORE,L_ERROR,("couldn't drop store table(s)\n"));
		}
		info("create\b\b\b\b\b\b");
		if (0 != store_table_create()) {
			LOGS(L_STORE,L_ERROR,("couldn't create store table(s)\n"));
			die(1,"couldn't create store table\n");
		}
	} else if (0 != g_conf->mysql_optimize) {
		/* defragment the store */
		optimize_tables();
	}

	rc = scan_db();  /* fill the other g_store variables */

	if (rc < 0) {
		if (rc != -2) {
			/* error */
			LOGS(L_STORE,L_ERROR,("could not scan database (no create)\n"));
			return -1;
		} else {
			/* needed to create the store table first */
			rc = scan_db();
			if (rc < 0) {
				LOGS(L_STORE,L_ERROR,("could not scan database (tried create)\n"));
				return -1;
			}
		}
	}

	/* trim some fat off the store if it is too big */
	if (0 == zap) {
		info("purge\b\b\b\b\b");
		store_purge();
	}

	store_upd_fingerprint();
	LOGS(L_STORE,L_NORMAL,("Opened %uMB [%uMB] store with %d keys\n",
		(unsigned)(g_store->currsize / 1024 / 1024),
		(unsigned)(g_store->storesize / 1024 / 1024),
		g_store->storecount));
	info("%uMB [%uMB], %d keys ",
		(unsigned)(g_store->currsize / 1024 / 1024),
		(unsigned)(g_store->storesize / 1024 / 1024),
		g_store->storecount);

	store_disconnect_db();

	return 0;
}

/*
 * Check how many of the 'count' keys in sha1[] are available in the store.
 * Bit i of 'flags' selects whether key i is actually looked up: a key
 * whose bit is clear is counted as available without a lookup.  Only the
 * bits that fit into an int can be set, so keys beyond that width are
 * treated as unflagged.
 * Returns 0 if at least 'required' keys count as available, or -1 with
 * errno set (EINVAL for a NULL sha1, ENOENT if too few keys are there;
 * a failed database connection also returns -1)
 */
int store_check(const sha1_digest_t *sha1, int flags, size_t count, size_t required)
{
	const size_t flagbits = sizeof(flags) * 8;
	size_t i, have = 0;
	FUN("store_check");

	if (sha1 == NULL) {
		LOGS(L_STORE,L_ERROR,("sha1 is NULL\n"));
		errno = EINVAL;
		return -1;
	}

	if (store_connect_db() != 0)
		return -1;

	for (i = 0; i < count; i++) {
		/* shift an unsigned 1 and stay inside the width of flags:
		 * anything else is undefined behaviour */
		if (i >= flagbits || ((unsigned)flags & (1u << i)) == 0) {
			have++;
		} else if (0 == real_get(&sha1[i], NULL, NULL, 0)) {
			have++;
		}
	}
	if (have < required) {
		/* fewer keys than required are available */
		errno = ENOENT;
		return -1;
	}
	return 0;
}

/*
 * Makes sure this process has an open database connection.
 * An existing connection is reused when it was opened by this very
 * process; one opened by a different pid is refused with EPERM.
 * Otherwise a new connection is set up, retrying every 3 seconds for as
 * long as the server cannot be reached.
 * Returns 0 on a good connection, -1 on failure
 */
int store_connect_db(void)
{
	FUN("connect_db");

	if (dbconn != NULL) {
		if (dbconnpid == getpid()) {
			/* database connection already open */
			return 0;
		}
		/* connection wasn't closed prior to fork! */
		LOGS(L_STORE,L_ERROR,("database connection open (for %d) and I'm not him\n",
			(int)dbconnpid));
		errno = EPERM;
		return -1;
	}

	dbconn = mysql_init(NULL);
	if (NULL == dbconn) {
		LOGS(L_STORE,L_ERROR,("mysql_init failed (out of memory)\n"));
		errno = ENOMEM;
		return -1;
	}

	for (;;) {
		if (mysql_real_connect(dbconn, g_conf->mysql_host,
			g_conf->mysql_user, g_conf->mysql_pass, g_conf->mysql_db,
			g_conf->mysql_port, DBSOCK, 0) != NULL)
			break;
		LOGS(L_STORE,L_ERROR,("failed to connect to database (will retry): %s\n",
			mysql_error(dbconn)));
		sleep(3);
	}

	/* remember who owns the connection for the sanity check above */
	dbconnpid = getpid();

	LOGS(L_STORE,L_MINOR,("new database connection opened\n"));
	return 0;
}

/*
 * Enter a key into the list of pending deletes (drop key list).
 *
 * A key that is already pending is accepted without a second entry.  A
 * key that real_get() cannot find is recorded in g_store->missing[] and
 * rejected with ENOENT.  If the list is full, the earliest DROP_MAX/8
 * entries are committed right away to make room (after sorting the list
 * by time when STORE_QSORT is enabled, in case we would use varying
 * timeouts).  The new entry is due 180 seconds from now.  Afterwards, all
 * pending deletes that have reached their due time are committed.
 *
 * Returns 0 on success, -1 on failure
 */
int store_del(const sha1_digest_t *sha1)
{
	int size;
	size_t i, j, idx;
	drop_key_t *d;
	time_t t0;
	FUN("store_del");

	if (sha1 == NULL) {
		LOGS(L_STORE,L_ERROR,("sha1 is NULL\n"));
		errno = EINVAL;
		return -1;
	}

	if (store_connect_db() != 0)
		return -1;

	STORE_LOCK();

	/* first check if this delete is already pending */
	for (i = g_store->tail; i != g_store->head; i = (i + 1) % DROP_MAX) {
		d = &g_store->drop[i];
		if (memcmp(&d->sha1, sha1, SHA1SIZE) == 0) {
			STORE_UNLOCK();
			return 0;
		}
	}

	/* use SHA1 digest bytes 0 and 1 as an index */
	idx = sha1->digest[0] | ((uint32_t)sha1->digest[1] << 8);

	STORE_UNLOCK();  /* we have to unlock so real_get can acquire the lock */
	if (-1 == real_get(sha1, NULL, &size, 0)) {
		LOGS(L_STORE,L_MINOR,("key %s not found\n", sha1_hexstr(sha1)));
		/* This key is missing now */
		STORE_LOCK();
		g_store->missing[idx] = *sha1;
		STORE_UNLOCK();
		errno = ENOENT;
		return -1;
	}
	STORE_LOCK();

	/* if we cannot simply add another drop key entry */
	if ((g_store->head + 1) % DROP_MAX == g_store->tail) {
		LOGS(L_STORE,L_MINOR,("list is full: committing %d pending deletes\n",
			DROP_MAX/8));
#if	STORE_QSORT
		/* sort by drop time -- earliest first */
		qsort(g_store->drop, DROP_MAX, sizeof(drop_key_t), sort_drop_time);
		g_store->tail = 0;
		g_store->head = DROP_MAX - 1;
#endif
		/* now drop the earliest DROP_MAX/8 keys */
		for (i = g_store->tail, j = 0; j < DROP_MAX/8; j++) {
			d = &g_store->drop[i];
			LOGS(L_STORE,L_DEBUG,("committing key %s delete due %s\n",
				sha1_hexstr(&d->sha1),
				datetime_str(d->time)));
			if (0 != commit(&d->sha1, d->size)) {
				LOGS(L_STORE,L_DEBUG,("committing failed (%s)\n",
					strerror(errno)));
			}
			i = (i + 1) % DROP_MAX;
		}

		/* new tail after committing 'j' deletes */
		g_store->tail = i;

		/* both values are logged with %d, so convert them to int */
		LOGS(L_STORE,L_MINOR,("removed %d entries; list has %d entries now\n",
			(int)j,
			(int)(g_store->head >= g_store->tail ?
				g_store->head - g_store->tail :
				g_store->head + DROP_MAX - g_store->tail)));
	}

	t0 = time(NULL);

	/* enter this key in the head slot */
	d = &g_store->drop[g_store->head];
	g_store->head = (g_store->head + 1) % DROP_MAX;
	d->sha1 = *sha1;
	d->time = t0 + 180;	/* nuke in 180 seconds = 3 minutes from now */
	d->size = size;
	LOGS(L_STORE,L_DEBUG,("appended key drop %s (%d bytes) at %s\n",
		sha1_hexstr(&d->sha1),
		d->size, datetime_str(d->time)));

	/*
	 * now commit keys which expired their timeout; the entry appended
	 * above is due in the future, so the walk stops there at the latest
	 */
	for (i = g_store->tail, j = 0; i != g_store->head; j++) {
		d = &g_store->drop[i];
		if (d->time > t0) {
			break;
		}
		LOGS(L_STORE,L_DEBUG,("committing key %s delete due %s\n",
			sha1_hexstr(&d->sha1),
			datetime_str(d->time)));
		if (0 != commit(&d->sha1, d->size)) {
			LOGS(L_STORE,L_DEBUG,("committing failed (%s)\n",
				strerror(errno)));
		}
		i = (i + 1) % DROP_MAX;
	}

	/* committed some keys? */
	if (j > 0) {
		g_store->tail = i;
		LOGS(L_STORE,L_MINOR,("removed %d entries; list has %d entries now\n",
			(int)j,
			(int)(g_store->head >= g_store->tail ?
				g_store->head - g_store->tail :
				g_store->head + DROP_MAX - g_store->tail)));
	}
	STORE_UNLOCK();

	return 0;
}

/*
 * Closes the connection to the database and forgets the handle, so that
 * the next store_connect_db() call opens a fresh connection.
 */
void store_disconnect_db(void)
{
	FUN("disconnect_db");

	mysql_close(dbconn);
	/* dbconnpid is left alone: it is only looked at while dbconn is set */
	dbconn = NULL;
	LOGS(L_STORE,L_MINOR,("database connection closed\n"));
}

/*
 * Signal handler installed by store(): restores the default disposition
 * for the signal, logs it, closes the database connection and hands the
 * signal number on to osd_exit().
 */
void store_exit(int sig)
{
	FUN("store_exit");

	signal(sig, SIG_DFL);
	LOGS(L_STORE,L_MINOR,("*** {%d} signal %s ***\n", (int)getpid(), signal_name(sig)));
	store_disconnect_db();
	osd_exit(sig);
}

/*
 * Gets a key of size CHUNKSIZE from the database and refreshes the key's
 * timestamp (real_get is called with do_stamp set).
 * If buff is NULL then only check if the key exists, and do not fill the buffer
 * Takes sha1 (name), buffer to fill
 * Returns 0 on success/existence, -1 if it doesn't exist or is not accessible
 */
int store_get(const sha1_digest_t *sha1, void *buff)
{
	int rc = -1;

	if (0 == store_connect_db())
		rc = real_get(sha1, buff, NULL, 1);

	return rc;
}

/*
 * Same as store_get but adds two options:
 * ret_length = if non-NULL this will receive the data length
 * do_stamp = if set to 1, this will update the key's timestamp
 *
 * The data column is selected when the caller wants the data (buff) or
 * its length (ret_length); otherwise only the sha1 column is selected as
 * an existence check.  A pure existence check without stamping may be
 * answered from g_store->available[] without a query, and a key recorded
 * in g_store->missing[] is rejected without a query.  Both caches are
 * updated from the query outcome under the store lock, so the caller must
 * not hold that lock.  Data shorter than CHUNKSIZE is zero padded in buff.
 *
 * Returns -1 on error, 0 on success.  errno is ENOENT if the key does not
 * exist, EINVAL for a NULL sha1, ERANGE for unexpected row counts or
 * column lengths, ENOMEM if the result could not be stored, and 0 for
 * other database failures.
 */
#define	MYSQL_SELECT_SHA1_QUERY "select sha1 from store%02x where sha1='%s'"
#define	MYSQL_SELECT_DATA_QUERY "select data from store%02x where sha1='%s'"
#define	MYSQL_SELECT_XXX_SIZE (sizeof(MYSQL_SELECT_DATA_QUERY) + HEXSIZE)

static int real_get(const sha1_digest_t *sha1, void *buff, int *ret_length,
	int do_stamp)
{
	uint8_t *bytes = (uint8_t *)buff;
	char query[MYSQL_SELECT_XXX_SIZE];  /* SQL select query */
	size_t idx, tableno;
	MYSQL_RES *result;  /* holds the results of the select query */
	MYSQL_ROW row;  /* a row from the store table */
	unsigned long *length;  /* the length of a column */
	unsigned long tmplen;
	int want_data;  /* non-zero if the data column has to be selected */
	FUN("real_get");

	if (sha1 == NULL) {
		LOGS(L_STORE,L_ERROR,("sha1 is NULL\n"));
		errno = EINVAL;
		return -1;
	}

	/* the data is needed for the length as well as for the content */
	want_data = (NULL != buff || NULL != ret_length);

	/* use SHA1 digest bytes 0 and 1 as an index */
	idx = sha1->digest[0] | ((uint32_t)sha1->digest[1] << 8);

	/* quick check for recently missing keys */
	if (memcmp(&g_store->missing[idx], sha1, SHA1SIZE) == 0) {
		/* %04x consumes an unsigned int, so convert the size_t */
		LOGS(L_STORE,L_DEBUG,("NOT FOUND known missing key #%04x %s\n",
			(unsigned)idx, sha1_hexstr(sha1)));
		errno = ENOENT;
		return -1;
	}

	/* if this is an existence check, first look up the available[] array */
	if (!want_data && 0 == do_stamp &&
		0 == memcmp(&g_store->available[idx], sha1, SHA1SIZE)) {
		LOGS(L_STORE,L_DEBUG,("available key #%04x %s\n",
			(unsigned)idx, sha1_hexstr(sha1)));
		return 0;
	}

	/* table number is derived from idx */
	tableno = idx % g_conf->storedepth;

	/* get it from the database */
	if (want_data) {
		/* caller wants the data */
		/* ... or if they want the length, we still have to get the data */
		LOGS(L_STORE,L_DEBUG,("getting key %s from database\n",
			sha1_hexstr(sha1)));
		pm_snprintf(query, MYSQL_SELECT_XXX_SIZE, MYSQL_SELECT_DATA_QUERY,
			(unsigned)tableno, sha1_hexstr(sha1));
	} else {
		/* caller is just checking for existence */
		LOGS(L_STORE,L_DEBUG,("looking for key %s in database\n",
			sha1_hexstr(sha1)));
		/* selecting sha1 is more efficient than selecting data */
		pm_snprintf(query, MYSQL_SELECT_XXX_SIZE, MYSQL_SELECT_SHA1_QUERY,
			(unsigned)tableno, sha1_hexstr(sha1));
	}
	LOGS(L_STORE,L_DEBUGX,("query: %s\n", query));
	if (mysql_query(dbconn, query) != 0) {
		LOGS(L_STORE,L_ERROR,("'%s' failed: %s\n",
			query, mysql_error(dbconn)));
		errno = 0;
		return -1;
	}
	result = mysql_store_result(dbconn);  /* don't forget to free result!! */
	if (result == NULL) {
		LOGS(L_STORE,L_ERROR,("failed to store select results for key %s: %s\n",
			sha1_hexstr(sha1), mysql_error(dbconn)));
		errno = ENOMEM;
		return -1;
	}
	/* figure out what to do based on the number of rows returned */
	if (mysql_num_rows(result) == 0) {
		/* the key is not in the database */
		LOGS(L_STORE,L_DEBUG,("NOT FOUND key %s in database\n",
			sha1_hexstr(sha1)));
		STORE_LOCK();
		/* store this key as recently missing */
		g_store->missing[idx] = *sha1;
		/* remove this key from the recently available keys */
		g_store->available[idx] = null;
		STORE_UNLOCK();
		mysql_free_result(result);
		errno = ENOENT;
		return -1;
	} else if (mysql_num_rows(result) > 1) {
		/* more than one key has the same name
		 * this is impossible because sha1 is the primary key */
		LOGS(L_STORE,L_ERROR,("there is more than one key"
			" in the database named %s\n",
			sha1_hexstr(sha1)));
		mysql_free_result(result);
		errno = ERANGE;
		return -1;
	}
	/* 1 row returned (the row count is unsigned, nothing else is left) */
	row = mysql_fetch_row(result);
	if (row == NULL) {
		/* this is impossible because mysql_num_rows has returned 1 */
		LOGS(L_STORE,L_ERROR,("mysql_fetch_row failed for key %s\n",
			sha1_hexstr(sha1)));
		mysql_free_result(result);
		errno = 0;
		return -1;
	}
	length = mysql_fetch_lengths(result);
	if (length == NULL) {
		LOGS(L_STORE,L_ERROR,("unable to determine length of data"
			" column for key %s\n",
			sha1_hexstr(sha1)));
		mysql_free_result(result);
		errno = 0;
		return -1;
	} else if (want_data) {
		/* selected data column */
		if (*length > CHUNKSIZE) {
			LOGS(L_STORE,L_ERROR,("data column for key %s has"
				" an unexpected length: %lu\n",
				sha1_hexstr(sha1), *length));
			mysql_free_result(result);
			errno = ERANGE;
			return -1;
		}
	} else {
		/* selected sha1 column */
		if (*length != HEXSIZE) {
			LOGS(L_STORE,L_ERROR,("sha1 column for key %s has"
				" an unexpected length: %lu\n",
				sha1_hexstr(sha1), *length));
			mysql_free_result(result);
			errno = ERANGE;
			return -1;
		}
	}
	STORE_LOCK();
	/* store this key as available */
	g_store->missing[idx] = null;
	g_store->available[idx] = *sha1;
	STORE_UNLOCK();

	/* take a copy of the length before the result is freed */
	tmplen = *length;
	if (buff == NULL) {
		/* the key is in the database and no data is wanted by the caller */
		LOGS(L_STORE,L_DEBUG,("FOUND key %s in database\n",
			sha1_hexstr(sha1)));
	} else {
		memcpy(buff, *row, tmplen);
		LOGS(L_STORE,L_DEBUG,("FOUND key %s in database (size %lu)\n",
			sha1_hexstr(sha1), tmplen));
		if (tmplen < CHUNKSIZE) {
			/* pad trailing zeroes */
			memset(&bytes[tmplen], 0, CHUNKSIZE - tmplen);
		}
	}
	mysql_free_result(result);

	/* update timestamp */
	if (do_stamp)
		stamp_key(sha1);

	if (NULL != ret_length)
		*ret_length = (int)tmplen;
	return 0;
}

/*
 * When the store is getting full we have to get rid of some
 * keys to make room for new ones.
 *
 * Does nothing while g_store->currsize is below g_store->storesize.
 * Otherwise every row of every storeXX table is read into a list, the
 * list is sorted with sort_age_dist(), and keys are marked from the front
 * of the sorted list until the projected size falls below 95% of the
 * store size.  The marked keys are handed to store_del(), which takes the
 * store lock itself; this function therefore never holds that lock and
 * must be called without it.
 *
 * Returns 0 on success (or if there was nothing to do), -1 on failure
 */
#define	MYSQL_PURGE_QUERY "select sha1,size,unix_timestamp(time) from store%02x"
#define	MYSQL_PURGE_SIZE (sizeof(MYSQL_PURGE_QUERY))
int store_purge(void)
{
	char query[MYSQL_PURGE_SIZE];
	time_t t0;
	fileage_t *list;
	size_t i, tableno, count, limit, deletes;
	MYSQL_RES *result;  /* the result of a SQL select */
	MYSQL_ROW row;  /* a row in the database */
	unsigned long *length;  /* the length of columns in a key */
	uint8_t fpr;
	uint64_t cur_watermark, low_watermark;
	int datasize;  /* value of the size column of the current row */
	int stamp;  /* value of unix_timestamp(time) of the current row */
	FUN("store_purge");

	if (g_store->currsize < g_store->storesize) {
		LOGS(L_STORE,L_NORMAL,("*** nothing to do: currsize %uMB < storesize %uMB\n",
			(unsigned)(g_store->currsize / 1024 / 1024),
			(unsigned)(g_store->storesize / 1024 / 1024)));
		return 0;
	}

	if (store_connect_db() != 0)
		return -1;

	LOGS(L_STORE,L_NORMAL,("*** purging store: currsize %uMB > storesize %uMB\n",
		(unsigned)(g_store->currsize / 1024 / 1024),
		(unsigned)(g_store->storesize / 1024 / 1024)));

	/* base purging on current time */
	t0 = time(NULL);

	/* allocate a list to hold filenames and ages; leave some slack */
	limit = g_store->storecount + 1024;
	list = (fileage_t *)xcalloc(limit, sizeof(fileage_t));
	count = 0;

	/*
	 * Note for the error paths below: the store lock is not held here,
	 * so it must not be released either.
	 */
	for (tableno = 0; tableno < g_conf->storedepth; tableno++) {
		/* %02x consumes an unsigned int, so do not pass the size_t as is */
		pm_snprintf(query, MYSQL_PURGE_SIZE, MYSQL_PURGE_QUERY,
			(unsigned)tableno);
		if (mysql_query(dbconn, query) != 0) {
			LOGS(L_STORE,L_ERROR,("sql select failed: %s\n", mysql_error(dbconn)));
			xfree(list);
			errno = 0;
			return -1;
		}
		result = mysql_store_result(dbconn);
		/* don't forget to free result!! */
		if (result == NULL) {
			LOGS(L_STORE,L_ERROR,("failed to store select results: %s\n",
				mysql_error(dbconn)));
			xfree(list);
			errno = ENOMEM;
			return -1;
		}
		if (mysql_num_fields(result) != 3) {
			LOGS(L_STORE,L_ERROR,("unexpected number of columns in result: %u "
						 "(expected 3)\n",
				mysql_num_fields(result)));
			mysql_free_result(result);
			xfree(list);
			errno = 0;
			return -1;
		}
		while ((row = mysql_fetch_row(result)) != NULL) {
			if (count >= limit) {
				/* the list is full... (unlikely) */
				break;
			}
			length = mysql_fetch_lengths(result);
			if (length == NULL) {
				LOGS(L_STORE,L_ERROR,("unable to determine column lengths\n"));
				continue;
			}
			/* add this row to the list */
			if (length[0] == HEXSIZE) {
				/* sha1 is the correct size */
				if (hex2sha1(&list[count].sha1, row[0]) == -1) {
					LOGS(L_STORE,L_ERROR,("invalid sha1 detected: %s\n",
						row[0]));
				}
			} else {
				/* sha1 is too big or too small */
				LOGS(L_STORE,L_ERROR,("sha1 column for key %s has"
					" an unexpected length: %lu (expected %d)\n",
					row[0], length[0], HEXSIZE));
				hex2sha1(&list[count].sha1, row[0]);
			}

			datasize = atoi(row[1]);
			if (datasize > CHUNKSIZE || datasize < 1) {
				LOGS(L_STORE,L_ERROR,("data size for key %s has"
					" an unexpected length: %d\n",
					row[0], datasize));
			}

			stamp = atoi(row[2]);
			if (stamp == 0) {
				LOGS(L_STORE,L_ERROR,("time column for key %s has"
					" an unexpected value: 0\n",
					row[0]));
			}

			fpr = keyroute(&list[count].sha1);
			/* distance to current (0 = keep to 255 = trash) */
			list[count].dist = 255 - g_store->fingerprint[fpr];
			list[count].fpr = fpr;
			/* a timestamp in the future counts as age 0 */
			list[count].age = (t0 > stamp) ? t0 - stamp : 0;
			list[count].size = datasize;
			LOGS(L_STORE,L_DEBUGX,("Age=%d Dist=%u Size=%u\n",
				(int)list[count].age, list[count].dist, list[count].size));
			count++;
		}
		mysql_free_result(result);
	}

	/* now sort the list by fingerprint distance and age */
	qsort(list, count, sizeof(fileage_t), sort_age_dist);

	/* remove 5% of the keys */
	low_watermark = g_store->storesize * 95 / 100;
	cur_watermark = g_store->currsize;

	/* start marking files to be purged until we are below the watermark */
	for (i = 0, deletes = 0; i < count; i++) {
		fileage_t *fa = &list[i];

		/* mark this file to be purged */
		fa->marked = 1;
		/* clamp at zero: wrapping around would mark every key */
		if ((uint64_t)fa->size >= cur_watermark)
			cur_watermark = 0;
		else
			cur_watermark -= fa->size;
		deletes += 1;

		if (cur_watermark < low_watermark)
			break;
	}

	/* deletes is a size_t; convert it to match the %d conversion */
	LOGS(L_STORE,L_NORMAL,("*** purging store: %d keys to delete\n",
		(int)deletes));

	for (i = 0; i < count; i++) {
		fileage_t *fa = &list[i];

		/* Continue if this file is not marked */
		if (0 == fa->marked)
			continue;

		if (0 != store_del(&fa->sha1)) {
			LOGS(L_STORE,L_ERROR,("deleting key %s failed\n",
				sha1_hexstr(&fa->sha1)));
		}

		--deletes;
		/* every 1024 files go to sleep for 10ms */
		if (0 == (deletes % 1024)) {
			LOGS(L_STORE,L_NORMAL,("*** %d to go\n", (int)deletes));
			osd_usleep(10000);
		}
	}
	xfree(list);

	return 0;
}

/*
 * Puts a key of size CHUNKSIZE in the database.
 *
 * Takes sha1 (name) and buff, a buffer of data that must be readable for
 * CHUNKSIZE bytes.  Trailing zero bytes are stripped before the chunk is
 * stored (at least one byte is always kept), and the stripped length is
 * written to the size column.
 *
 * If a row for the key already exists (MySQL error 1062, duplicate entry),
 * the existing row is updated in place instead; the store statistics are
 * only touched for freshly inserted rows.
 *
 * Returns 0 on success, -1 on failure.  errno is set to EINVAL if sha1 or
 * buff is NULL and reset to 0 if the insert fails.
 */
#define MYSQL_INSERT_QUERY \
	"insert into store%02x (sha1,data,size) values (\n" \
	"	'%s',\n" \
	"	'%s',\n" \
	"	%d\n" \
	")"
#define MYSQL_INSERT_SIZE \
	(sizeof(MYSQL_INSERT_QUERY) + HEXSIZE + (CHUNKSIZE * 2) + 5)
#define	MYSQL_UPDATE_QUERY \
	"update store%02x set data='%s', size=%d where sha1='%s'"
/* The update query embeds the hex key too, so HEXSIZE must be reserved. */
#define	MYSQL_UPDATE_SIZE \
	(sizeof(MYSQL_UPDATE_QUERY) + HEXSIZE + (CHUNKSIZE * 2) + 5)
int store_put(const sha1_digest_t *sha1, const void *buff)
{
	const uint8_t *bytes = (const uint8_t *)buff;
	char *query;  /* SQL insert/update query */
	char *chunk;  /* the escaped chunk of data */
	size_t size;  /* the size of buff without trailing zeroes */
	size_t query_size;  /* capacity of the query buffer */
	size_t idx, tableno, len;
	uint8_t fpr = 0;
	FUN("store_put");

	if (sha1 == NULL || buff == NULL) {
		LOGS(L_STORE,L_ERROR,("sha1 or buff is NULL\n"));
		errno = EINVAL;
		return -1;
	}

	if (store_connect_db() != 0)
		return -1;

	/* use SHA1 digest bytes 0 and 1 as an index */
	idx = sha1->digest[0] | ((uint32_t)sha1->digest[1] << 8);

	/* table number is derived from the index */
	tableno = idx % g_conf->storedepth;

	/*
	 * fingerprint index
	 * NOTE(review): the result is not used in this function; the call is
	 * kept in case keyroute() has side effects - confirm and remove.
	 */
	fpr = keyroute(sha1);
	(void)fpr;

	/* strip off the trailing zeroes, but always keep at least one byte */
	size = CHUNKSIZE - 1;
	while (size > 0 && bytes[size] == 0)
		size--;
	size++;

	/* escape the weird chars in the chunk (worst case: 2 bytes per byte) */
	chunk = xcalloc(size * 2 + 1, sizeof(char));
	mysql_real_escape_string(dbconn, chunk, buff, size);

	/*
	 * The same buffer is reused for the fallback update query, so make it
	 * large enough for whichever of the two queries is longer.
	 */
	query_size = MYSQL_INSERT_SIZE > MYSQL_UPDATE_SIZE ?
		MYSQL_INSERT_SIZE : MYSQL_UPDATE_SIZE;

	/* assemble the query; cast size_t values to match %02x and %d */
	query = xcalloc(query_size, sizeof(char));
	len = pm_snprintf(query, query_size, MYSQL_INSERT_QUERY,
		(unsigned)tableno, sha1_hexstr(sha1), chunk, (int)size);

	/* save the key to the database */
	LOGS(L_STORE,L_DEBUG,("putting key %s (size %d) in database\n",
		sha1_hexstr(sha1), (int)size));
	LOGS(L_STORE,L_DEBUGX,("query: %s\n", query));
	if (mysql_real_query(dbconn, query, len) != 0) {
		if (mysql_errno(dbconn) == 1062) {
			/* caller tried to insert a key that already exists */
			LOGS(L_STORE,L_MINOR,("duplicate key insert attempt for key %s\n",
				sha1_hexstr(sha1)));
			len = pm_snprintf(query, query_size, MYSQL_UPDATE_QUERY,
				(unsigned)tableno, chunk, (int)size, sha1_hexstr(sha1));
			if (mysql_real_query(dbconn, query, len) != 0) {
				LOGS(L_STORE,L_ERROR,("failed to update key %s: %s\n",
					sha1_hexstr(sha1), mysql_error(dbconn)));
				xfree(chunk);
				xfree(query);
				return -1;
			}
			xfree(chunk);
			xfree(query);
			return 0;
		}
		LOGS(L_STORE,L_ERROR,("failed to insert row for key %s: %s\n",
			sha1_hexstr(sha1), mysql_error(dbconn)));
		xfree(chunk);
		xfree(query);
		errno = 0;
		return -1;
	}
	xfree(chunk);
	xfree(query);

	/* update statistics */
	STORE_LOCK();
	store_add_key(sha1, size);
	STORE_UNLOCK();

	return 0;
}

/*
 * Immediately zap a key from the store.
 *
 * Looks up the stored size of the key with real_get() (without stamping
 * it) and then deletes the row through commit() while holding the store
 * lock.
 *
 * Takes sha1 (name)
 * Returns 0 on success, -1 on failure.  errno is set to EINVAL if sha1 is
 * NULL and to ENOENT if the key could not be fetched.
 */
int store_zap(const sha1_digest_t *sha1)
{
	int size = 0, rc;
	FUN("store_zap");

	/* validate before the first use of sha1 (the log below reads it) */
	if (sha1 == NULL) {
		LOGS(L_STORE,L_ERROR,("sha1 is NULL\n"));
		errno = EINVAL;
		return -1;
	}

	LOGS(L_STORE,L_DEBUG,("Zapping key %s\n", sha1_hexstr(sha1)));

	if (store_connect_db() != 0)
		return -1;

	/* only the size is needed here, so no data buffer is passed */
	if (-1 == real_get(sha1, NULL, &size, 0)) {
		LOGS(L_STORE,L_NORMAL,("key %s is already gone\n", sha1_hexstr(sha1)));
		errno = ENOENT;
		return -1;
	}

	/* commit() expects the caller to hold the store lock */
	STORE_LOCK();
	rc = commit(sha1, (size_t)size);
	STORE_UNLOCK();

	return rc;
}
