/* Code for running the iterative method
 * Improved Iterative Scaling 
 *
 * Copyright (C) 2006 HANAOKA Toshiyuki
 *
 * Special Thanks: Google Summer of Code Program 2006
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "iis.h"

/*
 * Working state for the iterative scaling computation in this file.
 * The per-feature arrays are indexed by feature number and are allocated
 * with max_feature elements each.
 */
struct input_set {
  /* number of features; element count of every per-feature array below */
  int max_feature;
  /* per feature: sum of the non-negative weights given to iis_set_features() */
  double *feature_weight;
  /* per feature: sum of the magnitudes of the negative weights given to
   * iis_set_features() */
  double *negative_weight;
  /* model state; lambda and delta are allocated by
   * iis_init_lambda_and_delta(), not by iis_create() */
  /* normalization sum, recomputed by calc_z() */
  double z;
  /* per feature: current parameter, started at 0.1 and adjusted by update() */
  double *lambda;
  /* per feature: step found by calc_delta(), added to lambda by update() */
  double *delta;
  /* observed data */
  /* sum of fabs(weight) over all iis_set_features() calls */
  double total_weight;
  /* head of the singly linked list of distinct feature lists; new lines are
   * prepended by add_line() */
  struct input_line *lines;
};

/*
 * Find the stored line whose feature array equals features[0..nr-1],
 * element for element and in the same order.
 *
 * This is a linear walk over the whole list with an array comparison per
 * candidate, so it is slow when many lines are stored.
 *
 * Returns the matching line, or NULL when no stored line matches.
 */
static struct input_line *
find_same_line(struct input_set *is, int *features, int nr)
{
  struct input_line *cur = is->lines;

  while (cur) {
    if (cur->nr_features == nr) {
      int k = 0;
      /* step over the common prefix of the two arrays */
      while (k < nr && cur->features[k] == features[k]) {
        k++;
      }
      if (k >= nr) {
        /* ran off the end without a mismatch */
        return cur;
      }
    }
    cur = cur->next_line;
  }
  return NULL;
}

/*
 * Create a new line holding a private copy of features[0..nr-1] and put it
 * at the head of is->lines.  Both weights of the new line start at zero.
 *
 * The feature array is sized from its own element type (the original used
 * sizeof(double), which merely over-allocated), and at least one element
 * is requested so that a zero-length line does not depend on what
 * malloc(0) returns.
 *
 * Returns the new line, or NULL when memory could not be allocated; in
 * that case the list is left untouched.
 */
static struct input_line *
add_line(struct input_set *is, int *features, int nr)
{
  int i;
  size_t count = nr > 0 ? (size_t)nr : 1;
  struct input_line *il;

  il = malloc(sizeof *il);
  if (!il) {
    return NULL;
  }
  il->features = malloc(sizeof *il->features * count);
  if (!il->features) {
    free(il);
    return NULL;
  }
  il->nr_features = nr;
  for (i = 0; i < nr; i++) {
    il->features[i] = features[i];
  }
  il->weight = 0;
  il->negative_weight = 0;
  /* link at the head of the list */
  il->next_line = is->lines;
  is->lines = il;
  return il;
}

/*
 * Add `weight` to the feature_weight slot of every feature listed in
 * features[0..nr-1].  A feature listed more than once is credited once
 * per occurrence.  Feature numbers are used as array indices without a
 * range check.
 */
static void
accumlate_features(struct input_set *is, int *features,
                   int nr, double weight)
{
  double *totals = is->feature_weight;
  int k = 0;

  while (k < nr) {
    totals[features[k]] += weight;
    k++;
  }
}

/*
 * Subtract `weight` from the negative_weight slot of every feature listed
 * in features[0..nr-1].  iis_set_features() calls this only with a
 * negative weight, so each slot grows by the weight's magnitude.
 * Feature numbers are used as array indices without a range check.
 */
static void
accumlate_negative_features(struct input_set *is, int *features,
                            int nr, double weight)
{
  double *totals = is->negative_weight;
  int k = 0;

  while (k < nr) {
    totals[features[k]] -= weight;
    k++;
  }
}

/*
 * Record one observation: the feature list features[0..nr-1] seen with
 * the given weight.
 *
 * is       - set to update
 * features - feature numbers; each must satisfy 0 <= f < is->max_feature
 * nr       - number of entries in features
 * weight   - a negative weight is booked as negative evidence, any other
 *            weight as positive evidence
 *
 * The observation is ignored as a whole (with a message on stderr) when a
 * feature number is out of range, because it would otherwise be used as
 * an index outside the per-feature arrays.  It is also ignored when no
 * line could be obtained for it (add_line() returned NULL).  The line is
 * looked up before any total is touched so that a rejected observation
 * leaves the set unchanged.
 */
void
iis_set_features(struct input_set *is, int *features,
                 int nr, double weight)
{
  struct input_line *il;
  double abs_weight = fabs(weight);
  int i;

  /* validate every index before anything is written */
  for (i = 0; i < nr; i++) {
    if (features[i] < 0 || features[i] >= is->max_feature) {
      fprintf(stderr,
              "iis_set_features: feature %d is outside [0, %d)\n",
              features[i], is->max_feature);
      return;
    }
  }

  /* one line per distinct feature list */
  il = find_same_line(is, features, nr);
  if (!il) {
    il = add_line(is, features, nr);
    if (!il) {
      return;
    }
  }

  /* per-feature totals */
  if (weight < 0) {
    accumlate_negative_features(is, features, nr, weight);
  } else {
    accumlate_features(is, features, nr, weight);
  }

  /* per-line totals */
  if (weight > 0) {
    il->weight += weight;
  } else {
    il->negative_weight += abs_weight;
  }
  is->total_weight += abs_weight;
}

/*
 * Allocate the lambda and delta arrays (is->max_feature elements each)
 * and fill them with their starting values: 0.1 for every lambda and 0
 * for every delta.
 *
 * The allocation results are not checked, and pointers already stored in
 * is->lambda / is->delta are overwritten without being freed.
 */
void
iis_init_lambda_and_delta(struct input_set *is)
{
  const int n = is->max_feature;
  double *lambda = malloc(sizeof(double) * n);
  double *delta = malloc(sizeof(double) * n);
  int f;

  is->lambda = lambda;
  is->delta = delta;
  for (f = 0; f < n; f++) {
    lambda[f] = 0.1;
    delta[f] = 0;
  }
}

/*
 * Tell whether feature number f occurs in the line's feature array.
 * Returns 1 when it does, 0 otherwise.
 */
static int
has_feature(struct input_line *il, int f)
{
  int found = 0;
  int k;

  /* stop scanning as soon as the feature is seen */
  for (k = 0; !found && k < il->nr_features; k++) {
    found = (il->features[k] == f);
  }
  return found;
}

/*
 * Evaluate the model for one line using the current lambda values:
 *
 *   exp( sum of lambda[f] over the line's features
 *        - sum over all features i of lambda[i] * negative_weight[i] )
 *   divided by is->z.
 *
 * The second sum runs over every feature of the set, so it is the same
 * for every line.
 */
static double
calc_p(struct input_set *is, struct input_line *il)
{
  double score = 0;
  double e;
  int k;

  /* contribution of the features present on this line */
  for (k = 0; k < il->nr_features; k++) {
    const int f = il->features[k];
    score += is->lambda[f];
  }
  /* contribution of the accumulated negative weights */
  for (k = 0; k < is->max_feature; k++) {
    score -= is->lambda[k] * is->negative_weight[k];
  }
  e = exp(score);
  return e / is->z;
}

/*
 * Recompute is->z as the sum of calc_p() over all stored lines.
 *
 * calc_p() divides by is->z, so z is set to 1 first; the values being
 * summed are therefore the unnormalized ones.
 */
static void
calc_z(struct input_set *is)
{
  struct input_line *cur;
  double total = 0;

  is->z = 1;
  cur = is->lines;
  while (cur) {
    total += calc_p(is, cur);
    cur = cur->next_line;
  }
  is->z = total;
}

/*
 * Residual of the update equation for feature f at a trial step `delta`.
 *
 * Over every line that contains feature f, sum
 *     calc_p(line) * exp(delta * number of features on the line)
 * and subtract the density of feature f, i.e.
 *     feature_weight[f] / total_weight.
 *
 * calc_delta() searches for the delta that brings this value to zero.
 */
static double
calc_rem(struct input_set *is, int f, double delta)
{
  const double density =
    (double)is->feature_weight[f] / (double)is->total_weight;
  double expected = 0;
  struct input_line *cur = is->lines;

  while (cur) {
    if (has_feature(cur, f)) {
      expected += calc_p(is, cur) * exp(delta * (double)cur->nr_features);
    }
    cur = cur->next_line;
  }
  return expected - density;
}

/*
 * Find the step for feature f by bisection on calc_rem() over the
 * interval [-10, 10] and store it in is->delta[f].
 *
 * A positive residual moves the upper bound down to the midpoint, any
 * other residual moves the lower bound up.  The search ends when the
 * residual is within 0.000001 of zero (or is NaN).
 *
 * It also ends when the midpoint coincides with one of the bounds.  At
 * that point the interval cannot shrink any further, so every later
 * iteration would evaluate the same delta again; without this exit the
 * loop never terminates when the residual cannot reach the tolerance
 * inside the interval.  The delta stored in that case is the last
 * midpoint, which lies at the edge the search was pushed towards.
 */
static void
calc_delta(struct input_set *is, int f)
{
  const double tolerance = 0.000001;
  double max_delta = 10, min_delta = -10;
  double delta;
  double rem;

  for (;;) {
    delta = (max_delta + min_delta) / 2;
    rem = calc_rem(is, f, delta);
    /* written as !(a > b) so that a NaN residual also stops the search */
    if (!(fabs(rem) > tolerance)) {
      break;
    }
    /* interval exhausted: no further progress is possible */
    if (delta <= min_delta || delta >= max_delta) {
      break;
    }
    if (rem > 0) {
      max_delta = delta;
    } else {
      min_delta = delta;
    }
  }
  is->delta[f] = delta;
}

/*
 * Run one round of the iteration.
 *
 * z is recomputed, then a step is computed for every feature, and only
 * after all steps are known are they added to lambda, so every step of a
 * round is derived from the same lambda values.
 *
 * Returns the sum of the absolute values of all steps taken.
 */
static double
update(struct input_set *is)
{
  const int n = is->max_feature;
  double moved = 0;
  int f;

  calc_z(is);

  /* first pass: compute the steps */
  for (f = 0; f < n; f++) {
    is->delta[f] = 0;
    calc_delta(is, f);
  }
  /* second pass: apply them */
  for (f = 0; f < n; f++) {
    const double step = is->delta[f];
    is->lambda[f] += step;
    moved += fabs(step);
  }
  return moved;
}

/*
 * Print the set to stdout: first the total weight, then one row per
 * stored line showing the line's positive weight (labelled "nr"), its
 * feature numbers, the value of calc_p() for the line and, in
 * parentheses, the line's positive weight divided by the total weight.
 */
void
iis_dump(struct input_set *is)
{
  struct input_line *cur = is->lines;

  printf("total_weight,%f\n", is->total_weight);
  while (cur) {
    const double model_p = calc_p(is, cur);
    const double observed_p = cur->weight / is->total_weight;
    int k;

    printf(" nr=%f ", cur->weight);
    for (k = 0; k < cur->nr_features; k++) {
      printf("%d,", cur->features[k]);
    }
    printf(" p=%f (%f)\n", model_p, observed_p);
    cur = cur->next_line;
  }
}

/*
 * Repeat update() until the total step size of a round is no larger than
 * `rem`.  Each round is reported on stderr together with its number.
 * Once the round counter exceeds 10000 the function prints "give up" and
 * returns without having reached the threshold.
 */
void
iis_iterate(struct input_set *is, double rem)
{
  int pass;

  for (pass = 1; ; pass++) {
    const double moved = update(is);

    fprintf(stderr, " delta=%f (%d)\n", moved, pass);
    if (pass > 10000) {
      fprintf(stderr, "give up\n");
      return;
    }
    /* negated comparison: a NaN total also ends the loop */
    if (!(fabs(moved) > rem)) {
      break;
    }
  }
}

/*
 * Return the normalization sum currently stored in the set
 * (the value last written by calc_z()).
 */
double
iis_get_z(struct input_set *is)
{
  const double z = is->z;

  return z;
}

/*
 * Return the lambda value of feature number `nth`.
 *
 * Returns 0 when nth is not a valid feature number.  The original only
 * rejected nth >= max_feature, so a negative nth read memory in front of
 * the array; negative values are now rejected as well.
 */
double
iis_get_lambda(struct input_set *is, int nth)
{
  if (nth < 0 || nth >= is->max_feature) {
    return 0;
  }
  return is->lambda[nth];
}

/*
 * Create an empty input set for `nr` features.
 *
 * The per-feature weight totals are allocated and zeroed, the line list
 * is empty and the total weight is 0.  z is set to 0 and lambda / delta
 * to NULL so that no field is left indeterminate; the two arrays are
 * allocated later by iis_init_lambda_and_delta().
 *
 * At least one element is requested for each array so that nr == 0 does
 * not depend on what malloc(0) returns.
 *
 * Returns the new set, or NULL when nr is negative or memory could not
 * be allocated.
 */
struct input_set *
iis_create(int nr)
{
  struct input_set *is;
  size_t count;
  int i;

  if (nr < 0) {
    /* would otherwise turn into a huge unsigned allocation size */
    return NULL;
  }
  count = nr > 0 ? (size_t)nr : 1;

  is = malloc(sizeof *is);
  if (!is) {
    return NULL;
  }
  is->lines = NULL;
  is->total_weight = 0;
  is->z = 0;
  is->lambda = NULL;
  is->delta = NULL;
  /* per-feature totals */
  is->max_feature = nr;
  is->feature_weight = malloc(sizeof *is->feature_weight * count);
  is->negative_weight = malloc(sizeof *is->negative_weight * count);
  if (!is->feature_weight || !is->negative_weight) {
    free(is->feature_weight);
    free(is->negative_weight);
    free(is);
    return NULL;
  }
  for (i = 0; i < nr; i++) {
    is->feature_weight[i] = 0;
    is->negative_weight[i] = 0;
  }
  return is;
}

/*
 * Return the head of the set's list of stored lines (NULL when the list
 * is empty).  Following lines are reached through next_line.
 */
struct input_line *
iis_get_input_line(struct input_set *is)
{
  struct input_line *head = is->lines;

  return head;
}
