/*
 * zoom.c (C) nejik 2003
 */ 

/*
 * References: Paul Heckbert's zoom program
 * http://www.cs.cmu.edu/~ph
 *
 * Copyright:
 * Copyright (c) 1989  Paul S. Heckbert
 * This source may be used for peaceful, nonprofit purposes only, unless
 * under licence from the author. This notice should remain in the source.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include <inttypes.h>

#ifdef ARCH_X86
#include "attributes.h"
#include "mmx.h"
#include "mm_accel.h"
#endif

#include "video.h"

#include "csp.h"
#include "zoom.h"
#include "util.h"

/*
 * File-wide resampling state shared by the zoom_set_*() setters and the
 * zoom_*_c / zoom_*_mmx kernels below.  Every member starts out zero/NULL.
 * The initializer is positional, so the order must follow the ZOOM_PARAM
 * declaration (zoom.h); the trailing comments name each group of members.
 */
static ZOOM_PARAM zoom_param = {
  0, 0,       /* csp, xy flag */
  0, 0,       /* src image width, height */
  0, 0,       /* dest image width, height */
  0, 0,       /* src width, height */
  0, 0,       /* dest width, height */
  0, 0,       /* stride */
  0, 0, 0, 0, /* left right top bottom */
  0, 0, 0, 0, /* start pos, end pos */
  0, 0, 0, 0, /* start pos, end pos for uv */
  0., 0.,     /* scale */
  0., 0.,     /* blur */
  0., 0.,     /* supp */
  NULL, NULL, /* x filter name, window name */
  NULL, NULL, /* y filter name, window name */
  0, 0,       /* window_length */
  NULL, NULL, /* wtab */
  0, 0,       /* 2_window_length */
  NULL, NULL, /* 2_wtab */
};

/*
 * Intermediate 16-bit buffer holding the result of the first resampling
 * pass; the zoom_* kernels write it in pass 1 and read it in pass 2.
 * It is not allocated anywhere in the visible part of this file.
 */
static short *work_buf = NULL;

/* Signature shared by every filter and window kernel: weight at offset x. */
typedef double (*FILT_FUNC)(double);

/*
 * Polynomial coefficients of the parameterized Mitchell filter.
 * Filled in by mitchell_init(); p* are used by filt_mitchell() for |x| < 1,
 * q* for 1 <= |x| < 2.
 */
typedef struct {        /* data for parameterized Mitchell filter */
    double p0, p2, p3;
    double q0, q1, q2, q3;
} mitchell_data;

/*
 * Parameters of the Kaiser window, filled in by kaiser_init().
 */
typedef struct {        /* data for parameterized Kaiser window */
    double a;           /* = w*(N-1)/2 in Oppenheim&Schafer notation */
    double i0a;         /* 1/bessel_i0(a), the normalization used by filt_kaiser() */
    /*
     * About the parameter a: typically 4<a<9.
     * It trades off main lobe width (sharpness)
     * for side lobe amplitude (ringing).
     */
} kaiser_data;

/*
 * Descriptor of one 1-D filter or window kernel; see the filt[] table.
 */
typedef struct {                /* A 1-D FILTER */
    const char *name;           /* name of filter */
    FILT_FUNC func;             /* filter function */
    double supp;                /* radius of nonzero portion */
    char windowme;              /* should filter be windowed? */
    char cardinal;              /* is this filter cardinal?
                                   ie, does func(x) = (x==0) for integer x? */
    char unitrange;             /* does filter stay within the range [0..1] */
} Filt;

/* Kernel most recently selected by filt_init() / filt_find(). */
static FILT_FUNC filt_func = NULL;

/*
 * Working copies of the currently selected filter and window descriptors.
 * They are copies of filt[] entries, so filt_set_supp() changes only the
 * copy's support, not the table's default.
 */
static Filt filter;
static Filt window;

/* Parameter blocks for filt_kaiser() and filt_mitchell(); set in filt_init(). */
static kaiser_data kd;
static mitchell_data md;

/*--------------- unit-area filters for unit-spaced samples ---------------*/

/* all filters centered on 0 */ 

/*
 * Box (pulse, Fourier window) filter.
 * Returns 1 on the half-open interval [-0.5, 0.5) and 0 everywhere else.
 */
double filt_box(double x)
{
    return (x >= -.5 && x < .5) ? 1. : 0.;
}

/*
 * Triangle (Bartlett window) filter: 1 - |x| for |x| < 1, otherwise 0.
 */
double filt_triangle(double x)
{
    /* the kernel is even, so evaluate it on the magnitude of x */
    const double a = (x < 0.) ? -x : x;

    if (a < 1.)
        return 1. - a;
    return 0.;
}

/*
 * 3rd order (quadratic) b-spline.
 * Nonzero for |x| < 1.5: 0.75 - x^2 in the centre piece and
 * 0.5 * (|x| - 1.5)^2 in the two outer pieces.
 */
double filt_quadratic(double x)
{
    /* the kernel is even, so evaluate it on the magnitude of x */
    const double a = (x < 0.) ? -x : x;

    if (a < .5)
        return .75 - a*a;
    if (a < 1.5) {
        const double t = a - 1.5;
        return .5*t*t;
    }
    return 0.;
}

/*
 * 4th order (cubic) b-spline.
 * Nonzero for |x| < 2, with one polynomial piece for |x| < 1 and another
 * for 1 <= |x| < 2.
 */
double filt_cubic(double x)
{
    /* the kernel is even, so evaluate it on the magnitude of x */
    const double a = (x < 0.) ? -x : x;

    if (a < 1.)
        return (4.+a*a*(-6.+a*3.))/6.;
    if (a < 2.) {
        const double t = 2.-a;
        return t*t*t/6.;
    }
    return 0.;
}

/*
 * Catmull-Rom (Overhauser) spline kernel.
 * Nonzero for |x| < 2; unlike the b-splines it takes negative values in
 * the outer pieces 1 < |x| < 2.
 */
double filt_catrom(double x)
{
    /* the kernel is even, so evaluate it on the magnitude of x */
    const double a = (x < 0.) ? -x : x;

    if (a < 1.)
        return .5*(2.+a*a*(-5.+a*3.));
    if (a < 2.)
        return .5*(4.+a*(-8.+a*(5.-a)));
    return 0.;
}

/*
 * Gaussian kernel (infinite support): exp(-2 x^2) * sqrt(2/PI).
 */
double filt_gaussian(double x)
{
    const double bell = exp(-2.*x*x);
    const double norm = sqrt(2./PI);

    return bell*norm;
}

/*
 * Sinc, the perfect lowpass filter (infinite support):
 * sin(PI x) / (PI x), with the removable singularity at x == 0 mapped to 1.
 */
double filt_sinc(double x)
{
  double px;

  if (x == 0.)
    return 1.;
  px = PI*x;
  return sin(px)/px;
}

/*
 * Bessel kernel (for circularly symmetric 2-d filtering, infinite support):
 * j1(PI x) / (2 x), with the value PI/4 substituted at x == 0.
 *
 * See Pratt "Digital Image Processing" p. 97 for Bessel functions;
 * zeros are at approx x=1.2197, 2.2331, 3.2383, 4.2411, 5.2428, 6.2439,
 * 7.2448, 8.2454
 */
double filt_bessel(double x)
{
  if (x == 0.)
    return PI/4.;
  return j1(PI*x)/(2.*x);
}

/*-------------------- parameterized filters --------------------*/

double filt_mitchell(double x)      /* Mitchell & Netravali's two-param cubic */
{
    register mitchell_data *m;

    /*
     * see Mitchell&Netravali, "Reconstruction Filters in Computer Graphics",
     * SIGGRAPH 88
     */
    m = &md;
    if (x<-2.) return 0.;
    if (x<-1.) return m->q0-x*(m->q1-x*(m->q2-x*m->q3));
    if (x<0.) return m->p0+x*x*(m->p2-x*m->p3);
    if (x<1.) return m->p0+x*x*(m->p2+x*m->p3);
    if (x<2.) return m->q0+x*(m->q1+x*(m->q2+x*m->q3));
    return 0.;
}

/*
 * Compute the Mitchell filter polynomial coefficients for parameters
 * (b, c) and store them in the mitchell_data block that `data` points to.
 */
static void mitchell_init(double b, double c, void *data)
{
  mitchell_data *coef = data;

  /* inner piece, |x| < 1 */
  coef->p0 = (6. - 2.*b) / 6.;
  coef->p2 = (-18. + 12.*b + 6.*c) / 6.;
  coef->p3 = (12. - 9.*b - 6.*c) / 6.;

  /* outer piece, 1 <= |x| < 2 */
  coef->q0 = (8.*b + 24.*c) / 6.;
  coef->q1 = (- 12.*b - 48.*c) / 6.;
  coef->q2 = (6.*b + 30.*c) / 6.;
  coef->q3 = (- b - 6.*c) / 6.;
}

/*-------------------- window functions --------------------*/

/*
 * Hanning window: 0.5 + 0.5 cos(PI x).
 */
double filt_hanning(double x)
{
    const double c = cos(PI*x);

    return .5+.5*c;
}

/*
 * Hamming window: 0.54 + 0.46 cos(PI x).
 */
double filt_hamming(double x)
{
    const double c = cos(PI*x);

    return .54+.46*c;
}

/*
 * Blackman window: 0.42 + 0.50 cos(PI x) + 0.08 cos(2 PI x).
 */
double filt_blackman(double x)
{
    const double c1 = cos(PI*x);
    const double c2 = cos(2.*PI*x);

    return .42+.50*c1+.08*c2;
}

/*-------------------- parameterized windows --------------------*/

/*
 * Modified zeroth order Bessel function of the first kind, evaluated by
 * summing its power series in (x*x/4) until a term drops to EPSILON or
 * below.  Are there better ways to compute this than the power series?
 */
double bessel_i0(double x)
{
    const double q = x*x/4.;
    double term = q;        /* series term of order 1 */
    double total = 1.;      /* series term of order 0 */
    int k = 2;

    while (term > EPSILON) {
        total += term;
        /* next term = previous term * q / k^2 */
        term *= (double)q/(k*k);
        k++;
    }
    return total;
}

double filt_kaiser(double x)        /* parameterized Kaiser window */
{
    /* from Oppenheim & Schafer, Hamming */
    kaiser_data *k = &kd;
    return bessel_i0(k->a*sqrt(1.-x*x))*k->i0a;
}

/*
 * Store the Kaiser parameter `a` and the reciprocal of bessel_i0(a) in the
 * kaiser_data block that `data` points to.
 */
static void kaiser_init(double a, void *data)
{
  kaiser_data *params = data;

  params->i0a = 1./bessel_i0(a);
  params->a = a;
}

/*--------------- filters for non-unit spaced samples ---------------*/

/*
 * Normal distribution (infinite support): has unit area, but it's not for
 * unit spaced samples.
 * Normal(x) = Gaussian(x/2)/2
 */
double filt_normal(double x)
{
    const double bell = exp(-x*x/2.);
    const double norm = sqrt(2.*PI);

    return bell/norm;
}

/*
 * Lanczos kernel with three lobes: sinc(x) * sinc(x/3) on [-3, 3),
 * 0 elsewhere.  Note the interval is half-open, exactly as the comparison
 * cascade below defines it.
 */
double filt_lanczos(double x)
{
  double a;

  if (x < -3.)
    return 0.;
  if (x < 0.)
    a = -x;
  else if (x < 3.)
    a = x;
  else
    return 0.;

  return (filt_sinc(a) * filt_sinc(a/3.));
}

/*
 * Hermite cubic kernel: (2|x| - 3) |x|^2 + 1 for |x| < 1, otherwise 0.
 */
double filt_hermite(double x)
{
  /* the kernel is even, so evaluate it on the magnitude of x */
  const double a = (x < 0.) ? -x : x;

  if (a < 1.)
    return ((2.*a-3.)*a*a+1.);
  return 0.;
}

/*
 * Bessel kernel multiplied by a Blackman window stretched over the
 * support of the currently selected filter (filter.supp).
 */
double filt_blackmanbessel(double x)
{
  const double kernel = filt_bessel(x);
  const double taper = filt_blackman(x/filter.supp);

  return (kernel * taper);
}

/*
 * Sinc kernel multiplied by a Blackman window stretched over the
 * support of the currently selected filter (filter.supp).
 */
double filt_blackmansinc(double x)
{
  const double kernel = filt_sinc(x);
  const double taper = filt_blackman(x/filter.supp);

  return (kernel * taper);
}

/*
 * Table of every known filter/window kernel, terminated by an entry whose
 * name is NULL.  Lookups (filt_check, filt_find, filt_get_default_supp)
 * match on the name with strcmp().  The order matters: filt[0] ("lanczos")
 * is the default filter and filt[1] ("blackman") the default window used
 * by filt_find().
 */
static Filt filt[] = {
/*  NAME        FUNC            SUPP    WIN CARD UNIT */
   {"lanczos",  filt_lanczos,   3.0,    0,  0,  1},
   {"blackman", filt_blackman,  1.0,    0,  1,  1},
   {"point",    filt_box,       0.0,    0,  1,  1},
   {"box",      filt_box,       0.5,    0,  1,  1},
   {"triangle", filt_triangle,  1.0,    0,  1,  1},
   {"quadratic",filt_quadratic, 1.5,    0,  0,  1},
   {"cubic",    filt_cubic,     2.0,    0,  0,  1},

   {"catrom",   filt_catrom,    2.0,    0,  1,  0},
   {"mitchell", filt_mitchell,  2.0,    0,  0,  0},

   {"gaussian", filt_gaussian,  1.25,   0,  0,  1},
   {"sinc",     filt_sinc,      4.0,    1,  1,  0},
   {"bessel",   filt_bessel,    3.2383, 1,  0,  0},

   {"hanning",  filt_hanning,   1.0,    0,  1,  1},
   {"hamming",  filt_hamming,   1.0,    0,  1,  1},
   {"kaiser",   filt_kaiser,    1.0,    0,  1,  1},

   {"normal",   filt_normal,    1.25,   0,  0,  1},
   {"hermite",  filt_hermite,   1.0,    0,  0,  1},
   {"blackmanbessel",  filt_blackmanbessel,   3.2383,    0,  0,  1},
   {"blackmansinc",  filt_blackmansinc,   4.0,    0,  0,  1},
   { NULL, NULL, 0, 0, 0, 0 }
};

//static int filt_num = sizeof(filt) / sizeof(Filt) - 1;
void
filt_init(void)
{
  mitchell_init(1./3., 1./3., &md);
  kaiser_init(6.5, &kd);
  filter = filt[0];
  window = filt[0];
  filt_func = filter.func;
}

/*
 * Windowed kernel: the current filter evaluated at x, multiplied by the
 * current window evaluated at x scaled to the filter's support.
 */
double
filt_window(double x)
{
  const double kernel = filter.func(x);
  const double taper = window.func(x / filter.supp);

  return kernel * taper;
}

/*
 * Return the default support radius recorded in the filt[] table for the
 * filter called `name`.
 *
 * name: filter name; NULL selects the default filter (filt[0]), matching
 *       what filt_check() does for a NULL name.
 *
 * Returns the table's supp value, or 0. when no entry has that name.
 */
double
filt_get_default_supp(const char *name)
{
  const Filt *f;

  /* strcmp() must not be handed a NULL pointer */
  if (name == NULL)
    return filt[0].supp;

  for (f = filt; f->name != NULL; f++) {
    if (strcmp(f->name, name) == 0)
      return f->supp;
  }
  return 0.;
}

double
filt_get_supp(void)
{
  return filter.supp;
}

void
filt_set_supp(double supp)
{
  filter.supp = supp;
}

const char *
filt_get_filt_name(void)
{
  return filter.name;
}

const char *
filt_get_window_name(void)
{
  return window.name;
}

const char*
filt_check(const char *name)
{
  int i;

  if (name == NULL)
    return filt[0].name;

  for (i = 0; filt[i].name != NULL; i++) {
    if (strcmp(filt[i].name, name) == 0)
      return filt[i].name;
  }
  return NULL;
}

/*
 * Select the current filter and window by name and return the kernel
 * function to evaluate.
 *
 * fil_name: filter name, or NULL for the default filter (filt[0]).
 * win_name: window name, or NULL for no windowing.  With NULL the window
 *           copy is set to filt[1] but the plain filter function is
 *           returned; with a name, filt_window() is returned.
 *
 * Returns the selected function and records it in filt_func, or NULL
 * (after a message on stderr) when a name is unknown.  An unknown filter
 * name leaves the module state untouched; an unknown window name is only
 * detected after `filter` has already been replaced.
 */
FILT_FUNC
filt_find(const char *fil_name, const char *win_name)
{
  int i;
  FILT_FUNC func = NULL;

  if (fil_name != NULL) {
    for (i = 0; filt[i].name != NULL; i++) {
      if (strcmp(filt[i].name, fil_name) == 0) {
        filter = filt[i];
        func = filter.func;
        break;
      }
    }
    /* the loop ran off the end of the table: no match */
    if (filt[i].name == NULL) {
      fprintf(stderr, "filt_find: %s filter not found.\n", fil_name);
      return NULL;
    }
  } else {
    filter = filt[0];
    func = filter.func;
  }

  if (win_name != NULL) {
    for (i = 0; filt[i].name != NULL; i++) {
      if (strcmp(filt[i].name, win_name) == 0) {
        window = filt[i];
        func = filt_window;
        break;
      }
    }
    if (filt[i].name == NULL) {
      fprintf(stderr, "filt_find: %s filter window not found.\n", win_name);
      return NULL;
    }
  } else {
    window = filt[1];
  }

  filt_func = func;
  return func;
}

/*
 * Set the horizontal filter name in zoom_param.
 *
 * The name is validated with filt_check(); the stored and returned value
 * is the table's name, or NULL (with a message on stderr) when the name is
 * unknown.
 */
const char*
zoom_set_xfilt(const char *filt_name)
{
  const char *resolved = filt_check(filt_name);

  zoom_param.x_filt_name = resolved;
  if (resolved == NULL)
    fprintf(stderr, "zoom_set_xfilt: %s not found.\n", filt_name);
  return resolved;
}

/*
 * Set the vertical filter name in zoom_param.
 *
 * The name is validated with filt_check(); the stored and returned value
 * is the table's name, or NULL (with a message on stderr) when the name is
 * unknown.
 */
const char*
zoom_set_yfilt(const char *filt_name)
{
  zoom_param.y_filt_name = filt_check(filt_name);
  if (!zoom_param.y_filt_name) {
    /* report under this function's own name, not zoom_set_xfilt */
    fprintf(stderr, "zoom_set_yfilt: %s not found.\n", filt_name);
  }
  return zoom_param.y_filt_name;
}

/*
 * Set the horizontal window name in zoom_param.
 *
 * The name is validated with filt_check(); the stored and returned value
 * is the table's name, or NULL (with a message on stderr) when the name is
 * unknown.
 */
const char*
zoom_set_xwindow(const char *window_name)
{
  const char *resolved = filt_check(window_name);

  zoom_param.x_window_name = resolved;
  if (resolved == NULL)
    fprintf(stderr, "zoom_set_xwindow: %s not found.\n", window_name);
  return resolved;
}

/*
 * Set the vertical window name in zoom_param.
 *
 * The name is validated with filt_check(); the stored and returned value
 * is the table's name, or NULL (with a message on stderr) when the name is
 * unknown.
 */
const char*
zoom_set_ywindow(const char *window_name)
{
  zoom_param.y_window_name = filt_check(window_name);
  if (!zoom_param.y_window_name) {
    /* report under this function's own name, not zoom_set_xwindow */
    fprintf(stderr, "zoom_set_ywindow: %s not found.\n", window_name);
  }
  return zoom_param.y_window_name;
}

void
zoom_set_xsupp(double supp)
{
  zoom_param.x_supp = supp;
}

void
zoom_set_ysupp(double supp)
{
  zoom_param.y_supp = supp;
}

void
zoom_set_xblur(double blur)
{
  zoom_param.x_blur = blur;
}

void
zoom_set_yblur(double blur)
{
  zoom_param.y_blur = blur;
}

#ifdef ARCH_X86

/*
 * Build the weight table for one axis, laid out for the MMX kernels.
 *
 * wtab:          dest_pix_num entries to fill in (start_pos, end_pos,
 *                window_length, weights).
 * dest_pix_num:  number of destination samples along this axis.
 * src_pix_num:   only compared against *src_max for a diagnostic.
 * src_start:     in: first source position; out: smallest start_pos used.
 * src_max:       in: source length (upper clamp); out: largest end_pos used.
 * scale, blur, supp: scale factor, blur multiplier and filter support;
 *                the effective support is max(1, 1/scale) * blur * supp,
 *                but never less than 0.5.
 * filt_name, window_name: passed to filt_find().
 * rgb_flag:      non-zero multiplies start_pos, end_pos and window_length
 *                by 3.
 *
 * Every row of weights is padded to a multiple of four shorts because
 * calc_wid_xy()/calc_wid_yx() consume four samples per step.  The invariant
 * kept for every entry is: weights[k] applies to source sample
 * start_pos + k.  When a padded window would run past the source, start_pos
 * is moved back by d samples and the weights are stored d slots into the
 * row (the leading slots stay zero from calloc).
 * NOTE(review): for sources shorter than one padded window this can make
 * start_pos negative - confirm callers never pass such sizes.
 *
 * All storage comes from one calloc() block whose address is
 * wtab[0].weights.  Returns the unpadded window width.  Exits the process
 * on allocation failure, on an unknown filter, or on a weight that does
 * not fit in a short.
 */
static int
make_weighttab_mmx(Weighttab *wtab, int dest_pix_num, int src_pix_num, int *src_start, int *src_max, double scale, double blur, double supp, const char *filt_name, const char *window_name, int rgb_flag)
{
  double filt_scale;
  int wid;
  int row_len;
  int i;
  int j;
  short *wp;
  FILT_FUNC filtfunc;
  double tx;
  int min_pos = INT_MAX;
  int max_pos = INT_MIN;
  int src_0 = *src_start;
  int src_len = *src_max;

  if (src_len < src_pix_num) {
    fprintf(stderr, "make_weighttab_mmx: src length is too large, src_len %d, src_pix_num %d.\n", src_len, src_pix_num);
  }

  tx = scale * (src_0);

  filtfunc = filt_find(filt_name, window_name);
  if (!filtfunc) {
    /* filt_find() already reported the unknown name; calling NULL would crash */
    fprintf(stderr, "make_weighttab_mmx: filter lookup failed.\n");
    exit(1);
  }
  filt_scale = (1.0 > 1./scale) ? 1.0 : 1./scale;
  filt_scale = filt_scale * blur;
  supp = filt_scale * supp;
  if (supp < 0.5)
    supp = 0.5;
  filt_set_supp(supp);
  wid = ceil(2.0 * supp);
  row_len = ((wid+3)/4)*4;      /* row length padded to 4 shorts */

  wp = (short*) calloc(dest_pix_num, row_len * sizeof(short));
  if (!wp)
    mem_alloc_fail("make_weighttab", IMM_EXIT);
  for (i = 0; i < dest_pix_num; i++) {
    wtab[i].weights = &wp[i * row_len];
  }

  for (j = 0; j < dest_pix_num; j++) {
    int i0, i1;
    int padded;
    double den;
    double sc;
    double cen;
    double tr;
    Weighttab *wt = &wtab[j];
    int sum;
    int t;
    int wpos;

    /* centre of destination sample j in source coordinates */
    cen = (double)j + 0.5 + tx;
    cen = cen / scale;
    i0 = cen - supp + 0.5;
    i1 = cen + supp + 0.5;
    if (i0 < 0) i0 = 0;
    if (i1 > src_len) i1 = src_len;
    wt->start_pos = i0;
    wt->end_pos = i1;
    wt->window_length = i1 - i0;

    /* normalization: make the real-valued weights sum to one */
    den = 0;
    for (i = i0; i < i1; i++)
      den += filtfunc(((double)i + 0.5 - cen) / filt_scale);
    sc = den==0. ? 1 : 1/den;
    sum = 0;

    /* keep the padded 4-sample reads inside the source (see header) */
    padded = ((wt->window_length+3)/4)*4;
    if ((i0 + padded) > src_len) {
      int d = (i0 + padded) - src_len;
      wt->start_pos = i0 - d;
      wp = wt->weights + d;
    } else {
      wp = wt->weights;
    }

    for (i = i0, wpos = 0; i < i1; i++, wpos++) {
      tr = filtfunc(((double)i + 0.5 - cen) / filt_scale);
      tr = sc * tr * WEIGHTONE;
      if (tr < SHRT_MIN || tr > SHRT_MAX) {
        fprintf(stderr, "tr=%g at %d\n", tr, j);
        exit(1);
      }
      t = floor(tr+0.5);
      wp[wpos] = t;
      sum = sum + t;
    }

    if (sum == 0) {
      /*
       * Every weight rounded away: fall back to the single sample in the
       * middle of the window.  The midpoint is (i0 + i1) / 2; the window
       * bounds are left alone and the unit weight is stored in the slot
       * belonging to that sample, so the weights[k] <-> start_pos + k
       * invariant holds with or without the alignment shift above.
       * An empty window (i1 <= i0) is left with all-zero weights.
       */
      for (i = i0, wpos = 0; i < i1; i++, wpos++)
        wp[wpos] = 0;
      if (i0 < i1)
        wp[(i0 + i1) / 2 - i0] = WEIGHTONE;
    } else {
      if (sum != WEIGHTONE) {
        /* put the rounding error on the sample nearest the centre */
        i = cen+0.5;
        if (i<i0) i = i0; else if (i>=i1) i = i1-1;
        t = WEIGHTONE - sum;
        wp[i-i0] += t;
      }
    }
    if (rgb_flag) {
      wt->start_pos *= 3;
      wt->end_pos *= 3;
      wt->window_length *= 3;
    }
    min_pos = (wt->start_pos > min_pos) ? min_pos : wt->start_pos;
    max_pos = (wt->end_pos < max_pos) ? max_pos : wt->end_pos;
  }
  *src_start = min_pos;
  *src_max = max_pos;

  return wid;
}

/*
 * Weighted sum of unsigned 8-bit samples: sum of src[k] * w[k].
 *
 * The loop runs while length > 0 in steps of four, so it always consumes
 * length rounded up to a multiple of 4 from both src and w; callers must
 * make that many elements readable.
 *
 * Register use (assuming the mmx.h macros map 1:1 onto the MMX
 * instructions they are named after): mm0 accumulates two 32-bit partial
 * sums, mm3 is the zero used to widen bytes to words.  The caller is
 * responsible for emms().
 */
static inline int
calc_wid_xy(uint8_t *src, int length, short *w)
{
	int sum;
	pxor_r2r(mm0,mm0);
	pxor_r2r(mm3,mm3);
	for (; length > 0; length -= 4) {
		movd_m2r(*src, mm1);
		movq_m2r(*w, mm2);
		punpcklbw_r2r(mm3, mm1);
		pmaddwd_r2r(mm2, mm1);
		paddd_r2r(mm1, mm0);
		src += 4;
		w += 4;
	}
	/* fold the upper 32-bit partial sum onto the lower one */
	movq_r2r(mm0, mm1);
	psrlq_i2r(32, mm1);
	paddd_r2r(mm1, mm0);
	movd_r2m(mm0, sum);
	return sum;
}

/*
 * Weighted sum of signed 16-bit samples: sum of src[k] * w[k].
 *
 * Same structure as the 8-bit variant but without the byte-to-word
 * widening step.  The loop runs while length > 0 in steps of four, so it
 * consumes length rounded up to a multiple of 4 from both src and w.
 * The caller is responsible for emms().
 */
static inline int
calc_wid_yx(short *src, int length, short *w)
{
	int sum;
	pxor_r2r(mm0,mm0);
	for (; length > 0; length -= 4) {
		movq_m2r(*src, mm1);
		movq_m2r(*w, mm2);
		pmaddwd_r2r(mm2, mm1);
		paddd_r2r(mm1, mm0);
		src += 4;
		w += 4;
	}
	/* fold the upper 32-bit partial sum onto the lower one */
	movq_r2r(mm0, mm1);
	psrlq_i2r(32, mm1);
	paddd_r2r(mm1, mm0);
	movd_r2m(mm0, sum);
	return sum;
}

/*
 * Accumulate one weighted line of unsigned 8-bit samples:
 * dst[k] += src[k] * cof.
 *
 * dst:   32-bit accumulators, read and written four at a time.
 * src:   8-bit samples, read four at a time.
 * width: number of samples; the loop runs while i > 0 in steps of four, so
 *        width is effectively rounded up to a multiple of 4.
 * cof:   signed 16-bit weight, broadcast to all four words of mm7.
 *
 * The 32-bit products are rebuilt from the high (pmulhw) and low (pmullw)
 * halves of the 16x16 multiply.  The caller is responsible for emms().
 */
static inline void
calc_line_yx(int *dst, uint8_t *src, int width, short cof)
{
	int i;
	uint32_t cofs;
	/* build the word pair in unsigned arithmetic: shifting a promoted
	 * (signed) int left into the sign bit would be undefined behavior */
	const uint32_t cof16 = (uint32_t)cof & 0xffffu;

	cofs = (cof16 << 16) | cof16;
	movd_m2r(cofs, mm7);
	movq_r2r(mm7, mm1);
	psllq_i2r(32, mm1);
	por_r2r(mm1, mm7);
	pxor_r2r(mm6, mm6);
	for (i=width; i > 0; i-=4) {
		movd_m2r(*src, mm1);
		punpcklbw_r2r(mm6, mm1);
		movq_r2r(mm1, mm2);

		pmulhw_r2r(mm7, mm1);
		pmullw_r2r(mm7, mm2);
		movq_r2r(mm1, mm3);
		movq_r2r(mm2, mm4);
		punpcklwd_r2r(mm1, mm2);
		punpckhwd_r2r(mm3, mm4);

		movq_m2r(*dst, mm0);
		paddd_r2r(mm2, mm0);
		movq_r2m(mm0, *dst);
		dst += 2;
		movq_m2r(*dst, mm0);
		paddd_r2r(mm4, mm0);
		movq_r2m(mm0, *dst);
		dst += 2;
		src += 4;
	}
}

/*
 * Accumulate one weighted line of signed 16-bit samples:
 * dst[k] += src[k] * cof.
 *
 * dst:   32-bit accumulators, read and written four at a time.
 * src:   16-bit samples, read four at a time.
 * width: number of samples; the loop runs while i > 0 in steps of four, so
 *        width is effectively rounded up to a multiple of 4.
 * cof:   signed 16-bit weight, broadcast to all four words of mm7.
 *
 * The 32-bit products are rebuilt from the high (pmulhw) and low (pmullw)
 * halves of the 16x16 multiply.  The caller is responsible for emms().
 */
static inline void
calc_line_xy(int *dst, short *src, int width, short cof)
{
	int i;
	uint32_t cofs;
	/* build the word pair in unsigned arithmetic: shifting a promoted
	 * (signed) int left into the sign bit would be undefined behavior */
	const uint32_t cof16 = (uint32_t)cof & 0xffffu;

	cofs = (cof16 << 16) | cof16;
	movd_m2r(cofs, mm7);
	movq_r2r(mm7, mm1);
	psllq_i2r(32, mm1);
	por_r2r(mm1, mm7);
	for (i=width; i > 0; i-=4) {
		movq_m2r(*src, mm1);
		movq_r2r(mm1, mm2);
		pmulhw_r2r(mm7, mm1);
		pmullw_r2r(mm7, mm2);
		movq_r2r(mm1, mm3);
		movq_r2r(mm2, mm4);
		punpcklwd_r2r(mm1, mm2);
		punpckhwd_r2r(mm3, mm4);

		movq_m2r(*dst, mm0);
		paddd_r2r(mm2, mm0);
		movq_r2m(mm0, *dst);
		dst += 2;
		movq_m2r(*dst, mm0);
		paddd_r2r(mm4, mm0);
		movq_r2m(mm0, *dst);
		dst += 2;
		src += 4;
	}
}

/*
 * Convert a line of 32-bit accumulators to 8-bit output:
 * dst[k] = saturate_u8(src[k] >> FINALSHIFT).
 *
 * Eight values are handled per iteration (four quadword loads, one
 * quadword store) and the loop runs while i > 0.
 * NOTE(review): when width is not a multiple of 8 the last iteration reads
 * src and writes dst beyond width - confirm the callers' buffers allow it.
 * The caller is responsible for emms().
 */
static inline void
calc_line_post_xy(uint8_t *dst, int *src, int width)
{
	int i;
	for (i = width; i > 0; i -= 8) {
		movq_m2r(*src, mm0);
		src += 2;
		movq_m2r(*src, mm1);
		src += 2;
		movq_m2r(*src, mm2);
		src += 2;
		movq_m2r(*src, mm3);
		src += 2;
		psrad_i2r(FINALSHIFT, mm0);
		psrad_i2r(FINALSHIFT, mm1);
		psrad_i2r(FINALSHIFT, mm2);
		psrad_i2r(FINALSHIFT, mm3);
		/* 32 -> 16 bit with signed saturation, then 16 -> 8 bit unsigned */
		packssdw_r2r(mm1, mm0);
		packssdw_r2r(mm3, mm2);
		packuswb_r2r(mm2, mm0);

		movq_r2m(mm0, *dst);
		dst += 8;
	}
}

/*
 * Convert a line of 32-bit accumulators to 16-bit intermediates:
 * dst[k] = saturate_s16(src[k] >> CHANBITS).
 *
 * Four values are handled per iteration and the loop runs while i > 0.
 * NOTE(review): when width is not a multiple of 4 the last iteration reads
 * src and writes dst beyond width - confirm the callers' buffers allow it.
 * The caller is responsible for emms().
 */
static inline void
calc_line_post_yx(short *dst, int *src, int width)
{
	int i;
	for (i = width; i > 0; i -= 4) {
		movq_m2r(*src, mm0);
		src += 2;
		movq_m2r(*src, mm1);
		src += 2;
		psrad_i2r(CHANBITS, mm0);
		psrad_i2r(CHANBITS, mm1);
		packssdw_r2r(mm1, mm0);
		movq_r2m(mm0, *dst);
		dst += 4;
	}
}

/*
 * MMX resampler, horizontal pass first, then vertical.
 *
 * dest: output image; src: input image.  All geometry, the colour space
 * and the weight tables are taken from zoom_param.
 *
 * Pass 1 filters every source row in [start_y, end_y) horizontally with
 * x_wtab and stores the result (>> CHANBITS) as 16-bit values in work_buf,
 * one row of dest_width * pix_bytes shorts per source row.
 * Pass 2 builds every destination row by accumulating the work_buf rows
 * selected by y_wtab into accum[], then calc_line_post_xy() shifts by
 * FINALSHIFT and packs to bytes.
 * For CSP_YV12 both passes are repeated for two half-size planes that
 * follow the first plane in src/dest, using the x2/y2 tables.
 *
 * NOTE(review): the MMX helpers process 4 or 8 elements per step, so rows
 * are touched up to the next multiple of 4/8 - confirm buffer sizes.
 */
static void
zoom_xy_mmx(unsigned char *dest, unsigned char *src)
{
  int src_image_width = zoom_param.src_image_width;
  int src_image_height = zoom_param.src_image_height;
  int dest_width = zoom_param.dest_width;
  int dest_height = zoom_param.dest_height;
  int src_y0 = zoom_param.start_y;
  int src_y1 = zoom_param.end_y;
  int csp = zoom_param.csp;
  unsigned char *sp, *dp;
  short *isp, *idp;
  int width, height;
  int src_stride, dest_stride;
  int i, x, y, dpos;
  int sum, sum_r, sum_g, sum_b;
  Weighttab *x_wtab = zoom_param.x_wtab;
  Weighttab *y_wtab = zoom_param.y_wtab;
  short *wp;
  int pix_bytes = 1;
  /* one destination row of 32-bit accumulators, sized for 3 bytes/pixel */
  int accum[dest_width * 3];

  if (csp == CSP_RGB24) {
    pix_bytes = 3;
  }

  /* pass 1: horizontal filtering of the needed source rows into work_buf */
  dest_stride = dest_width * pix_bytes;
  src_stride = src_image_width * pix_bytes;
  width = dest_width;
  height = src_y1 - src_y0;
  idp = &work_buf[src_y0 * dest_stride];
  for (y = src_y0; y < src_y1; y++) {
    sp = &src[y * src_stride];
    if (csp == CSP_RGB24) {
      /* interleaved RGB is done in plain C: one weight per 3-byte pixel */
      dpos = 0;
      for (x = 0; x < width; x++) {
        wp = x_wtab[x].weights;
        sum_r = sum_g = sum_b = 0;
        for (i = x_wtab[x].start_pos; i < x_wtab[x].end_pos;) {
				  sum_r += (sp[i++] * *wp);
				  sum_g += (sp[i++] * *wp);
				  sum_b += (sp[i++] * *wp);
				  wp++;
        }
        idp[dpos++] = (sum_r>>CHANBITS);
        idp[dpos++] = (sum_g>>CHANBITS);
        idp[dpos++] = (sum_b>>CHANBITS);
      }
    } else {
      for (x = 0; x < width; x++) {
	sum = calc_wid_xy(&sp[x_wtab[x].start_pos], x_wtab[x].window_length,
		 	  x_wtab[x].weights);
        idp[x] = (sum>>CHANBITS);
      }
    }
    idp += dest_stride;
  }

  /* pass 2: vertical filtering of work_buf rows into dest */
  height = dest_height;
  width = dest_width * pix_bytes;
  src_stride = dest_width * pix_bytes;
  dest_stride = dest_width * pix_bytes;
  for (y = 0; y < height; y++) {
    dp = &dest[y * dest_stride];
    memset(accum, 0, dest_stride * sizeof(int));
    wp = y_wtab[y].weights;
    for (i = y_wtab[y].start_pos; i < y_wtab[y].end_pos; i++) {
      isp = &work_buf[i * src_stride];
			calc_line_xy(accum, isp, width, *wp);
      wp++;
    }
#if 0
    for (i = 0; i < width; i++) {
      dp[i] = accum[i] >> FINALSHIFT;
    }
#endif
		calc_line_post_xy(dp, accum, width);
  }

  if (csp == CSP_YV12) {
    /* offsets of the two quarter-size planes that follow the first plane */
    int sv_offset = src_image_width * src_image_height;
    int su_offset = sv_offset + sv_offset / 4;
    int dv_offset = dest_width * dest_height;
    int du_offset = dv_offset + dv_offset / 4;
    int soffset;
    int doffset;
    int uv;

    y_wtab = zoom_param.y2_wtab;
    x_wtab = zoom_param.x2_wtab;
    src_image_width /= 2;
    src_image_height /= 2;
    dest_height /= 2;
    dest_width /= 2;
    src_y0 = zoom_param.start_y_uv;
    src_y1 = zoom_param.end_y_uv;

    soffset = sv_offset;
    doffset = dv_offset;
    /* two iterations: first the plane at sv/dv_offset, then su/du_offset */
    for (uv = 2; uv > 0; uv--) {
      dest_stride = dest_width;
      src_stride = src_image_width;
      width = dest_width;
      height = src_y1 - src_y0;
      idp = &work_buf[src_y0 * dest_stride];
      for (y = src_y0; y < src_y1; y++) {
        sp = &src[soffset + y * src_stride];
        for (x = 0; x < width; x++) {
					sum = calc_wid_xy(&sp[x_wtab[x].start_pos], x_wtab[x].window_length,
						 	x_wtab[x].weights);
          idp[x] = (sum>>CHANBITS);
        }
        idp += dest_stride;
      }

      height = dest_height;
      width = dest_width;
      src_stride = dest_width;
      dest_stride = dest_width;
      for (y = 0; y < height; y++) {
        dp = &dest[doffset + y * dest_stride];
        memset(accum, 0, dest_stride * sizeof(int));
        wp = y_wtab[y].weights;
        for (i = y_wtab[y].start_pos; i < y_wtab[y].end_pos; i++) {
          isp = &work_buf[i * src_stride];
					calc_line_xy(accum, isp, width, *wp);
          wp++;
        }
#if 0
				for (i = 0; i < width; i++) {
					dp[i] = accum[i] >> FINALSHIFT;
				}
#endif
				calc_line_post_xy(dp, accum, width);
      }
      soffset = su_offset;
      doffset = du_offset;
    }
  }
	/* emms() comes from mmx.h; presumably it leaves MMX state so that
	 * floating-point code can run again - TODO confirm */
	emms();
}

/*
 * MMX resampler, vertical pass first, then horizontal.
 *
 * dest: output image; src: input image.  All geometry, the colour space
 * and the weight tables are taken from zoom_param.
 *
 * Pass 1 builds dest_height intermediate rows: for each one the source
 * rows selected by y_wtab are accumulated over the column range
 * [start_x, end_x) into accum[], then calc_line_post_yx() shifts by
 * CHANBITS and stores 16-bit values into work_buf (row stride
 * src_image_width * pix_bytes shorts).
 * Pass 2 filters every work_buf row horizontally with x_wtab, shifts by
 * FINALSHIFT and clamps to 0..255.
 * For CSP_YV12 both passes are repeated for two half-size planes that
 * follow the first plane in src/dest, using the x2/y2 tables.
 *
 * NOTE(review): the MMX helpers process 4 elements per step, so rows are
 * touched up to the next multiple of 4 - confirm buffer sizes.
 */
static void
zoom_yx_mmx(unsigned char *dest, unsigned char *src)
{
  int src_image_width = zoom_param.src_image_width;
  int src_image_height = zoom_param.src_image_height;
  int dest_width = zoom_param.dest_width;
  int dest_height = zoom_param.dest_height;
  int src_x0 = zoom_param.start_x;
  int src_x1 = zoom_param.end_x;
  int csp = zoom_param.csp;
  unsigned char *sp, *dp;
  short *isp, *idp;
  int width, height;
  int src_stride, dest_stride;
  int i, x, y, dpos;
  int sum, sum_r, sum_g, sum_b;
  Weighttab *x_wtab = zoom_param.x_wtab;
  Weighttab *y_wtab = zoom_param.y_wtab;
  short *wp;
  int pix_bytes = 1;
  /* accumulators for the used column range, sized with a factor of 3 */
  int accum[(src_x1-src_x0)*3];

  if (csp == CSP_RGB24) {
    pix_bytes = 3;
  }

  /* pass 1: vertical filtering of the used source columns into work_buf */
  dest_stride = src_image_width * pix_bytes;
  src_stride = src_image_width * pix_bytes;
  height = dest_height;
  width = src_x1 - src_x0;
  idp = work_buf;
  for (y = 0; y < height; y++) {
    wp = y_wtab[y].weights;
//    memset(accum, 0, sizeof(int) * width);
    memset(accum, 0, sizeof(int) * width * pix_bytes);
    for (i = y_wtab[y].start_pos; i < y_wtab[y].end_pos; i++) {
      sp = &src[i * src_stride];
      calc_line_yx(accum, &sp[src_x0], width, *wp);
      wp++;
    }
    calc_line_post_yx(&idp[src_x0], accum, width);
#if 0
    for (x = src_x0, i = 0; x < src_x1; x++, i++) {
      idp[x] = accum[i] >> CHANBITS;
    }
#endif
    idp += dest_stride;
  }

  /* pass 2: horizontal filtering of work_buf rows into dest */
  height = dest_height;
  width = dest_width;
  src_stride = src_image_width * pix_bytes;
  dest_stride = dest_width * pix_bytes;
  for (y = 0; y < height; y++) {
    isp = &work_buf[y * src_stride];
    dp = &dest[y * dest_stride];
    dpos = 0;
    for (x = 0; x < width; x++) {
      wp = x_wtab[x].weights;
      if (csp == CSP_RGB24) {
	/* interleaved RGB is done in plain C: one weight per 3-value pixel */
	sum_r = sum_g = sum_b = 0;
        for (i = x_wtab[x].start_pos; i < x_wtab[x].end_pos;) {
          sum_r += isp[i++] * *wp;
          sum_g += isp[i++] * *wp;
          sum_b += isp[i++] * *wp;
				  wp++;
        } 
        sum_r >>= FINALSHIFT;
        sum_g >>= FINALSHIFT;
        sum_b >>= FINALSHIFT;
        dp[dpos++] = (sum_r > 255) ? 255: (sum_r < 0) ? 0 : sum_r;
        dp[dpos++] = (sum_g > 255) ? 255: (sum_g < 0) ? 0 : sum_g;
        dp[dpos++] = (sum_b > 255) ? 255: (sum_b < 0) ? 0 : sum_b;
      } else {
 	sum = calc_wid_yx(&isp[x_wtab[x].start_pos], x_wtab[x].window_length,
					 	x_wtab[x].weights);
        sum >>= FINALSHIFT;
        dp[x] = (sum > 255) ? 255: (sum < 0) ? 0 : sum;
      }
    }
  }

  if (csp == CSP_YV12) {
    /* offsets of the two quarter-size planes that follow the first plane */
    int sv_offset = src_image_width * src_image_height;
    int su_offset = sv_offset + sv_offset / 4;
    int dv_offset = dest_width * dest_height;
    int du_offset = dv_offset + dv_offset / 4;
    int soffset;
    int doffset;
    int uv;

    y_wtab = zoom_param.y2_wtab;
    x_wtab = zoom_param.x2_wtab;
    src_image_width /= 2;
    src_image_height /= 2;
    dest_height /= 2;
    dest_width /= 2;
    src_x0 = zoom_param.start_x_uv;
    src_x1 = zoom_param.end_x_uv;

    soffset = sv_offset;
    doffset = dv_offset;
    /* two iterations: first the plane at sv/dv_offset, then su/du_offset */
    for (uv = 2; uv > 0; uv--) {
      dest_stride = src_image_width;
      src_stride = src_image_width;
      height = dest_height;
      width = src_x1 - src_x0;
      idp = work_buf;
      for (y = 0; y < height; y++) {
        wp = y_wtab[y].weights;
//        memset(idp, 0, sizeof(short) * dest_stride);
    	memset(accum, 0, sizeof(int) * width * pix_bytes);
        for (i = y_wtab[y].start_pos; i < y_wtab[y].end_pos; i++) {
          sp = &src[soffset + i * src_stride];
	  calc_line_yx(accum, &sp[src_x0], width, *wp);
          wp++;
        }
	calc_line_post_yx(&idp[src_x0], accum, width);
#if 0
   	for (x = src_x0, i = 0; x < src_x1; x++, i++) {
          idp[x] = accum[i] >> CHANBITS;
        }
#endif
        idp += dest_stride;
      }

      height = dest_height;
      width = dest_width;
      src_stride = src_image_width;
      dest_stride = dest_width;
      for (y = 0; y < height; y++) {
        isp = &work_buf[y * src_stride];
        dp = &dest[doffset + y * dest_stride];
        for (x = 0; x < width; x++) {
          wp = x_wtab[x].weights;
	  sum = calc_wid_yx(&isp[x_wtab[x].start_pos], x_wtab[x].window_length,
					 	x_wtab[x].weights);
       	  sum >>= FINALSHIFT;
          dp[x] = (sum > 255) ? 255: (sum < 0) ? 0 : sum;
        }
      }
      soffset = su_offset;
      doffset = du_offset;
    }
  }
	/* emms() comes from mmx.h; presumably it leaves MMX state so that
	 * floating-point code can run again - TODO confirm */
	emms();
}

#endif /* ARCH_X86 */

/*
 * Build the weight table for one axis, for the plain C kernels.
 *
 * wtab:          dest_pix_num entries to fill in (start_pos, end_pos,
 *                window_length, weights).
 * dest_pix_num:  number of destination samples along this axis.
 * src_pix_num:   only compared against *src_max for a diagnostic.
 * src_start:     in: first source position; out: smallest start_pos used.
 * src_max:       in: source length (upper clamp); out: largest end_pos used.
 * scale, blur, supp: scale factor, blur multiplier and filter support;
 *                the effective support is max(1, 1/scale) * blur * supp,
 *                but never less than 0.5.
 * filt_name, window_name: passed to filt_find().
 * rgb_flag:      non-zero multiplies start_pos, end_pos and window_length
 *                by 3.
 *
 * For every entry weights[k] applies to source sample start_pos + k, for
 * start_pos <= start_pos + k < end_pos; the integer weights of one entry
 * sum to WEIGHTONE.
 *
 * All storage comes from one calloc() block whose address is
 * wtab[0].weights.  Returns the window width (weights per entry).  Exits
 * the process on allocation failure, on an unknown filter, or on a weight
 * that does not fit in a short.
 */
static int
make_weighttab_c(Weighttab *wtab, int dest_pix_num, int src_pix_num, int *src_start, int *src_max, double scale, double blur, double supp, const char *filt_name, const char *window_name, int rgb_flag)
{
  double filt_scale;
  int wid;
  int i;
  int j;
  short *wp;
  FILT_FUNC filtfunc;
  double tx;
  int min_pos = INT_MAX;
  int max_pos = INT_MIN;
  int src_0 = *src_start;
  int src_len = *src_max;

  if (src_len < src_pix_num) {
    fprintf(stderr, "make_weighttab_c: src length is too large, src_len %d, src_pix_num %d.\n", src_len, src_pix_num);
  }

  tx = scale * (src_0);

  filtfunc = filt_find(filt_name, window_name);
  if (!filtfunc) {
    /* filt_find() already reported the unknown name; calling NULL would crash */
    fprintf(stderr, "make_weighttab_c: filter lookup failed.\n");
    exit(1);
  }
  filt_scale = (1.0 > 1./scale) ? 1.0 : 1./scale;
  filt_scale = filt_scale * blur;
  supp = filt_scale * supp;
  if (supp < 0.5)
    supp = 0.5;
  filt_set_supp(supp);
  wid = ceil(2.0 * supp);

  wp = (short*) calloc(dest_pix_num, wid * sizeof(short));
  if (!wp)
    mem_alloc_fail("make_weighttab", IMM_EXIT);
  for (i = 0; i < dest_pix_num; i++) {
    wtab[i].weights = &wp[i * wid];
  }

  for (j = 0; j < dest_pix_num; j++) {
    int i0, i1;
    double den;
    double sc;
    double cen;
    double tr;
    Weighttab *wt = &wtab[j];
    int sum;
    int t;

    /* centre of destination sample j in source coordinates */
    cen = (double)j + 0.5 + tx;
    cen = cen / scale;
    i0 = cen - supp + 0.5;
    i1 = cen + supp + 0.5;
    if (i0 < 0) i0 = 0;
    if (i1 > src_len) i1 = src_len;
    wt->start_pos = i0;
    wt->end_pos = i1;
    wt->window_length = i1 - i0;

    /* normalization: make the real-valued weights sum to one */
    den = 0;
    for (i = i0; i < i1; i++)
      den += filtfunc(((double)i + 0.5 - cen) / filt_scale);
    sc = den==0. ? 1 : 1/den;
    sum = 0;
    for (i = i0, wp = wt->weights; i < i1; i++) {
      tr = filtfunc(((double)i + 0.5 - cen) / filt_scale);
      tr = sc * tr * WEIGHTONE;
      if (tr < SHRT_MIN || tr > SHRT_MAX) {
        fprintf(stderr, "tr=%g at %d\n", tr, j);
        exit(1);
      }
      t = floor(tr+0.5);
      *wp++ = t;
      sum = sum + t;
    }
    if (sum == 0) {
      /*
       * Every weight rounded away: collapse the window to the single
       * sample in its middle.  The midpoint is (start + end) / 2; without
       * the parentheses the division binds to end_pos alone and the
       * position lands far outside the window.  The result is also kept
       * inside [0, src_len) so an empty window at the edge cannot index
       * past the source.
       */
      int mid = (wt->start_pos + wt->end_pos) / 2;

      if (mid >= src_len) mid = src_len - 1;
      if (mid < 0) mid = 0;
      wt->start_pos = mid;
      wt->end_pos = mid + 1;
      wt->weights[0] = WEIGHTONE;
    } else {
      if (sum != WEIGHTONE) {
        /* put the rounding error on the sample nearest the centre */
        i = cen+0.5;
        if (i<i0) i = i0; else if (i>=i1) i = i1-1;
        t = WEIGHTONE - sum;
        wt->weights[i-i0] += t;
      }
    }
    if (rgb_flag) {
      wt->start_pos *= 3;
      wt->end_pos *= 3;
      wt->window_length *= 3;
    }
    min_pos = (wt->start_pos > min_pos) ? min_pos : wt->start_pos;
    max_pos = (wt->end_pos < max_pos) ? max_pos : wt->end_pos;
  }
  *src_start = min_pos;
  *src_max = max_pos;

  return wid;
}

/*
 * Plain C resampler, horizontal pass first, then vertical.
 *
 * dest: output image; src: input image.  All geometry, the colour space
 * and the weight tables are taken from zoom_param.
 *
 * Pass 1 filters every source row in [start_y, end_y) horizontally with
 * x_wtab and stores the result (>> CHANBITS) as 16-bit values in work_buf,
 * one row of dest_width * pix_bytes shorts per source row.
 * Pass 2 builds every destination row by accumulating the work_buf rows
 * selected by y_wtab into accum[], shifting by FINALSHIFT and clamping to
 * 0..255.
 * For CSP_YV12 both passes are repeated for two half-size planes that
 * follow the first plane in src/dest, using the x2/y2 tables.
 */
static void
zoom_xy_c(unsigned char *dest, unsigned char *src)
{
  int src_image_width = zoom_param.src_image_width;
  int src_image_height = zoom_param.src_image_height;
  int dest_width = zoom_param.dest_width;
  int dest_height = zoom_param.dest_height;
  int src_y0 = zoom_param.start_y;
  int src_y1 = zoom_param.end_y;
  int csp = zoom_param.csp;
  unsigned char *sp, *dp;
  short *isp, *idp;
  int width, height;
  int src_stride, dest_stride;
  int i, x, y, dpos;
  int sum, sum_r, sum_g, sum_b;
  Weighttab *x_wtab = zoom_param.x_wtab;
  Weighttab *y_wtab = zoom_param.y_wtab;
  short *wp;
  int pix_bytes = 1;
  /* one destination row of 32-bit accumulators, sized for 3 bytes/pixel */
  int accum[dest_width * 3];

  if (csp == CSP_RGB24) {
    pix_bytes = 3;
  }

  /* pass 1: horizontal filtering of the needed source rows into work_buf */
  dest_stride = dest_width * pix_bytes;
  src_stride = src_image_width * pix_bytes;
  width = dest_width;
  height = src_y1 - src_y0;
  idp = &work_buf[src_y0 * dest_stride];
  for (y = src_y0; y < src_y1; y++) {
    sp = &src[y * src_stride];
    if (csp == CSP_RGB24) {
      dpos = 0;
      for (x = 0; x < width; x++) {
        wp = x_wtab[x].weights;
        sum_r = sum_g = sum_b = 0;
        /* one weight per 3-byte pixel; i advances inside the body */
        for (i = x_wtab[x].start_pos; i < x_wtab[x].end_pos;) {
	  sum_r += (sp[i++] * *wp);
	  sum_g += (sp[i++] * *wp);
	  sum_b += (sp[i++] * *wp);
	  wp++;
        }
        idp[dpos++] = (sum_r>>CHANBITS);
        idp[dpos++] = (sum_g>>CHANBITS);
        idp[dpos++] = (sum_b>>CHANBITS);
      }
    } else {
      for (x = 0; x < width; x++) {
        wp = x_wtab[x].weights;
        sum = 0;
        for (i = x_wtab[x].start_pos; i < x_wtab[x].end_pos; i++) {
	  sum += (sp[i] * (*wp));
          wp++;
        }
        idp[x] = (sum>>CHANBITS);
      }
    }
    idp += dest_stride;
  }

  /* pass 2: vertical filtering of work_buf rows into dest */
  height = dest_height;
  width = dest_width * pix_bytes;
  src_stride = dest_width * pix_bytes;
  dest_stride = dest_width * pix_bytes;
  for (y = 0; y < height; y++) {
    dp = &dest[y * dest_stride];
//    memset(accum, 0, dest_stride * sizeof(int));
    memset(accum, 0, width * sizeof(int));
    wp = y_wtab[y].weights;
    for (i = y_wtab[y].start_pos; i < y_wtab[y].end_pos; i++) {
      isp = &work_buf[i * src_stride];
      for (x = 0; x < width; x++) {
	accum[x] += isp[x] * (*wp);
      }
      wp++;
    }
    for (x = 0; x < width; x++) {
      int t = accum[x] >> FINALSHIFT;
      dp[x] = (t > 255) ? 255 : (t < 0) ? 0 : t;
    }
  }

  if (csp == CSP_YV12) {
    /* offsets of the two quarter-size planes that follow the first plane */
    int sv_offset = src_image_width * src_image_height;
    int su_offset = sv_offset + sv_offset / 4;
    int dv_offset = dest_width * dest_height;
    int du_offset = dv_offset + dv_offset / 4;
    int soffset;
    int doffset;
    int uv;

    y_wtab = zoom_param.y2_wtab;
    x_wtab = zoom_param.x2_wtab;
    src_image_width /= 2;
    src_image_height /= 2;
    dest_height /= 2;
    dest_width /= 2;
    src_y0 = zoom_param.start_y_uv;
    src_y1 = zoom_param.end_y_uv;

    soffset = sv_offset;
    doffset = dv_offset;
    /* two iterations: first the plane at sv/dv_offset, then su/du_offset */
    for (uv = 2; uv > 0; uv--) {
      dest_stride = dest_width;
      src_stride = src_image_width;
      width = dest_width;
      height = src_y1 - src_y0;
      idp = &work_buf[src_y0 * dest_stride];
      for (y = src_y0; y < src_y1; y++) {
        sp = &src[soffset + y * src_stride];
        for (x = 0; x < width; x++) {
          wp = x_wtab[x].weights;
          sum = 0;
          for (i = x_wtab[x].start_pos; i < x_wtab[x].end_pos; i++) {
	    sum += (sp[i] * (*wp));
            wp++;
          }
          idp[x] = (sum>>CHANBITS);
        }
        idp += dest_stride;
      }

      height = dest_height;
      width = dest_width;
      src_stride = dest_width;
      dest_stride = dest_width;
      for (y = 0; y < height; y++) {
        dp = &dest[doffset + y * dest_stride];
//        memset(accum, 0, dest_stride * sizeof(int));
        memset(accum, 0, width * sizeof(int));
        wp = y_wtab[y].weights;
        for (i = y_wtab[y].start_pos; i < y_wtab[y].end_pos; i++) {
          isp = &work_buf[i * src_stride];
          for (x = 0; x < width; x++) {
	    accum[x] += isp[x] * (*wp);
          }
          wp++;
        }
        for (x = 0; x < width; x++) {
          int t = accum[x] >> FINALSHIFT;
          dp[x] = (t > 255) ? 255 : (t < 0) ? 0 : t;
        }
      }
      soffset = su_offset;
      doffset = du_offset;
    }
  }
}

/*
 * Vertical (y) filtering step of the y-then-x scaler.
 *
 * For each of the `rows` output rows, accumulates the weighted source lines
 * listed in vtab[row] over columns [col_begin, col_end) and stores the sums,
 * shifted right by CHANBITS, into row `row` of work_buf.  `stride` is both
 * the source line pitch and the work_buf row pitch; `accum` is caller-owned
 * scratch holding at least `stride` ints.
 */
static void
yx_vpass_c(const unsigned char *plane, int stride, int rows,
           int col_begin, int col_end, const Weighttab *vtab, int *accum)
{
  int row, tap, col;

  for (row = 0; row < rows; row++) {
    const Weighttab *vt = &vtab[row];
    const short *coef = vt->weights;
    short *mid = work_buf + row * stride;

    memset(accum, 0, sizeof(int) * stride);
    for (tap = vt->start_pos; tap < vt->end_pos; tap++, coef++) {
      const unsigned char *line = plane + tap * stride;

      for (col = col_begin; col < col_end; col++)
        accum[col] += line[col] * *coef;
    }
    for (col = col_begin; col < col_end; col++)
      mid[col] = accum[col] >> CHANBITS;
  }
}

/*
 * Horizontal (x) filtering step for one-byte-per-pixel planes: reads the
 * intermediate rows from work_buf (row pitch `mid_stride`), applies htab[x]
 * to produce each output sample, shifts by FINALSHIFT and clamps to 0..255.
 */
static void
yx_hpass_c(unsigned char *out_plane, int out_stride, int out_w, int rows,
           int mid_stride, const Weighttab *htab)
{
  int row, x, tap;

  for (row = 0; row < rows; row++) {
    const short *mid = work_buf + row * mid_stride;
    unsigned char *out = out_plane + row * out_stride;

    for (x = 0; x < out_w; x++) {
      const Weighttab *ht = &htab[x];
      const short *coef = ht->weights;
      int sum = 0;

      for (tap = ht->start_pos; tap < ht->end_pos; tap++, coef++)
        sum += mid[tap] * *coef;

      sum >>= FINALSHIFT;
      if (sum > 255)
        sum = 255;
      else if (sum < 0)
        sum = 0;
      out[x] = sum;
    }
  }
}

/*
 * Horizontal (x) filtering step for packed 3-byte pixels: the tap positions
 * in htab[x] advance three intermediate samples at a time and one weight is
 * shared by the three channels of a pixel.
 */
static void
yx_hpass_rgb_c(unsigned char *out_plane, int out_stride, int out_w, int rows,
               int mid_stride, const Weighttab *htab)
{
  int row, x, tap, ch;

  for (row = 0; row < rows; row++) {
    const short *mid = work_buf + row * mid_stride;
    unsigned char *out = out_plane + row * out_stride;

    for (x = 0; x < out_w; x++) {
      const Weighttab *ht = &htab[x];
      const short *coef = ht->weights;
      int chan[3] = { 0, 0, 0 };

      for (tap = ht->start_pos; tap < ht->end_pos; tap += 3, coef++) {
        chan[0] += mid[tap] * *coef;
        chan[1] += mid[tap + 1] * *coef;
        chan[2] += mid[tap + 2] * *coef;
      }

      for (ch = 0; ch < 3; ch++) {
        int v = chan[ch] >> FINALSHIFT;

        if (v > 255)
          v = 255;
        else if (v < 0)
          v = 0;
        *out++ = v;
      }
    }
  }
}

/*
 * Plain C scaler that filters vertically first and horizontally second.
 *
 * All geometry and weight tables come from zoom_param; work_buf holds the
 * intermediate (vertically filtered) image.  For CSP_RGB24 the main plane is
 * handled as packed 3-byte pixels; for CSP_YV12 the two half-size planes
 * that follow the main plane in both buffers are scaled as well, using the
 * x2/y2 weight tables and the *_uv column range.
 */
static void
zoom_yx_c(unsigned char *dest, unsigned char *src)
{
  const int csp = zoom_param.csp;
  const int bpp = (csp == CSP_RGB24) ? 3 : 1;
  const int src_w = zoom_param.src_image_width;
  const int src_h = zoom_param.src_image_height;
  const int out_w = zoom_param.dest_width;
  const int out_h = zoom_param.dest_height;
  const int mid_stride = src_w * bpp;
  int accum[src_w * 3];   /* sized for the widest (RGB) row */

  /* main plane: vertical pass into work_buf, then horizontal pass to dest */
  yx_vpass_c(src, mid_stride, out_h,
             zoom_param.start_x, zoom_param.end_x, zoom_param.y_wtab, accum);

  if (csp == CSP_RGB24)
    yx_hpass_rgb_c(dest, out_w * bpp, out_w, out_h, mid_stride,
                   zoom_param.x_wtab);
  else
    yx_hpass_c(dest, out_w * bpp, out_w, out_h, mid_stride,
               zoom_param.x_wtab);

  if (csp == CSP_YV12) {
    const int src_main = src_w * src_h;
    const int out_main = out_w * out_h;
    /* byte offsets of the two quarter-size planes behind the main plane */
    const int src_off[2] = { src_main, src_main + src_main / 4 };
    const int out_off[2] = { out_main, out_main + out_main / 4 };
    const int half_src_w = src_w / 2;
    const int half_out_w = out_w / 2;
    const int half_out_h = out_h / 2;
    int plane;

    for (plane = 0; plane < 2; plane++) {
      yx_vpass_c(src + src_off[plane], half_src_w, half_out_h,
                 zoom_param.start_x_uv, zoom_param.end_x_uv,
                 zoom_param.y2_wtab, accum);
      yx_hpass_c(dest + out_off[plane], half_out_w, half_out_w, half_out_h,
                 half_src_w, zoom_param.x2_wtab);
    }
  }
}


/*
 * Scaler implementations chosen by zoom_init().  zoom() calls zoom_xy when
 * zoom_param.xy is non-zero and zoom_yx otherwise.
 */
static void (*zoom_xy)(unsigned char *dest, unsigned char *src);
static void (*zoom_yx)(unsigned char *dest, unsigned char *src);

void
zoom(unsigned char *dest, unsigned char *src)
{
  if (zoom_param.xy)
    zoom_xy(dest, src);
  else
    zoom_yx(dest, src);
}

/*
 * Free a weight table left over from an earlier zoom_init() call and clear
 * its slot.  The release sequence (weights of entry 0, then the table) is
 * the one this file has always used; presumably make_weighttab allocates a
 * single weights buffer reachable through entry 0 -- TODO confirm.
 */
static void
zoom_init_release_wtab(Weighttab **slot)
{
  if (*slot) {
    free((*slot)->weights);
    free(*slot);
    *slot = NULL;
  }
}

/*
 * Prepare the scaler: validate the geometry, build the x/y weight tables,
 * allocate the intermediate work buffer and choose the cheaper pass order.
 *
 * src_image_width/height   size of the source image
 * src_x1, src_y1           top-left of the source region
 * src_x2, src_y2           right/bottom edge of the source region (region
 *                          size is x2 - x1, y2 - y1); a value <= 0 is
 *                          replaced by image size minus x1 / y1
 * dest_*                   same layout for the destination
 * in_csp                   CSP_YUV420P, CSP_I420 and CSP_YV12 are handled as
 *                          CSP_YV12; CSP_RGB24 as packed RGB
 *
 * Filter names, window names, blur and support are taken from zoom_param;
 * a blur or support of 0 is replaced by 1.0 / the filter's default support.
 *
 * Returns 0 on success, -1 on invalid arguments or unknown filter.
 * Allocation failures are handed to mem_alloc_fail(..., IMM_EXIT).
 */
int
zoom_init(int src_image_width, int src_image_height, int src_x1, int src_y1, int src_x2, int src_y2, int dest_image_width, int dest_image_height, int dest_x1, int dest_y1, int dest_x2, int dest_y2, int in_csp)
{
  int ww, wh;
  short *buf;
  int pix_depth = 0;
  int start_pos;
  int max_pos;
  int rgb_flag = 0;
  int csp = 0;
  int xy, yx;

  int (*make_weighttab)(Weighttab *wtab, int dest_pix_num, int src_pix_num, int *src_start, int *src_max, double scale, double blur, double supp, const char *filt_name, const char *window_name, int rgb_flag);

#ifdef ARCH_X86
  { uint32_t cpu_accel;
    cpu_accel = mm_accel();
    make_weighttab = make_weighttab_c;
    zoom_xy = zoom_xy_c;
    zoom_yx = zoom_yx_c;
    if (cpu_accel & MM_ACCEL_X86_MMXEXT) {
      make_weighttab = make_weighttab_mmx;
      zoom_xy = zoom_xy_mmx;
      zoom_yx = zoom_yx_mmx;
    }
    /* NOTE(review): the disabled branch pairs the MMX flag with the
     * *_mmxext kernels while the live branch pairs MMXEXT with *_mmx;
     * confirm the intended mapping before enabling it. */
#if 0
    if (cpu_accel & MM_ACCEL_X86_MMX) {
      make_weighttab = make_weighttab_mmx;
      zoom_xy = zoom_xy_mmxext;
      zoom_yx = zoom_yx_mmxext;
    }
#endif
  }
#else
  make_weighttab = make_weighttab_c;
  zoom_xy = zoom_xy_c;
  zoom_yx = zoom_yx_c;
#endif /* ARCH_X86 */

  switch (in_csp) {
    case CSP_YUV420P:
    case CSP_I420:
    case CSP_YV12:
      csp = CSP_YV12;
      pix_depth = 12;
      break;
    case CSP_RGB24:
      csp = CSP_RGB24;
      pix_depth = 24;
      break;
    default:
      /* pix_depth would stay 0 and the work buffer would be zero bytes,
       * which the scalers would then write past. */
      fprintf(stderr, "zoom_init: unsupported color space %d.\n", in_csp);
      return -1;
  }

  zoom_param.csp = csp;

  if ((src_image_width <= 0) || (src_image_height <= 0) ||
      (src_image_width < src_x2) || (src_image_height < src_y2) ||
      (src_x1 < 0) || (src_y1 < 0) ||
      (dest_image_width <= 0) || (dest_image_height <= 0) ||
      (dest_image_width < dest_x2) || (dest_image_height < dest_y2) ||
      (dest_x1 < 0) || (dest_y1 < 0)) {
    fprintf(stderr, "zoom_init: dimension error.\n");
    return -1;
  }

  if (csp == CSP_YV12) {
    if ((src_x1 % 2) || (src_y1 % 2) || (src_x2 % 2) || (src_y2 % 2) ||
        (src_image_width % 2) || (src_image_height % 2) ||
        (dest_x1 % 2) || (dest_y1 % 2) || (dest_x2 % 2) || (dest_y2 % 2) ||
        (dest_image_width % 2) || (dest_image_height % 2)) {
      fprintf(stderr, "zoom_init: x start, y start, width, height must be even value.\n");
      return -1;
    }
  }

  /* NOTE(review): x2/y2 are used as edge coordinates below, yet the default
   * is "image size - x1"; with a non-zero x1 this trims both sides.  Kept
   * as is -- confirm against callers. */
  if (src_x2 <= 0)
    src_x2 = src_image_width - src_x1;
  if (src_y2 <= 0)
    src_y2 = src_image_height - src_y1;

  if (dest_x2 <= 0)
    dest_x2 = dest_image_width - dest_x1;
  if (dest_y2 <= 0)
    dest_y2 = dest_image_height - dest_y1;

  /* An empty or inverted region would give a zero divisor for the scale
   * factors and a non-positive element count for the weight tables. */
  if ((src_x2 <= src_x1) || (src_y2 <= src_y1) ||
      (dest_x2 <= dest_x1) || (dest_y2 <= dest_y1)) {
    fprintf(stderr, "zoom_init: dimension error.\n");
    return -1;
  }

  zoom_param.src_image_width = src_image_width;
  zoom_param.src_image_height = src_image_height;
  zoom_param.dest_image_width = dest_image_width;
  zoom_param.dest_image_height = dest_image_height;
  zoom_param.src_width = src_x2 - src_x1;
  zoom_param.src_height = src_y2 - src_y1;
  zoom_param.dest_width = dest_x2 - dest_x1;
  zoom_param.dest_height = dest_y2 - dest_y1;

  zoom_param.src_left = src_x1;
  zoom_param.src_right = src_image_width - src_x2;
  zoom_param.src_top = src_y1;
  zoom_param.src_bottom = src_image_height - src_y2;

  zoom_param.src_stride = src_image_width;
  zoom_param.dest_stride = dest_image_width;

#if 0
  printf("src_width %d, src_height %d, dest_width %d, dest_height %d\n",
  zoom_param.src_width,
  zoom_param.src_height,
  zoom_param.dest_width,
  zoom_param.dest_height
      );
  printf("left %d, right %d, top %d, bottom %d\n",
  zoom_param.src_left,
  zoom_param.src_right,
  zoom_param.src_top,
  zoom_param.src_bottom
      );
#endif
  zoom_param.x_scale =
    (double)zoom_param.dest_width / (double)zoom_param.src_width;
  zoom_param.y_scale =
    (double)zoom_param.dest_height / (double)zoom_param.src_height;

  filt_init();
  if (filt_find(zoom_param.x_filt_name, zoom_param.x_window_name) == NULL) {
    /* the name may still be unset here; never hand NULL to %s */
    fprintf(stderr,"zoom_init: cannot find filter %s",
        zoom_param.x_filt_name ? zoom_param.x_filt_name : "(null)");
    if (zoom_param.x_window_name)
      fprintf(stderr,", window %s\n",zoom_param.x_window_name);
    else
      fprintf(stderr,"\n");
    return -1;
  }
  zoom_param.x_filt_name = filt_get_filt_name();
  if (zoom_param.x_blur == 0)
    zoom_param.x_blur = 1.;
  if (zoom_param.x_supp == 0)
    zoom_param.x_supp = filt_get_default_supp(zoom_param.x_filt_name);

  if (filt_find(zoom_param.y_filt_name, zoom_param.y_window_name) == NULL) {
    /* no newline here: the window name (or the bare newline) follows */
    fprintf(stderr,"zoom_init: cannot find filter %s",
        zoom_param.y_filt_name ? zoom_param.y_filt_name : "(null)");
    if (zoom_param.y_window_name)
      fprintf(stderr,", window %s\n",zoom_param.y_window_name);
    else
      fprintf(stderr,"\n");
    return -1;
  }
  zoom_param.y_filt_name = filt_get_filt_name();
  if (zoom_param.y_blur == 0)
    zoom_param.y_blur = 1.;
  if (zoom_param.y_supp == 0)
    zoom_param.y_supp = filt_get_default_supp(zoom_param.y_filt_name);

  /* main-plane weight tables, one entry per destination column / row */
  zoom_init_release_wtab(&zoom_param.x_wtab);
  zoom_param.x_wtab = calloc(zoom_param.dest_width, sizeof *zoom_param.x_wtab);
  if (!zoom_param.x_wtab)
    mem_alloc_fail("zoom_init", IMM_EXIT);

  zoom_init_release_wtab(&zoom_param.y_wtab);
  zoom_param.y_wtab = calloc(zoom_param.dest_height, sizeof *zoom_param.y_wtab);
  if (!zoom_param.y_wtab)
    mem_alloc_fail("zoom_init", IMM_EXIT);

  rgb_flag = (csp == CSP_RGB24) ? 1 : 0;
  start_pos = src_x1;
  max_pos = src_image_width;
  zoom_param.x_window_length = make_weighttab(zoom_param.x_wtab,
      zoom_param.dest_width, zoom_param.src_width, &start_pos, &max_pos,
      zoom_param.x_scale, zoom_param.x_blur, zoom_param.x_supp,
      zoom_param.x_filt_name, zoom_param.x_window_name, rgb_flag);
  zoom_param.start_x = start_pos;
  zoom_param.end_x = max_pos;

  /* the vertical table (and both half-size tables) never use the RGB flag */
  rgb_flag = 0;
  start_pos = src_y1;
  max_pos = src_image_height;
  zoom_param.y_window_length = make_weighttab(zoom_param.y_wtab,
      zoom_param.dest_height, zoom_param.src_height, &start_pos, &max_pos,
      zoom_param.y_scale, zoom_param.y_blur, zoom_param.y_supp,
      zoom_param.y_filt_name, zoom_param.y_window_name, rgb_flag);
  zoom_param.start_y = start_pos;
  zoom_param.end_y = max_pos;

  /* Always drop the old half-size tables: a previous YV12 setup would
   * otherwise be leaked when re-initialising for another color space. */
  zoom_init_release_wtab(&zoom_param.x2_wtab);
  zoom_init_release_wtab(&zoom_param.y2_wtab);

  if (csp == CSP_YV12) {
    zoom_param.x2_wtab = calloc(zoom_param.dest_width/2, sizeof *zoom_param.x2_wtab);
    if (!zoom_param.x2_wtab)
      mem_alloc_fail("zoom_init", IMM_EXIT);
    zoom_param.y2_wtab = calloc(zoom_param.dest_height/2, sizeof *zoom_param.y2_wtab);
    if (!zoom_param.y2_wtab)
      mem_alloc_fail("zoom_init", IMM_EXIT);

    start_pos = src_x1 / 2;
    max_pos = src_image_width / 2;
    zoom_param.x2_window_length = make_weighttab(zoom_param.x2_wtab,
        zoom_param.dest_width/2, zoom_param.src_width/2, &start_pos, &max_pos,
        zoom_param.x_scale, zoom_param.x_blur, zoom_param.x_supp,
        zoom_param.x_filt_name, zoom_param.x_window_name, rgb_flag);
    zoom_param.start_x_uv = start_pos;
    zoom_param.end_x_uv = max_pos;

    start_pos = src_y1 / 2;
    max_pos = src_image_height / 2;
    zoom_param.y2_window_length = make_weighttab(zoom_param.y2_wtab,
        zoom_param.dest_height/2, zoom_param.src_height/2, &start_pos, &max_pos,
        zoom_param.y_scale, zoom_param.y_blur, zoom_param.y_supp,
        zoom_param.y_filt_name, zoom_param.y_window_name, rgb_flag);
    zoom_param.start_y_uv = start_pos;
    zoom_param.end_y_uv = max_pos;
  }

  /* work buffer: shorts, sized from the larger of source and destination */
  ww = (dest_image_width > src_image_width) ? dest_image_width : src_image_width;
  wh = (dest_image_height > src_image_height) ? dest_image_height : src_image_height;
  free(work_buf);
  work_buf = NULL;
  /* widen before multiplying so large frames cannot overflow int */
  buf = malloc((size_t)ww * wh * sizeof(short) * pix_depth / 8);
  if (!buf)
    mem_alloc_fail("zoom_init", IMM_EXIT);
  work_buf = buf;

  /* rough multiply counts of the x-then-y and y-then-x orders */
  xy = ((zoom_param.end_y - zoom_param.start_y) * zoom_param.dest_width *
    zoom_param.x_window_length * pix_depth / 8) +
    (zoom_param.dest_height * zoom_param.y_window_length *
     zoom_param.dest_width * pix_depth / 8);

  yx = (zoom_param.dest_height * zoom_param.y_window_length *
      (zoom_param.end_x - zoom_param.start_x)) +
    (zoom_param.dest_height * zoom_param.dest_width *
     zoom_param.x_window_length * pix_depth / 8);

  zoom_param.xy = (xy < yx) ? 1 : 0;

#ifndef NDEBUG
  printf("zoom_init: x filter %s",zoom_param.x_filt_name);
  if (zoom_param.x_window_name)
    printf(", window %s\n",zoom_param.x_window_name);
  else
    printf("\n");
  printf("zoom_init: y filter %s",zoom_param.y_filt_name);
  if (zoom_param.y_window_name)
    printf(", window %s\n",zoom_param.y_window_name);
  else
    printf("\n");
#endif
  return 0;
}

