/* vutil.c */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "csp.h"
#include "video.h"
#include "vutil.h"

/*
 * field_interleave - weave two frames into one interlaced frame.
 *
 * Even-numbered lines of dest (0, 2, 4, ...) are copied from the frame that
 * supplies the top field and odd-numbered lines from the frame that supplies
 * the bottom field; every line is taken from the same line position in its
 * source frame.  When order == TOP_FEILD_FIRST the top field comes from
 * current_frame and the bottom field from previous_frame; for any other
 * value the roles are swapped.
 *
 *   dest            output frame
 *   previous_frame  earlier frame
 *   current_frame   later frame
 *   width, height   frame size in pixels
 *   csp             CSP_YV12, CSP_YUV420P / CSP_I420 or CSP_RGB24; any other
 *                   value is handled as one plane of `width` bytes per line
 *   order           field order selector (see above)
 *
 * For the planar 4:2:0 formats only the first plane is woven; both chroma
 * planes are copied unchanged from current_frame.
 */
void
field_interleave(unsigned char *dest, unsigned char *previous_frame, unsigned char *current_frame, int width, int height, int csp, int order)
{
  int has_chroma = 0;
  int v_offset = 0;
  int u_offset = 0;
  int uv_size = 0;
  int line_bytes = width;
  int line_pairs;
  int offset = 0;
  unsigned char *top;
  unsigned char *bottom;
  int i;

  switch (csp) {
    case CSP_YV12:
      /* plane order: Y, V, U */
      has_chroma = 1;
      v_offset = width * height;
      u_offset = v_offset + (v_offset / 4);
      uv_size = (width / 2) * (height / 2);
      break;
    case CSP_YUV420P:
    case CSP_I420:
      /* plane order: Y, U, V */
      has_chroma = 1;
      u_offset = width * height;
      v_offset = u_offset + (u_offset / 4);
      uv_size = (width / 2) * (height / 2);
      break;
    case CSP_RGB24:
      /* packed, three bytes per pixel */
      line_bytes = width * 3;
      break;
    default:
      break;
  }

  if (order == TOP_FEILD_FIRST) {
    top = current_frame;
    bottom = previous_frame;
  } else {
    top = previous_frame;
    bottom = current_frame;
  }

  /* Copy two lines per iteration: an even line from top, an odd from bottom. */
  line_pairs = height / 2;
  for (i = 0; i < line_pairs; i++) {
    memcpy(dest + offset, top + offset, line_bytes);
    offset += line_bytes;

    memcpy(dest + offset, bottom + offset, line_bytes);
    offset += line_bytes;
  }

  /* With an odd height one even-numbered line remains; it belongs to the
   * top field and would otherwise be left unwritten in dest. */
  if (height > 0 && (height & 1)) {
    memcpy(dest + offset, top + offset, line_bytes);
  }

  if (has_chroma) {
    memcpy(dest + v_offset, current_frame + v_offset, uv_size);
    memcpy(dest + u_offset, current_frame + u_offset, uv_size);
  }
}

#if defined ARCH_X86 && (defined USE_MMXEXT || defined USE_MMX)

#include "attributes.h"
#include "mmx.h"

#ifdef USE_MMXEXT

/*
 * x_flicker1 (MMXEXT) - sum of absolute differences between two lines.
 *
 * Handles width / 8 groups of 8 bytes; a remainder of width % 8 bytes is
 * not examined.  The MMX registers are left in use on return; the callers
 * in this file issue emms() once their line loop has finished.
 */
static inline unsigned long
x_flicker1(unsigned char *src0, unsigned char *src1, int width)
{
  /* movd stores exactly 32 bits, so the target is a 32-bit object; storing
   * into an unsigned long would leave its upper bytes indeterminate on
   * targets where long is wider than 32 bits. */
  unsigned int result = 0;

  width >>= 3;
  pxor_r2r (mm0, mm0);                          // clear mm0 (accumulator)
  while (width > 0) {
    movq_m2r (*src0, mm1);                      // load 8 pixels of src0
    psadbw_m2r (*src1, mm1);                    // mm1 = sum |src0 - src1| over 8 pixels
    paddd_r2r (mm1, mm0);                       // accumulate into mm0

    src0 += 8;                                  // advance src0
    src1 += 8;                                  // advance src1
    width--;                                    // one group done
  }
  movd_r2m (mm0, result);                       // low 32 bits of mm0 to memory
  return result;
}

/*
 * x_flicker2 (MMXEXT) - neighbourhood sum of absolute differences.
 *
 * For every pixel x of src0 this adds |src0[x] - src1[x-1]|,
 * |src0[x] - src1[x]| and |src0[x] - src1[x+1]|.  At the left and right
 * ends the missing neighbour is replaced by the edge pixel of src1.
 *
 * The line is processed in groups of 8 bytes (width / 8 groups).  The
 * prologue and the tail run unconditionally, so at least one group is
 * always processed.  Register roles:
 *   mm0  accumulator
 *   mm1  current 8 pixels of src1      mm2  current 8 pixels of src0
 *   mm3  scratch                       mm6  carried neighbour byte
 *   mm7  next 8 pixels of src1
 * The MMX registers are left in use on return; the caller issues emms().
 */
static inline unsigned long
x_flicker2(unsigned char *src0, unsigned char *src1, int width)
{
  /* movd stores exactly 32 bits, so the target is a 32-bit object; storing
   * into an unsigned long would leave its upper bytes indeterminate on
   * targets where long is wider than 32 bits. */
  unsigned int result = 0;

  width >>= 3;
  width--;                            // the last group is handled after the loop

  pxor_r2r (mm0, mm0);                // clear accumulator mm0
  movq_m2r (*src1, mm1);              // load first 8 pixels of src1
  movq_r2r (mm1, mm6);                // copy src1 group
  movq_m2r (*src0, mm2);              // load first 8 pixels of src0
  psllq_i2r (56, mm6);                // keep only the lowest byte ...
  psrlq_i2r (56, mm6);                // ... in the lowest position (left edge replica)
  src0 += 8;                          // advance src0
  src1 += 8;                          // advance src1
  while (width > 0) {
    movq_r2r (mm1, mm3);              // src1 group to mm3
    psadbw_r2r (mm2, mm3);            // SAD against src1[x]
    paddd_r2r (mm3, mm0);             // accumulate

    movq_r2r (mm1, mm3);              // src1 group to mm3
    psllq_i2r (8, mm3);               // shift up one pixel: lane x holds src1[x-1]
    por_r2r (mm6, mm3);               // fill the lowest lane with the carried pixel
    psadbw_r2r (mm2, mm3);            // SAD against src1[x-1]
    paddd_r2r (mm3, mm0);             // accumulate

    movq_m2r (*src1, mm7);            // load next 8 pixels of src1
    movq_r2r (mm7, mm6);              //
    psllq_i2r (56, mm6);              // next group's lowest pixel into the highest lane

    movq_r2r (mm1, mm3);              // current src1 group to mm3
    psrlq_i2r (8, mm3);               // shift down one pixel: lane x holds src1[x+1]
    por_r2r (mm6, mm3);               // fill the highest lane with the next group's first pixel
    psadbw_r2r (mm2, mm3);            // SAD against src1[x+1]
    paddd_r2r (mm3, mm0);             // accumulate

    movq_m2r (*src0, mm2);            // next src0 group to mm2
    movq_r2r (mm1, mm6);              //
    psrlq_i2r (56, mm6);              // carry the highest pixel for the next iteration
    movq_r2r (mm7, mm1);              // next src1 group becomes current

    src0 += 8;                        // advance src0
    src1 += 8;                        // advance src1
    width--;                          // one group done
  }
  movq_r2r (mm1, mm3);                // last group: same steps, nothing more to load
  psadbw_r2r (mm2, mm3);
  paddd_r2r (mm3, mm0);

  movq_r2r (mm1, mm3);
  psllq_i2r (8, mm3);
  por_r2r (mm6, mm3);
  psadbw_r2r (mm2, mm3);
  paddd_r2r (mm3, mm0);

  /* Replicate the last pixel of src1 as its own right neighbour.  mm1 is
   * used here rather than mm7: the two are equal whenever the loop above
   * ran, but mm7 is never loaded when the line has a single group. */
  movq_r2r (mm1, mm6);
  psrlq_i2r (56, mm6);
  psllq_i2r (56, mm6);

  movq_r2r (mm1, mm3);
  psrlq_i2r (8, mm3);
  por_r2r (mm6, mm3);
  psadbw_r2r (mm2, mm3);
  paddd_r2r (mm3, mm0);

  movd_r2m (mm0, result);            // low 32 bits of mm0 to memory
  return result;
}

/*
 * rgb2y_line (MMXEXT) - convert one line of packed 3-byte pixels to 8-bit luma.
 *
 * Per pixel the result is ((p0 * 25 + p1 * 129 + p2 * 66) >> 8) + 16, where
 * p0..p2 are the pixel's three bytes in memory order; the + 16 is applied
 * with a saturating byte add.  Pixels are produced 8 at a time: the loop
 * runs while width > 0 and always writes a full 8 bytes to y_dest, so a
 * width that is not a multiple of 8 writes past `width` output bytes.
 *
 * NOTE(review): 25 / 129 / 66 look like the usual blue / green / red luma
 * weights, which would make the memory order B, G, R - confirm against the
 * producer of the buffer.
 *
 * The MMX registers are left in use on return; rgb2y() issues emms().
 */
static inline void
rgb2y_line(unsigned char *y_dest, unsigned char *rgb, int width)
{
  // 0x10 in each of the 8 byte lanes: the +16 luma offset
  static mmx_t y_add = { 0x1010101010101010LL };
  // 16-bit lanes, low to high: 25, 129, 66, 0
  static mmx_t y_mul = { (((25LL)<<0)|((129LL)<<16)|((66LL)<<32)) };
  int i;

  movq_m2r (y_mul, mm7);          // mm7 = per-byte weights
  movq_m2r (y_add, mm6);          // mm6 = offset

  while (width > 0) {
    pxor_r2r (mm4, mm4);          // mm4 collects 8 luma bytes
    for (i = 0; i < 8; i++) {
      psrlq_i2r (8, mm4);         // make room in the highest byte lane
      // movd is a 32-bit load: the 3 pixel bytes plus the byte after them.
      // The extra byte meets the zero weight lane, so it does not affect
      // the sum.  NOTE(review): for the last pixel of a buffer this touches
      // one byte past the pixel data - confirm buffers allow that.
      movd_m2r (*rgb, mm0);
      pxor_r2r (mm1, mm1);
      punpcklbw_r2r (mm1, mm0);   // widen the 4 bytes to 4 words
      pmaddwd_r2r (mm7, mm0);     // dwords: p0*25 + p1*129, p2*66 + 0
      movq_r2r (mm0, mm1);
      psrlq_i2r (32, mm1);        // bring the high dword down
      paddd_r2r (mm1, mm0);       // low dword = full weighted sum
      psrld_i2r (8, mm0);         // divide by 256
      psllq_i2r (56, mm0);        // keep the low byte, moved to the highest lane
      por_r2r (mm0, mm4);         // merge into the collector

      rgb += 3;                   // next pixel
    }

    paddusb_r2r (mm6, mm4);       // saturating +16 on all 8 bytes
    movq_r2m (mm4, *y_dest);      // store 8 luma bytes

    y_dest += 8;
    width -= 8;
  }
}

#else /* MMX */

/*
 * x_flicker1 (plain MMX) - sum of absolute differences between two lines.
 *
 * Handles width / 8 groups of 8 bytes; a remainder of width % 8 bytes is
 * not examined.  The return type matches the other x_flicker1 variants in
 * this file (unsigned long).  The MMX registers are left in use on return;
 * the caller issues emms().
 */
static inline unsigned long
x_flicker1(unsigned char *src0, unsigned char *src1, int width)
{
  /* movd stores exactly 32 bits, so the target is a 32-bit object. */
  unsigned int result = 0;

  width >>= 3;
  pxor_r2r (mm7, mm7);            // mm7 = 0, used as the zero half when unpacking
  pxor_r2r (mm0, mm0);            // mm0 = 0, accumulator (2 dwords)
  while (width > 0) {
    movq_m2r (*src0, mm1);        // 8 pixels from src0 to mm1
    movq_m2r (*src1, mm2);        // 8 pixels from src1 to mm2

    movq_r2r (mm2, mm3);          // keep a copy of mm2 in mm3

    psubusb_r2r (mm1, mm3);       // mm3 = saturating (src1 - src0)
    psubusb_r2r (mm2, mm1);       // mm1 = saturating (src0 - src1)
    paddusb_r2r (mm3, mm1);       // mm1 = |src0 - src1| for 8 bytes

    movq_r2r (mm1, mm2);          // copy mm1 to mm2

    punpcklbw_r2r (mm7, mm1);     // low 4 bytes widened to 4 words
    punpckhbw_r2r (mm7, mm2);     // high 4 bytes widened to 4 words
    paddw_r2r (mm2, mm1);         // 4 word sums

    movq_r2r (mm1, mm2);          // copy mm1 to mm2

    punpcklwd_r2r (mm7, mm1);     // low 2 words widened to 2 dwords
    punpckhwd_r2r (mm7, mm2);     // high 2 words widened to 2 dwords
    paddd_r2r (mm2, mm1);         // 2 dword sums

    paddd_r2r (mm1, mm0);         // accumulate this group's differences

    src0 += 8;
    src1 += 8;
    width--;
  }

  movq_r2r (mm0, mm1);            // copy mm0 to mm1

  punpckldq_r2r (mm7, mm0);       // mm0 = low dword of the accumulator
  punpckhdq_r2r (mm7, mm1);       // mm1 = high dword of the accumulator
  paddd_r2r (mm1, mm0);           // total in the low dword of mm0

  movd_r2m (mm0, result);         // low 32 bits of mm0 to memory

  return result;
}

static inline unsigned long
x_flicker2(unsigned char *src0, unsigned char *src1, int width)
{
  unsigned long result;
  int x;
  int dif;

  dif = src0[0] - src1[0];
  if (dif < 0)
    dif = -dif;
  result += dif;
  result += dif;
  dif = src0[0] - src1[1];
  if (dif < 0)
    dif = -dif;
  result += dif;

  width--;
  for (x = 1; x < width; x++) {
    dif = src0[x] - src1[x-1];
    if (dif < 0)
      dif = -dif;
    result += dif;
    dif = src0[x] - src1[x];
    if (dif < 0)
      dif = -dif;
    result += dif;
    dif = src0[x] - src1[x+1];
    if (dif < 0)
      dif = -dif;
    result += dif;
  }

  dif = src0[x] - src1[x-1];
  if (dif < 0)
    dif = -dif;
  result += dif;
  dif = src0[x] - src1[x];
  if (dif < 0)
    dif = -dif;
  result += dif;
  result += dif;

  return result;
}

/*
 * rgb2y_line (plain C, used by the MMX-only build) - convert one line of
 * packed 3-byte pixels to 8-bit luma.
 *
 * Each output byte is ((p0 * 25 + p1 * 129 + p2 * 66) >> 8) + 16, where
 * p0..p2 are the pixel's three bytes in memory order.  Nothing is written
 * when width <= 0.
 */
static inline void
rgb2y_line(unsigned char *y_dest, unsigned char *rgb, int width)
{
  int remaining;

  for (remaining = width; remaining > 0; remaining--) {
    int weighted = rgb[0] * 25 + rgb[1] * 129 + rgb[2] * 66;

    *y_dest++ = (weighted >> 8) + 16;
    rgb += 3;
  }
}

#endif /* MMXEXT */

#else /* NOT USE MMX or MMXEXT */

#undef emms
#define emms()

/*
 * x_flicker1 (plain C) - sum of absolute differences between the first
 * `width` bytes of src0 and src1.  Returns 0 when width <= 0.
 */
static inline unsigned long
x_flicker1(unsigned char *src0, unsigned char *src1, int width)
{
  unsigned int sad = 0;
  int pos = 0;

  while (pos < width) {
    int delta = src0[pos] - src1[pos];

    sad += (delta < 0) ? -delta : delta;
    pos++;
  }
  return sad;
}

static inline unsigned long
x_flicker2(unsigned char *src0, unsigned char *src1, int width)
{
  int last_x;
  int x;
  int differ;
  unsigned int x_diff = 0;
  last_x = src1[0];
  width--;
  for (x = 0; x < width; x++) {
    differ = src0[x] - last_x;
    if (differ<0) differ = -differ;
    x_diff += differ;
    differ = src0[x] - src1[x];
    if (differ<0) differ = -differ;
    x_diff += differ;
    differ = src0[x] - src1[x+1];
    if (differ<0) differ = -differ;
    x_diff += differ;
    last_x = src1[x];
  }
  differ = src0[x] - last_x;
  if (differ<0) differ = -differ;
  x_diff += differ;
  differ = src0[x] - src1[x];
  if (differ<0) differ = -differ;
  x_diff += differ;
  x_diff += differ;

  return x_diff;
}

/*
 * rgb2y_line (plain C) - convert one line of packed 3-byte pixels to 8-bit
 * luma.
 *
 * Each output byte is ((p0 * 25 + p1 * 129 + p2 * 66) >> 8) + 16, where
 * p0..p2 are the pixel's three bytes in memory order.  Nothing is written
 * when width <= 0.
 */
static inline void
rgb2y_line(unsigned char *y_dest, unsigned char *rgb, int width)
{
  unsigned char *out = y_dest;
  unsigned char *in = rgb;
  int luma;

  while (width > 0) {
    luma = in[0] * 25;
    luma += in[1] * 129;
    luma += in[2] * 66;
    luma = (luma >> 8) + 16;

    *out = luma;

    in += 3;
    out += 1;
    width -= 1;
  }
}

#endif

/*
 * rgb2y - convert a packed 3-bytes-per-pixel image to an 8-bit luma plane.
 *
 * Runs rgb2y_line() once per row; rows are width * 3 bytes apart in rgb and
 * width bytes apart in dest.  emms() is issued once after the last row.
 */
void
rgb2y(unsigned char *dest, unsigned char *rgb, int width, int height)
{
  const int rgb_pitch = width * 3;
  int rows_left = height;

  while (rows_left > 0) {
    rgb2y_line(dest, rgb, width);
    rgb += rgb_pitch;
    dest += width;
    rows_left--;
  }
  emms();
}

/*
 * flicker_func1 - inter-field difference measured with x_flicker1().
 *
 * Treats `previous` and `current` as the sources of the two fields of one
 * woven frame (with TOP_FEILD_FIRST the even lines come from `current` and
 * the odd lines from `previous`, otherwise the other way round) and sums
 * x_flicker1() over every pair of vertically adjacent lines: line 0 against
 * line 1, line 1 against line 2, and so on for height - 1 pairs.
 *
 * For CSP_RGB24 a line is width * 3 bytes; for every other csp value it is
 * width bytes.  emms() is issued before returning.
 */
static unsigned int
flicker_func1 (unsigned char *previous, unsigned char *current, int width, int height, int csp, int field_order)
{
  unsigned char *line_a;        /* first argument of the next comparison */
  unsigned char *line_b;        /* second argument of the next comparison */
  unsigned int total = 0;
  int line_bytes = width;
  int field_pitch;
  int line_diff;
  int pairs;
  int n;

  if (csp == CSP_RGB24)
    line_bytes *= 3;

  /* line_a starts on line 0 of the top-field source, line_b on line 1 of
   * the bottom-field source */
  line_a = (field_order == TOP_FEILD_FIRST) ? current : previous;
  line_b = (field_order == TOP_FEILD_FIRST) ? previous : current;
  line_b += line_bytes;

  field_pitch = line_bytes * 2;
  pairs = height - 1;

  for (n = 0; n < pairs; n++) {
    unsigned char *swap;

    line_diff = x_flicker1(line_a, line_b, line_bytes);
    total += line_diff;

    /* step the line just used to its field's next line, then exchange the
     * roles so the following pair is compared in the opposite order */
    line_a += field_pitch;
    swap = line_a;
    line_a = line_b;
    line_b = swap;
  }

  emms();

  return total;
}

/*
 * flicker_func2 - inter-field difference measured with x_flicker2().
 *
 * Same line pairing as flicker_func1(): the two frames are treated as the
 * sources of the even and odd lines of one woven frame (with
 * TOP_FEILD_FIRST the even lines come from `current`), and x_flicker2() is
 * summed over the height - 1 pairs of vertically adjacent lines, each line
 * being width bytes.
 *
 * CSP_RGB24 input is handed to flicker_func1() unchanged.  emms() is issued
 * before returning.
 */
static unsigned int
flicker_func2 (unsigned char *previous, unsigned char *current, int width, int height, int csp, int field_order)
{
  unsigned char *top_src;
  unsigned char *bottom_src;
  unsigned int total = 0;
  const int field_pitch = width * 2;
  const int pairs = height - 1;
  int line_diff;
  int n;

  if (csp == CSP_RGB24)
    return flicker_func1 (previous, current, width, height, csp, field_order);

  if (field_order != TOP_FEILD_FIRST) {
    top_src = previous;
    bottom_src = current;
  } else {
    top_src = current;
    bottom_src = previous;
  }
  /* the bottom field starts on line 1 */
  bottom_src += width;

  for (n = 0; n < pairs; n++) {
    if ((n & 1) == 0) {
      /* even line against the odd line below it */
      line_diff = x_flicker2(top_src, bottom_src, width);
      top_src += field_pitch;
    } else {
      /* odd line against the even line below it */
      line_diff = x_flicker2(bottom_src, top_src, width);
      bottom_src += field_pitch;
    }
    total += line_diff;
  }

  emms();

  return total;
}

/* Currently selected flicker metric; starts as flicker_func2 and is
 * switched by set_flicker_func(). */
FLICKER_FUNC flicker_func = flicker_func2;

/*
 * set_flicker_func - choose the flicker metric stored in flicker_func.
 *
 * num <= 1 selects flicker_func1; any larger value selects flicker_func2.
 */
void
set_flicker_func(int num)
{
  flicker_func = (num > 1) ? flicker_func2 : flicker_func1;
}

