/**********************************************************************
 
	Copyright (C) 2005- Hirohisa MORI <joshua@nichibun.ac.jp>
 
	This program is free software; you can redistribute it 
	and/or modify it under the terms of the GLOBALBASE 
	Library General Public License (G-LGPL) as published by 

	http://www.globalbase.org/
 
	This program is distributed in the hope that it will be 
	useful, but WITHOUT ANY WARRANTY; without even the 
	implied warranty of MERCHANTABILITY or FITNESS FOR A 
	PARTICULAR PURPOSE.

**********************************************************************/

#include	"machine/include.h"
#include	"memory_debug.h"
#include	"xl.h"
#include	"change_endian.h"
#include	"matrix.h"

/* Element type handled by every routine in this file. */
#define BASE_TYPE 	double

/* Handlers installed in the mx_type_double / mx_type_double_v tables below. */
int mxt_double_get_size(MATRIX_DATA_TYPE *,void * d);
void * mxt_double_sexp2md(int * cpy,struct matrix_data_type*,XL_SEXP * s);
XL_SEXP * mxt_double_md2sexp(struct matrix_data_type*,void * d);
int mxt_double_cmp(struct matrix_data_type*,void*,void*);
int mxt_doublev_cmp(struct matrix_data_type*,void*,void*);
int mxt_double_thined_rep(struct matrix_data_type * tp,
	MATRIX *m,INTEGER64* dim_code,int * rep,int,void * dest,void * src);
int mxt_double_thined_avg(struct matrix_data_type * tp,
	MATRIX *m,INTEGER64* dim_code,int,void * dest,void * src);
int mxt_double_add(struct matrix_data_type * tp,
	MATRIX *m,INTEGER64* dim_code,int parent,void * ,void * dest,void * src1,void * src2);
int mxt_double_sub(struct matrix_data_type * tp,
	MATRIX *m,INTEGER64* dim_code,int parent,void * ,void * dest,void * src1,void * src2);
void mxt_endian_double(void *d);
/* Element-wise / linear-algebra helpers on double vectors and 2-D matrices. */
void *  mxt_add_vv_double(MATRIX_DATA_TYPE * tp,
	void * d1,void *d2,MX_CACHE * c);
void *  mxt_sub_vv_double(MATRIX_DATA_TYPE * tp,
	void * d1,void *d2,MX_CACHE * c);
void *  mxt_mul_mm_double(MATRIX_DATA_TYPE * tp,
	void * d1,void *d2,MX_CACHE * c);
void *  mxt_mul_sv_double(MATRIX_DATA_TYPE * tp,
	void * d1,void *d2,MX_CACHE * c);
void *	mxt_trans_m_double(MATRIX_DATA_TYPE * tp,void *,MX_CACHE * c);
void mxt_get_zero_double(MATRIX_DATA_TYPE * tp,void *);
void mxt_get_el_double(MATRIX_DATA_TYPE * tp,void *);
/* Cache-reading / solver helpers defined further down in this file. */
double
matrix_newton_double_loop(MX_CACHE_PARAM * p,int,double);
void
mxt_print_double(MATRIX_DATA_TYPE*,MATRIX_STRING_BUFFER*,void*,char*);
void
print_ret_buf(char * str,MX_CACHE_PARAM * p);
void *
read_mx_ipdiff_double2(MX_CACHE_PARAM * p);
void
generate_rand(MX_CACHE_PARAM * p,double tr);

/* When non-zero, read_mx_ip_double() prints the corner samples it reads. */
int _mx_deb_flag;


/*
 * Type descriptor for a scalar double element.
 *
 * The initializer is positional; the meaning of each slot is fixed by the
 * MATRIX_DATA_TYPE declaration (not visible in this file).  Compared with
 * mx_type_double_v, the four slots that the vector type fills with the
 * thinning/add/sub handlers, and the five slots it fills with the
 * vector/matrix arithmetic handlers, are left 0 here.
 */
MATRIX_DATA_TYPE mx_type_double = {
	MDT_DOUBLE,
	"double",
	0,			/* no element type (the vector type points here) */
	mxt_double_get_size,
	mxt_double_sexp2md,
	mxt_double_md2sexp,
	mxt_double_cmp,
	mxt_vector_copy,
	xx_mxt_alloc_copy,
	xx_mxt_alloc_data,
	mxt_free_data,
	0,
	0,
	0,
	0,
	mxt_endian_double,	/* same byte swap is used for both endian slots */
	mxt_endian_double,
	mxt_convert_basic_to_net,
	mxt_convert_basic_to_host,
	0,0,0,0,0,
	mxt_get_zero_double,
	mxt_get_el_double,
	mxt_print_double
};

/*
 * Type descriptor for a vector / multi-dimensional array of doubles.
 *
 * The third slot refers to mx_type_double as the element type.  The
 * initializer is positional; slot meanings are fixed by the
 * MATRIX_DATA_TYPE declaration (not visible in this file).
 */
MATRIX_DATA_TYPE mx_type_double_v = {
	MDT_DOUBLE|MDT_VECTOR,
	"double_v",
	&mx_type_double,
	mxt_vector_get_size,
	mxt_vector_sexp2md,
	mxt_vector_md2sexp,
	mxt_doublev_cmp,
	mxt_vector_copy,
	xx_mxt_alloc_copy,
	xx_mxt_alloc_vector,
	mxt_free_data,
	mxt_double_thined_rep,
	mxt_double_thined_avg,
	mxt_double_add,
	mxt_double_sub,
	mxt_endian_vector_to_net,
	mxt_endian_vector_to_host,
	mxt_convert_basic_to_net,
	mxt_convert_basic_to_host,
	mxt_add_vv_double,
	mxt_sub_vv_double,
	mxt_mul_mm_double,
	mxt_mul_sv_double,
	mxt_trans_m_double,
	0,0,			/* zero / unit-element getters are only set on the scalar type */
	mxt_print_vector
};

/*
 * Write the value 0 into the caller's buffer as one BASE_TYPE.
 *
 * tp  : type descriptor (not used)
 * buf : destination, must have room for one BASE_TYPE
 */
void
mxt_get_zero_double(
	MATRIX_DATA_TYPE * tp,
	void * buf)
{
BASE_TYPE * out;
	out = (BASE_TYPE*)buf;
	out[0] = 0;
}

/*
 * Write the value 1 into the caller's buffer as one BASE_TYPE.
 *
 * tp  : type descriptor (not used)
 * buf : destination, must have room for one BASE_TYPE
 */
void
mxt_get_el_double(
	MATRIX_DATA_TYPE * tp,
	void * buf)
{
BASE_TYPE * out;
	out = (BASE_TYPE*)buf;
	out[0] = 1;
}

/*
 * Byte-swap one BASE_TYPE value in place via change_endian().
 *
 * d : address of the value to convert
 */
void mxt_endian_double(void * d)
{
BASE_TYPE * value = (BASE_TYPE*)d;

	change_endian((*value));
}




/*
 * Report the storage size of one element: always sizeof(BASE_TYPE).
 * Both arguments are ignored.
 */
int
mxt_double_get_size(MATRIX_DATA_TYPE * tp,void * d)
{
int bytes;
	bytes = sizeof(BASE_TYPE);
	return bytes;
}


/*
 * Convert an S-expression holding a number into a freshly allocated
 * BASE_TYPE.
 *
 * cpy : if non-null, set to 1 when a value is returned
 * tp  : type descriptor (not used)
 * s   : source expression; only XLT_FLOAT and XLT_INTEGER are accepted
 *
 * Returns the d_alloc-ed value, or 0 (without allocating and without
 * touching *cpy) when the expression has any other type.
 *
 * The type is dispatched once: the earlier form repeated the switch after
 * allocating, with a default branch that could never run and would have
 * leaked the allocation had it done so.
 */
void * 
mxt_double_sexp2md(int * cpy,struct matrix_data_type* tp,XL_SEXP * s)
{
BASE_TYPE * ret;
BASE_TYPE value;
	switch ( get_type(s) ) {
	case XLT_FLOAT:
		value = s->floating.data;
		break;
	case XLT_INTEGER:
		value = s->integer.data;
		break;
	default:
		return 0;
	}
	ret = d_alloc(sizeof(BASE_TYPE));
	*ret = value;
	if ( cpy )
		*cpy = 1;
	return (void*)ret;
}

XL_SEXP * 
mxt_double_md2sexp(struct matrix_data_type* tp,void * d)
{
BASE_TYPE * _d;
	_d = d;
	return get_floating(*_d,0);
}

/*
 * Three-way comparison of two BASE_TYPE values.
 *
 * Returns -1 when *d1 < *d2, 1 when *d1 > *d2 and 0 otherwise
 * (which includes the case where neither ordering holds).
 */
int 
mxt_double_cmp(struct matrix_data_type* tp,void* d1,void* d2)
{
BASE_TYPE lhs, rhs;
	lhs = *(BASE_TYPE*)d1;
	rhs = *(BASE_TYPE*)d2;
	return (lhs > rhs) - (lhs < rhs);
}



/*
 * Compare two double vectors.
 *
 * The headers are compared first with cmp_dh_set(); a non-zero result is
 * returned as is.  Otherwise the elements are compared in storage order
 * and the first difference decides: 1 if the element of d1 is larger,
 * -1 if it is smaller.  Returns 0 when all h1.total_element elements match.
 */
int 
mxt_doublev_cmp(struct matrix_data_type* tp,void* d1,void* d2)
{
MATRIX_DH_SET lhs_set;
MATRIX_DH_SET rhs_set;
BASE_TYPE * lhs, * rhs;
int header_cmp;
int remaining;

	get_matrix_dh_set(&lhs_set,d1);
	get_matrix_dh_set(&rhs_set,d2);
	header_cmp = cmp_dh_set(&lhs_set,&rhs_set);
	if ( header_cmp != 0 )
		return header_cmp;

	lhs = lhs_set.offset;
	rhs = rhs_set.offset;
	remaining = lhs_set.total_element;
	while ( remaining > 0 ) {
		if ( *lhs > *rhs )
			return 1;
		if ( *lhs < *rhs )
			return -1;
		lhs ++;
		rhs ++;
		remaining --;
	}
	return 0;
}



/*
 * Thin a source block into a destination block by picking one
 * representative sample per group.
 *
 * tp       : type descriptor (not used)
 * m        : matrix; supplies p.dim, dim_divide[] and block_size[]
 * dim_code : dim_code[0] is the level, dim_code[i+1] the code of axis i
 * rep      : start index per axis of the representative sample
 * parent   : number of levels between source and destination
 * dest,src : vector data blocks
 *
 * Source indices start at rep[] and advance by 1<<(dim_divide[i]*parent)
 * on each axis; every visited sample is copied to the destination index
 * (src_ix >> (parent*dim_divide[i])) + per-axis offset.
 *
 * Returns -1 if either block's dimension differs from m->p.dim, else 0.
 */
int
mxt_double_thined_rep(struct matrix_data_type * tp,
	MATRIX *m,INTEGER64* dim_code,int * rep,int parent,void * dest,void * src)
{
MATRIX_DH_SET from_set;
MATRIX_DH_SET to_set;
INTEGER64 level;
BASE_TYPE * from, * to;
int * base;
int * from_ix, * to_ix, * step;
int axis, ndim, shift, scale;

	get_matrix_dh_set(&from_set,src);
	get_matrix_dh_set(&to_set,dest);
	if ( from_set.hd->dim != m->p.dim )
		return -1;
	if ( to_set.hd->dim != m->p.dim )
		return -1;

	level = dim_code[0];
	ndim = m->p.dim;

	/* per-axis offset of this block inside the destination */
	base = d_alloc(sizeof(int)*ndim);
	for ( axis = 0 ; axis < ndim ; axis ++ ) {
		shift = m->dim_divide[axis]*parent;
		if ( m->block_size[axis] <= shift ) {
			base[axis] = 0;
			continue;
		}
		scale = ((int)1)<<(m->block_size[axis] - shift);
		base[axis] = scale * ((dim_code[axis+1] >>
				(level * m->dim_divide[axis] + m->block_size[axis]))
			& ((((int)1)<<shift)-1));
	}

	from_ix = d_alloc(sizeof(int)*ndim);
	to_ix = d_alloc(sizeof(int)*ndim);
	step = d_alloc(sizeof(int)*ndim);
	memcpy(from_ix,rep,sizeof(int)*ndim);
	for ( axis = 0 ; axis < ndim ; axis ++ )
		step[axis] = ((int)1)<<(m->dim_divide[axis]*parent);

	/* inc_ix() returns non-zero once the source index has wrapped */
	do {
		for ( axis = 0 ; axis < ndim ; axis ++ )
			to_ix[axis] = (from_ix[axis]>>(parent*m->dim_divide[axis]))
					+ base[axis];
		from = (BASE_TYPE*)from_set.offset
			+ get_seq_from_ix(from_ix,from_set.ix,ndim);
		to = (BASE_TYPE*)to_set.offset
			+ get_seq_from_ix(to_ix,to_set.ix,ndim);
		*to = *from;
	} while ( !inc_ix(from_ix,rep,step,from_set.ix,ndim) );

	d_f_ree(from_ix);
	d_f_ree(to_ix);
	d_f_ree(step);
	d_f_ree(base);
	return 0;
}





/*
 * Thin a source block into a destination block by averaging.
 *
 * tp       : type descriptor (not used)
 * m        : matrix; supplies p.dim, dim_divide[] and block_size[]
 * dim_code : dim_code[0] is the level, dim_code[i+1] the code of axis i
 * parent   : number of levels between source and destination
 * dest,src : vector data blocks
 *
 * The source is walked in groups of 1<<(dim_divide[i]*parent) samples per
 * axis (clamped to the source extent h_src.ix[i]).  The mean of each group
 * is stored at destination index ofs[i] + (src_ix[i] >> (dim_divide[i]*parent)).
 *
 * The accumulator is a BASE_TYPE: with an int accumulator every sample
 * was truncated on addition and the mean was computed by integer division.
 *
 * Returns -1 if either block's dimension differs from m->p.dim, else 0.
 */
int
mxt_double_thined_avg(struct matrix_data_type * tp,
	MATRIX *m,INTEGER64* dim_code,int parent,void * dest,void * src)
{
MATRIX_DH_SET h_src;
MATRIX_DH_SET h_dest;
INTEGER64 lev;
int i;
int _ofs;
int * ofs;
int * src_ix,* dest_ix, * avg_ix,* avg_size;
int * src_inc,* dest_inc;
BASE_TYPE * p1, * p2;
int dim;
BASE_TYPE avg;
int n_size;
int _p;
	get_matrix_dh_set(&h_src,src);
	get_matrix_dh_set(&h_dest,dest);
	if ( h_src.hd->dim != m->p.dim )
		return -1;
	if ( h_dest.hd->dim != m->p.dim )
		return -1;
	lev = dim_code[0];
	/* per-axis offset of this block inside the destination */
	ofs = d_alloc(sizeof(int)*m->p.dim);
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		_p = m->dim_divide[i]*parent;
		if ( m->block_size[i] > _p ) {
			_ofs = ((int)1)<<(m->block_size[i] - _p);
			ofs[i] = _ofs * ((dim_code[i+1] >> 
					(lev * m->dim_divide[i] + m->block_size[i]))
				& ((((int)1)<<m->dim_divide[i]*parent)-1));
		}
		else ofs[i] = 0;
	}
	dim = m->p.dim;
	src_ix = d_alloc(sizeof(int)*dim);
	dest_ix = d_alloc(sizeof(int)*dim);
	src_inc = d_alloc(sizeof(int)*dim);
	dest_inc = d_alloc(sizeof(int)*dim);
	avg_ix = d_alloc(sizeof(int)*dim);
	avg_size = d_alloc(sizeof(int)*dim);
	for ( i = 0 ; i < dim ; i ++ ) {
		src_ix[i] = 0;
		src_inc[i] = 1<<(m->dim_divide[i]*parent);
		dest_inc[i] = 1;
	}
	for ( ; ; ) {
		/* set up the group [src_ix, avg_size) and its destination cell */
		memcpy(avg_ix,src_ix,sizeof(int)*dim);
		avg = 0;
		for ( i = 0 ; i < dim ; i ++ ) {
			avg_size[i] = src_ix[i] + src_inc[i];
			if ( avg_size[i] >= h_src.ix[i] )
				avg_size[i] = h_src.ix[i];
			dest_ix[i] = ofs[i] + (src_ix[i]>>(m->dim_divide[i]*parent));
		}
		/* sum every sample of the group, counting them as we go */
		n_size = 0;
		for ( ; ; ) {
			p1 = &((BASE_TYPE*)h_src.offset)[get_seq_from_ix(avg_ix,h_src.ix,dim)];
			avg += *p1;
			n_size ++;
			if ( inc_ix(avg_ix,src_ix,dest_inc,avg_size,dim) )
				break;
		}
		p2 = &((BASE_TYPE*)h_dest.offset)[get_seq_from_ix(dest_ix,h_dest.ix,dim)];
		*p2 = avg/n_size;
		if ( inc_ix(src_ix,0,src_inc,h_src.ix,dim) )
			break;
	}
	d_f_ree(src_ix);
	d_f_ree(dest_ix);
	d_f_ree(src_inc);
	d_f_ree(dest_inc);
	d_f_ree(ofs);
	d_f_ree(avg_ix);
	d_f_ree(avg_size);
	return 0;
}




/*
 * Combine a coarse block (src1) with a fine block (src2) by addition.
 *
 * tp       : type descriptor (not used)
 * m        : matrix; supplies p.dim, dim_divide[] and block_size[]
 * dim_code : dim_code[0] is the level (must be > 0), dim_code[i+1] the
 *            code of axis i
 * parent   : number of levels between src1 and src2
 * _d1      : not used
 * dest     : destination vector data block
 * src1     : block whose samples are each paired with a whole group of src2
 * src2     : block walked in groups of 1<<(dim_divide[i]*parent) per axis
 *
 * Returns -1 when a block's dimension differs from m->p.dim or the level
 * is not positive, else 0.
 *
 * ofs[i] is set to 0 when block_size[i] <= dim_divide[i]*parent, as the
 * thinning routines in this file do; it used to be left uninitialized in
 * that case and was then read as the starting index for src1.
 *
 * NOTE(review): the sum is narrowed to INTEGER64 and handed to round_int()
 * together with `dest` (the block handle), while the computed element
 * address `dest_p` is never used.  This looks inherited from an integer
 * variant of this routine - confirm the intended store for doubles.
 */
int
mxt_double_add(struct matrix_data_type * tp,
	MATRIX *m,INTEGER64* dim_code,int parent,void *_d1 ,void * dest,void * src1,void * src2)
{
MATRIX_DH_SET h_src1,h_src2;
MATRIX_DH_SET h_dest;
INTEGER64 lev;
int i;
int _ofs;
int * ofs;
int * src1_ix, * src2_ix;
int * src1_inc,* src2_inc;
int * target_ix,* target_size;
BASE_TYPE * p1, * p2, * dest_p;
int dim;
int _p;
INTEGER64 result;
char hd_type;
	get_matrix_dh_set(&h_src1,src1);
	get_matrix_dh_set(&h_src2,src2);
	get_matrix_dh_set(&h_dest,dest);
	if ( h_src1.hd->dim != m->p.dim )
		return -1;
	if ( h_src2.hd->dim != m->p.dim )
		return -1;
	if ( h_dest.hd->dim != m->p.dim )
		return -1;
	lev = dim_code[0];
	if ( lev <= 0 )
		return -1;
	/* per-axis starting index inside src1 */
	ofs = d_alloc(sizeof(int)*m->p.dim);
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		_p = m->dim_divide[i]*parent;
		if ( m->block_size[i] > _p ) {
			_ofs = 1<<(m->block_size[i] - _p);
			ofs[i] = _ofs * ((dim_code[i+1] >> 
					(lev * m->dim_divide[i] + m->block_size[i]))
				& ((1<<(m->dim_divide[i]*parent))-1));
		}
		else	ofs[i] = 0;
	}
	dim = m->p.dim;
	src1_ix = d_alloc(sizeof(int)*dim);
	src2_ix = d_alloc(sizeof(int)*dim);
	src1_inc = d_alloc(sizeof(int)*dim);
	src2_inc = d_alloc(sizeof(int)*dim);
	target_ix = d_alloc(sizeof(int)*dim);
	target_size = d_alloc(sizeof(int)*dim);
	for ( i = 0 ; i < dim ; i ++ ) {
		src2_ix[i] = 0;
		src2_inc[i] = 1<<(m->dim_divide[i]*parent);
		src1_inc[i] = 1;
		src1_ix[i] = ofs[i];
	}
	hd_type = h_dest.hd->type;
	for ( ; ; ) {
		/* one src1 sample is combined with a whole group of src2 samples */
		p1 = &((BASE_TYPE*)h_src1.offset)[get_seq_from_ix(src1_ix,h_src1.ix,dim)];

		memcpy(target_ix,src2_ix,sizeof(int)*dim);
		for ( i = 0 ; i < dim ; i ++ )
			target_size[i] = src2_ix[i] + src2_inc[i];
		for ( ; ; ) {
			p2 = &((BASE_TYPE*)h_src2.offset)
				[get_seq_from_ix(target_ix,h_src2.ix,dim)];
			dest_p = &((BASE_TYPE*)h_dest.offset)
				[get_seq_from_ix(target_ix,h_dest.ix,dim)];
			result = (*p1) + (*p2);
			round_int(hd_type,dest,result,0);
			if ( inc_ix(target_ix,src2_ix,src1_inc,target_size,dim) )
				break;
		}
		if ( inc_ix(src2_ix,0,src2_inc,h_src2.ix,dim) )
			break;
		inc_ix(src1_ix,ofs,src1_inc,h_src1.ix,dim);
	}
	d_f_ree(src1_ix);
	d_f_ree(src2_ix);
	d_f_ree(src1_inc);
	d_f_ree(src2_inc);
	d_f_ree(ofs);
	d_f_ree(target_ix);
	d_f_ree(target_size);
	return 0;
}



/*
 * Combine a coarse block (src1) with a fine block (src2) by subtraction
 * (src1 sample minus src2 sample).
 *
 * tp       : type descriptor (not used)
 * m        : matrix; supplies p.dim, dim_divide[] and block_size[]
 * dim_code : dim_code[0] is the level (must be > 0), dim_code[i+1] the
 *            code of axis i
 * parent   : number of levels between src1 and src2
 * _d1      : not used
 * dest     : destination vector data block
 * src1     : block whose samples are each paired with a whole group of src2
 * src2     : block walked in groups of 1<<(dim_divide[i]*parent) per axis
 *
 * Returns -1 when a block's dimension differs from m->p.dim or the level
 * is not positive, else 0.
 *
 * ofs[i] is set to 0 when block_size[i] <= dim_divide[i]*parent, as the
 * thinning routines in this file do; it used to be left uninitialized in
 * that case and was then read as the starting index for src1.
 *
 * NOTE(review): the difference is narrowed to INTEGER64 and handed to
 * round_int() together with `dest` (the block handle), while the computed
 * element address `dest_p` is never used.  This looks inherited from an
 * integer variant of this routine - confirm the intended store for doubles.
 */
int
mxt_double_sub(struct matrix_data_type * tp,
	MATRIX *m,INTEGER64* dim_code,int parent,void *_d1 ,void * dest,void * src1,void * src2)
{
MATRIX_DH_SET h_src1,h_src2;
MATRIX_DH_SET h_dest;
INTEGER64 lev;
int i;
int _ofs;
int * ofs;
int * src1_ix, * src2_ix;
int * src1_inc,* src2_inc;
int * target_ix,* target_size;
BASE_TYPE * p1, * p2, * dest_p;
int dim;
int _p;
INTEGER64 result;
char hd_type;
	get_matrix_dh_set(&h_src1,src1);
	get_matrix_dh_set(&h_src2,src2);
	get_matrix_dh_set(&h_dest,dest);
	if ( h_src1.hd->dim != m->p.dim )
		return -1;
	if ( h_src2.hd->dim != m->p.dim )
		return -1;
	if ( h_dest.hd->dim != m->p.dim )
		return -1;
	lev = dim_code[0];
	if ( lev <= 0 )
		return -1;
	/* per-axis starting index inside src1 */
	ofs = d_alloc(sizeof(int)*m->p.dim);
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		_p = m->dim_divide[i]*parent;
		if ( m->block_size[i] > _p ) {
			_ofs = 1<<(m->block_size[i] - _p);
			ofs[i] = _ofs * ((dim_code[i+1] >> 
					(lev * m->dim_divide[i] + m->block_size[i]))
				& ((1<<(m->dim_divide[i]*parent))-1));
		}
		else	ofs[i] = 0;
	}
	dim = m->p.dim;
	src1_ix = d_alloc(sizeof(int)*dim);
	src2_ix = d_alloc(sizeof(int)*dim);
	src1_inc = d_alloc(sizeof(int)*dim);
	src2_inc = d_alloc(sizeof(int)*dim);
	target_ix = d_alloc(sizeof(int)*dim);
	target_size = d_alloc(sizeof(int)*dim);
	for ( i = 0 ; i < dim ; i ++ ) {
		src2_ix[i] = 0;
		src2_inc[i] = 1<<(m->dim_divide[i]*parent);
		src1_inc[i] = 1;
		src1_ix[i] = ofs[i];
	}
	hd_type = h_dest.hd->type;
	for ( ; ; ) {
		/* one src1 sample is combined with a whole group of src2 samples */
		p1 = &((BASE_TYPE*)h_src1.offset)[get_seq_from_ix(src1_ix,h_src1.ix,dim)];

		memcpy(target_ix,src2_ix,sizeof(int)*dim);
		for ( i = 0 ; i < dim ; i ++ )
			target_size[i] = src2_ix[i] + src2_inc[i];
		for ( ; ; ) {
			p2 = &((BASE_TYPE*)h_src2.offset)
				[get_seq_from_ix(target_ix,h_src2.ix,dim)];
			dest_p = &((BASE_TYPE*)h_dest.offset)
				[get_seq_from_ix(target_ix,h_dest.ix,dim)];
			result = (*p1) - (*p2);
			round_int(hd_type,dest,result,0);
			if ( inc_ix(target_ix,src2_ix,src1_inc,target_size,dim) )
				break;
		}
		if ( inc_ix(src2_ix,0,src2_inc,h_src2.ix,dim) )
			break;
		inc_ix(src1_ix,ofs,src1_inc,h_src1.ix,dim);
	}
	d_f_ree(src1_ix);
	d_f_ree(src2_ix);
	d_f_ree(src1_inc);
	d_f_ree(src2_inc);
	d_f_ree(ofs);
	d_f_ree(target_ix);
	d_f_ree(target_size);
	return 0;
}




/*
 * Element-wise sum of two double vectors of identical shape.
 *
 * tp    : expected type of both operands
 * d1,d2 : operands
 * c     : cache handed to the allocator through the alloc parameter
 *
 * Returns a newly allocated vector (mxt_alloc_vector, MD_CALLOC) holding
 * d1[i] + d2[i], or 0 when either operand is not of type tp or the
 * dimension count / per-axis sizes differ.
 */
void * 
mxt_add_vv_double(MATRIX_DATA_TYPE * tp,
	void * d1,void *d2,MX_CACHE * c)
{
MATRIX_DH_SET lhs_set,rhs_set,out_set;
MATRIX_ALLOC_VECTOR_PARAM alloc;
BASE_TYPE * lhs,* rhs, * out;
void * result;
int k;

	get_matrix_dh_set(&lhs_set,d1);
	get_matrix_dh_set(&rhs_set,d2);
	if ( lhs_set.tp != tp || rhs_set.tp != tp )
		return 0;
	if ( lhs_set.hd->dim != rhs_set.hd->dim )
		return 0;
	for ( k = 0 ; k < lhs_set.hd->dim ; k ++ ) {
		if ( lhs_set.ix[k] != rhs_set.ix[k] )
			return 0;
	}

	/* the result has the shape of the first operand */
	alloc.dim = lhs_set.hd->dim;
	alloc.ix_size = lhs_set.ix;
	alloc.default_data = 0;
	alloc.mxc = c;
	result = mxt_alloc_vector(tp,MD_CALLOC,&alloc,0);
	get_matrix_dh_set(&out_set,result);

	lhs = lhs_set.offset;
	rhs = rhs_set.offset;
	out = out_set.offset;
	for ( k = 0 ; k < lhs_set.total_element ; k ++ )
		out[k] = lhs[k] + rhs[k];
	return result;
}


/*
 * Element-wise difference of two double vectors of identical shape.
 *
 * tp    : expected type of both operands
 * d1,d2 : operands
 * c     : cache handed to the allocator through the alloc parameter
 *
 * Returns a newly allocated vector (mxt_alloc_vector, MD_CALLOC) holding
 * d1[i] - d2[i], or 0 when either operand is not of type tp or the
 * dimension count / per-axis sizes differ.
 */
void * 
mxt_sub_vv_double(MATRIX_DATA_TYPE * tp,
	void * d1,void *d2,MX_CACHE * c)
{
MATRIX_DH_SET lhs_set,rhs_set,out_set;
MATRIX_ALLOC_VECTOR_PARAM alloc;
BASE_TYPE * lhs,* rhs, * out;
void * result;
int k;

	get_matrix_dh_set(&lhs_set,d1);
	get_matrix_dh_set(&rhs_set,d2);
	if ( lhs_set.tp != tp || rhs_set.tp != tp )
		return 0;
	if ( lhs_set.hd->dim != rhs_set.hd->dim )
		return 0;
	for ( k = 0 ; k < lhs_set.hd->dim ; k ++ ) {
		if ( lhs_set.ix[k] != rhs_set.ix[k] )
			return 0;
	}

	/* the result has the shape of the first operand */
	alloc.dim = lhs_set.hd->dim;
	alloc.ix_size = lhs_set.ix;
	alloc.default_data = 0;
	alloc.mxc = c;
	result = mxt_alloc_vector(tp,MD_CALLOC,&alloc,0);
	get_matrix_dh_set(&out_set,result);

	lhs = lhs_set.offset;
	rhs = rhs_set.offset;
	out = out_set.offset;
	for ( k = 0 ; k < lhs_set.total_element ; k ++ )
		out[k] = lhs[k] - rhs[k];
	return result;
}




/*
 * Scale a double vector by a scalar.
 *
 * tp : expected type of the vector operand
 * d1 : address of the scalar factor (read as one BASE_TYPE)
 * d2 : vector operand
 * c  : cache handed to the allocator through the alloc parameter
 *
 * Returns a newly allocated vector with the shape of d2 holding
 * (*d1) * d2[i], or 0 when d2 is not of type tp.
 */
void * 
mxt_mul_sv_double(MATRIX_DATA_TYPE * tp,
	void * d1,void *d2,MX_CACHE * c)
{
MATRIX_DH_SET vec_set,out_set;
MATRIX_ALLOC_VECTOR_PARAM alloc;
BASE_TYPE * vec, * out;
BASE_TYPE factor;
void * result;
int k;

	get_matrix_dh_set(&vec_set,d2);
	if ( vec_set.tp != tp )
		return 0;

	alloc.dim = vec_set.hd->dim;
	alloc.ix_size = vec_set.ix;
	alloc.default_data = 0;
	alloc.mxc = c;
	result = mxt_alloc_vector(tp,MD_CALLOC,&alloc,0);
	get_matrix_dh_set(&out_set,result);

	/* the scalar is read only after the result has been allocated */
	factor = *(BASE_TYPE*)d1;

	vec = vec_set.offset;
	out = out_set.offset;
	for ( k = 0 ; k < vec_set.total_element ; k ++ )
		out[k] = factor * vec[k];
	return result;
}



/*
 * Product of two 2-D double matrices.
 *
 * tp    : expected type of both operands
 * d1,d2 : operands; both must be 2-dimensional and d1's ix[0] must equal
 *         d2's ix[1]
 * c     : cache handed to the allocator through the alloc parameter
 *
 * The result has extents { d2.ix[0], d1.ix[1] } and element (i,j), stored
 * at i + j*ix[0], is the sum over k of d1[k + j*d1.ix[0]] * d2[i + k*d2.ix[0]].
 *
 * Returns the new matrix, or 0 when a type or shape check fails.
 */
void * 
mxt_mul_mm_double(MATRIX_DATA_TYPE * tp,
	void * d1,void *d2,MX_CACHE * c)
{
MATRIX_DH_SET lhs_set,rhs_set,out_set;
MATRIX_ALLOC_VECTOR_PARAM alloc;
BASE_TYPE * lhs,* rhs, * out;
BASE_TYPE sum;
void * result;
int extent[2];
int col,row,k;

	get_matrix_dh_set(&lhs_set,d1);
	get_matrix_dh_set(&rhs_set,d2);
	if ( lhs_set.tp != tp || rhs_set.tp != tp )
		return 0;
	if ( lhs_set.hd->dim != rhs_set.hd->dim )
		return 0;
	if ( lhs_set.hd->dim != 2 )
		return 0;
	if ( lhs_set.ix[0] != rhs_set.ix[1] )
		return 0;

	extent[0] = rhs_set.ix[0];
	extent[1] = lhs_set.ix[1];
	alloc.dim = 2;
	alloc.ix_size = &extent[0];
	alloc.default_data = 0;
	alloc.mxc = c;
	result = mxt_alloc_vector(tp,MD_CALLOC,&alloc,0);
	get_matrix_dh_set(&out_set,result);

	lhs = lhs_set.offset;
	rhs = rhs_set.offset;
	out = out_set.offset;
	for ( col = 0 ; col < extent[0] ; col ++ ) {
		for ( row = 0 ; row < extent[1] ; row ++ ) {
			sum = 0;
			for ( k = 0 ; k < lhs_set.ix[0] ; k ++ ) {
				sum += lhs[k + row*lhs_set.ix[0]] *
					rhs[col + k*rhs_set.ix[0]];
			}
			out[col + row*out_set.ix[0]] = sum;
		}
	}

	return result;
}


/*
 * Transpose a 2-D double matrix.
 *
 * tp : expected type of the operand
 * d1 : operand, must be 2-dimensional
 * c  : cache handed to the allocator through the alloc parameter
 *
 * The result has extents { d1.ix[1], d1.ix[0] }; element i + j*ix[0] of
 * the result is element j + i*d1.ix[0] of the operand.
 *
 * Returns the new matrix, or 0 when d1 is not of type tp or not 2-D.
 */
void *
mxt_trans_m_double(MATRIX_DATA_TYPE * tp,void * d1,MX_CACHE * c)
{
MATRIX_DH_SET in_set,out_set;
MATRIX_ALLOC_VECTOR_PARAM alloc;
BASE_TYPE * in, * out;
void * result;
int extent[2];
int a,b;

	get_matrix_dh_set(&in_set,d1);
	if ( in_set.tp != tp || in_set.hd->dim != 2 )
		return 0;

	/* swapped extents */
	extent[0] = in_set.ix[1];
	extent[1] = in_set.ix[0];
	alloc.dim = 2;
	alloc.ix_size = &extent[0];
	alloc.default_data = 0;
	alloc.mxc = c;
	result = mxt_alloc_vector(tp,MD_CALLOC,&alloc,0);
	get_matrix_dh_set(&out_set,result);

	in = in_set.offset;
	out = out_set.offset;
	for ( a = 0 ; a < extent[0] ; a ++ ) {
		for ( b = 0 ; b < extent[1] ; b ++ )
			out[a + b*out_set.ix[0]] = in[b + a*in_set.ix[0]];
	}

	return result;
}


/*
 * Scratch buffer for one corner sample used by the interpolation readers
 * in this file; they allocate `data` with ds_len BASE_TYPE values.
 */
typedef struct mx_ip_work {
	BASE_TYPE *		data;
} MX_IP_WORK;


/*
 * Per-cache user data installed by new_user_cache_double().  It remembers
 * the corner samples of the last cell read by read_mx_ip_double().
 */
typedef struct mx_user {
	MX_USER_HEADER		h;	/* must stay first: c->user_header points at it */
	INTEGER64 *		point;	/* [0] = level, [1..dim] = origin of the cached cell */
	INTEGER64 *		width;	/* [1..dim] = extent of the cached cell per axis */
	BASE_TYPE **		data;	/* 1<<dim aligned sample buffers (point into _data[i]) */
	BASE_TYPE **		_data;	/* raw allocations backing data[]; these are what get freed */
} MX_USER;

/* Prototypes for the debug printer and the user-cache helpers defined below. */
void
print_data(char * str,double * d,int len);
void
free_user_cache_double(MX_CACHE * c);
void
new_user_cache_double(MX_CACHE * c);

/*
 * Print a label followed by `len` doubles on one line through ss_printf().
 *
 * str : label printed first
 * d   : values to print
 * len : number of values
 */
void
print_data(char * str,double * d,int len)
{
double * cur;
double * end;

	ss_printf("%s ",str);
	end = d + len;
	for ( cur = d ; cur < end ; cur ++ )
		ss_printf("%f ",*cur);
	ss_printf("\n");
}


/*
 * Release the MX_USER data attached to a cache by new_user_cache_double()
 * and clear c->user_header.
 *
 * c : cache whose user_header points at the `h` member of an MX_USER
 *
 * Everything new_user_cache_double() allocates is released here, including
 * the MX_USER structure itself, which previously was never freed although
 * the only reference to it (c->user_header) is cleared.
 */
void
free_user_cache_double(MX_CACHE * c)
{
MX_USER * u;
int tot;
int i;

	u = (MX_USER*)c->user_header;
	d_f_ree(u->point);
	d_f_ree(u->width);
	/* one raw sample buffer per cell corner; data[i] points into _data[i] */
	tot = 1<<c->m->p.dim;
	for ( i = 0 ; i < tot ; i ++ )
		d_f_ree(u->_data[i]);
	d_f_ree(u->data);
	d_f_ree(u->_data);
	d_f_ree(u);
	c->user_header = 0;
}

/*
 * Attach a fresh MX_USER block to the cache.
 *
 * c : cache to equip
 *
 * If the cache already carries user data owned by this module (its
 * free_func is free_user_cache_double) nothing is done.  User data owned
 * by anyone else is released through its own free_func first.
 *
 * For each of the 1<<dim corners a buffer of ds_len+1 doubles is allocated;
 * data[i] is the address one element past the start of _data[i], moved back
 * by the start address modulo sizeof(BASE_TYPE).
 */
void
new_user_cache_double(MX_CACHE * c)
{
MATRIX * m;
MX_USER * u;
BASE_TYPE * raw;
int corners;
int k;

	if ( c->user_header ) {
		if ( c->user_header->free_func == free_user_cache_double )
			return;
		(*c->user_header->free_func)(c);
	}

	m = c->m;
	u = d_alloc(sizeof(*u));
	u->h.free_func = free_user_cache_double;
	u->point = d_alloc(sizeof(INTEGER64)*(m->p.dim+1));
	u->width = d_alloc(sizeof(INTEGER64)*(m->p.dim+1));

	corners = 1<<m->p.dim;
	u->data = d_alloc(sizeof(BASE_TYPE*)*corners);
	u->_data = d_alloc(sizeof(BASE_TYPE*)*corners);
	for ( k = 0 ; k < corners ; k ++ ) {
		raw = d_alloc(sizeof(BASE_TYPE)*(c->ds_len+1));
		u->_data[k] = raw;
		u->data[k] = (BASE_TYPE*)(((char*)(u->_data[k]+1)) 
				- (((int)u->_data[k]) % sizeof(BASE_TYPE)));
	}
	c->user_header = &u->h;
}


/*
 * Read one interpolated sample from the matrix cache.
 *
 * p : cache parameters.  If p->inp_buf is set it holds the query point as
 *     doubles (one per axis) and p->inp_level the level; otherwise the
 *     point is taken from p->dc (dc[0] = level, dc[1..dim] = coordinates).
 *
 * The 1<<dim corner samples of the cell containing the point are either
 * taken from the MX_USER data attached to the cache (when the previous
 * call hit the same cell at the same level) or read with read_mx_cache()
 * and stored there.  The corners are then blended axis by axis, from the
 * highest axis down, and the result is scattered into p->data_ptrs as
 * directed by p->data_ix.
 *
 * Returns 0 on success and -1 when a coordinate is negative or a corner
 * read fails (the user data is released in the latter case).
 *
 * The copy from p->dc is limited to the dim+1 entries `target` can hold;
 * it used to run to index dim+1 and wrote one element past the buffer.
 */
int
read_mx_ip_double(MX_CACHE_PARAM * p)
{
MX_IP_WORK * w;
int i,j,k;
int tot;

int * target_ix;
MX_CACHE_PARAM p1;
INTEGER64 low,high,_mid,width;
double mid;
int ret;
int ofs;
INTEGER64 * target;
MATRIX * m;
int ds_len;
int level;
BASE_TYPE * point;
MX_USER * u;
INTEGER64 * u_point;
INTEGER64 * u_width;
MX_CACHE * c;

	c = p->c;
	m = c->m;
	ds_len = c->ds_len;
	point = p->inp_buf;
	level = p->inp_level;
	ret = -1;

	/* target[0] = level, target[1..dim] = integer coordinates */
	target = mxc_alloc(c,sizeof(INTEGER64)*(m->p.dim+1));
	target_ix = 0;
	if ( point ) {
		for ( i = 0 ; i < m->p.dim ; i ++ )
			target[i+1] = point[i];
		target[0] = level;
	}
	else {
		for ( i = 0 ; i <= m->p.dim ; i ++ )
			target[i] = p->dc[i];
	}
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		if ( target[i+1] < 0 ) {
			mxc_free(c,target);
			return -1;
		}
	}
	u = (MX_USER*)c->user_header;
	if ( u && target[0] == u->point[0] ) {
		/* same level as the remembered cell: is the point inside it? */
		u_point = u->point;
		u_width = u->width;
		for  ( i = 1 ; i <= m->p.dim ; i ++ ) {
			if ( u_point[i] > target[i] )
				goto no_hit;
			if ( u_point[i] + u_width[i] <= target[i] )
				goto no_hit;
		}
		/* hit: copy the remembered corner samples */
		w = mxc_alloc(c,sizeof(MX_IP_WORK)*(tot = 1<<m->p.dim));
		memset(w,0,sizeof(MX_IP_WORK)*(tot));
		for ( i = 0 ; i < tot ; i ++ ) {
			w[i].data = mxc_alloc(c,sizeof(BASE_TYPE)*ds_len);
			for ( j = 0 ; j < ds_len ; j ++ )
				w[i].data[j] = u->data[i][j];
if ( _mx_deb_flag )
ss_printf("-1+ %f %f %f\n",w[i].data[0],w[i].data[1],w[i].data[2]);
		}
	}
	else {
		if ( u == 0 ) {
			new_user_cache_double(c);
			u = (MX_USER*)c->user_header;
		}
	no_hit:
		/* miss: read every corner of the cell and remember it */
		target_ix = mxc_alloc(p->c,sizeof(int)*(m->p.dim));
		p1 = *p;
		p1.dc = target;
		p1.ofs = target_ix;

		w = mxc_alloc(c,sizeof(MX_IP_WORK)*(tot = 1<<m->p.dim));
		memset(w,0,sizeof(MX_IP_WORK)*(tot));
		for ( i = 0 ; i < tot ; i ++ ) {
			w[i].data = mxc_alloc(c,sizeof(BASE_TYPE)*ds_len);
			memset(w[i].data,0,sizeof(BASE_TYPE)*ds_len);

			/* bit j of the corner number selects offset 0 or 1 on axis j */
			for ( j = 0 ; j < m->p.dim ; j ++ )
				if ( i & (1<<j) )
					target_ix[j] = 1;
				else	target_ix[j] = 0;

			p1.data_ptrs[0] = w[i].data;
			ret = -1;
			if ( read_mx_cache(&p1) < 0 ) {
				/* the remembered cell is only partly updated: drop it */
				(*c->user_header->free_func)(c);
				goto err;
			}
if ( _mx_deb_flag )
ss_printf("- %f %f %f\n",w[i].data[0],w[i].data[1],w[i].data[2]);
			for ( j = 0 ; j < ds_len ; j ++ )
				u->data[i][j] = w[i].data[j];
		}
		u->point[0] = target[0];
		for ( i = 1 ; i <= m->p.dim ; i ++ ) {
			width = (((INTEGER64)1)<<
				(target[0] * m->dim_divide[i-1]));
			u->width[i] = width;
			u->point[i] = target[i] & (-width);
		}

	}
	/* blend the corners, collapsing one axis per pass into w[0..ofs) */
	for ( i = m->p.dim-1 ; i >= 0 ; i -- ) {
		_mid = target[i+1];
		if ( point )
			mid = point[i];
		else	mid = p->dc[i+1];
		width = (((INTEGER64)1)<<
			(target[0] * m->dim_divide[i]));
		low = _mid & (- width);
		high = low + width;
		if ( mid == low ) {
			;
		}
		else if ( mid == high ) {
			ofs = 1<<i;
			for ( j = 0 ; j < ofs ; j ++ )
				for ( k = 0 ; k < ds_len ; k ++ )
					w[j].data[k] = w[j+ofs].data[k];
		}
		else {
			ofs = 1<<i;
			for ( j = 0 ; j < ofs ; j ++ )
				for ( k = 0 ; k < ds_len ; k ++ ) {
					w[j].data[k] = 
					(w[j].data[k]*(high - mid)
						+ w[j+ofs].data[k]*
							(mid - low))/
						width;
				}
		}
	}
	/* scatter the blended sample to the caller's buffers */
	for ( i = 0 ; i < ds_len ; i ++ ) {
		if ( p->data_ix[i].x == MXC_INVALID )
			continue;
		((BASE_TYPE*)p->data_ptrs[p->data_ix[i].p])
					[p->data_ix[i].x]
			 = w[0].data[p->data_ix[i].x];
	}
	
	ret = 0;
err:

	for ( i = 0 ; i < tot ; i ++ ) {
		if ( w[i].data )
			mxc_free(c,w[i].data);
	}
	mxc_free(c,w);
	if ( target_ix )
		mxc_free(c,target_ix);
	mxc_free(c,target);

	return ret;
}

/*
 * Vectorised interpolated read: evaluates `len` query points in one call.
 *
 * cp->inp_buf is an MX_IP_V:
 *   levels : per-point levels, or 0 to use cp->inp_level for every point
 *   data   : one array of `len` coordinates per axis (m->p.dim arrays)
 *
 * cp->ret_buf is an MX_IP_V holding one array of `len` results per
 * requested data column:
 *   - if 0 on entry, it is allocated here (mxc_alloc) and stored in
 *     cp->ret_buf;
 *   - otherwise its arrays are assumed to be provided by the caller and
 *     the results are written into them.
 *
 * For each point the 1<<dim corner samples of its cell are read with
 * read_mx_cache(), unless the point lies in the same cell and level as the
 * previous point, in which case the previous corners are reused.  A failed
 * corner read is stored as 0.  The corners are then blended axis by axis.
 *
 * The cell width is computed in INTEGER64, like the other readers in this
 * file; it used to be computed with an int shift.
 *
 * Always returns 0.
 */
int
read_mx_ip_double_v(MX_CACHE_PARAM * cp)
{
MX_IP_V * v;
int i,j,k,n;
int len;
INTEGER64 * levels;
INTEGER64 level;
BASE_TYPE ** data;
INTEGER64 * target;
int * target_ix;
INTEGER64 ** pibot;
INTEGER64 ** width;
BASE_TYPE ** corner;
MX_CACHE * c;
int tot;
int dim;
MATRIX * m;
INTEGER64 p,w;
BASE_TYPE d;
BASE_TYPE * data_buf;
MX_CACHE_PARAM p1;
int dest;
BASE_TYPE mid;
BASE_TYPE * data_ptr;
INTEGER64 * width_ptr;
INTEGER64 * pibot_ptr;
INTEGER64 low,high;
int ofs;
MX_IP_V * ret_v;
BASE_TYPE ** ret_ptr;
INTEGER64 lev;

int hit[5],all,pt;
	c = cp->c;
	m = c->m;
	v = cp->inp_buf;
	len = v->len;
	levels = v->levels;
	level = cp->inp_level;
	data = (BASE_TYPE**)v->data;
	dim = m->p.dim;
	tot = 1<<dim;

	/* dest = number of result columns = largest index used in buffer 0, plus 1 */
	dest = 0;
	for ( i = 0 ; i < c->ds_len ; i ++ ) {
		if ( cp->data_ix[i].p != 0 )
			continue;
		if ( cp->data_ix[i].x == MXC_INVALID )
			continue;
		if ( dest < cp->data_ix[i].x )
			dest = cp->data_ix[i].x;
	}
	dest ++;
	
	/* pibot[0][i] = level of point i, pibot[j+1][i] = cell origin on axis j */
	pibot = mxc_alloc(c,sizeof(INTEGER64*)*(dim+1));
	for ( i = 0 ; i < dim + 1 ; i ++ )
		pibot[i] = mxc_alloc(c,sizeof(INTEGER64)*len);
	width = mxc_alloc(c,sizeof(INTEGER64*)*dim);
	for ( i = 0 ; i < dim ; i ++ )
		width[i] = mxc_alloc(c,sizeof(INTEGER64)*len);
	/* corner[dest*j + k][i] = column k of corner j for point i;
	   corner 0 doubles as the result arrays */
	corner = mxc_alloc(c,sizeof(BASE_TYPE*)*tot*dest);
	if ( cp->ret_buf == 0 )
		for ( i = 0 ; i < tot*dest ; i ++ )
			corner[i] = mxc_alloc(c,sizeof(BASE_TYPE)*len);
	else {
		ret_v = cp->ret_buf;
		ret_ptr = (BASE_TYPE**)ret_v->data;
		for ( i = 0 ; i < dest ; i ++ )
			corner[i] = ret_ptr[i];
		for ( ; i < tot*dest ; i ++ )
			corner[i] = mxc_alloc(c,sizeof(BASE_TYPE)*len);
	}
	target = mxc_alloc(c,sizeof(INTEGER64)*(dim+1));
	target_ix = mxc_alloc(c,sizeof(int)*dim);
	data_buf = mxc_alloc(c,sizeof(BASE_TYPE)*dest);

	p1 = *cp;
	p1.data_ptrs[0] = data_buf;
	p1.ofs = target_ix;
	p1.dc = target;
	
ss_printf("vec-1\n");
hit[0] = hit[1] = hit[2] = hit[3] = all = 0;

	for ( i = 0 ; i < len ; i ++ ) {
all++;
		if ( levels )
			lev = levels[i];
		else	lev = level;
		/* pt records why a fresh read was needed (debug statistics only) */
pt = 0;
		if ( i == 0 )
			goto get_point;
pt = 1;
		if ( pibot[0][i-1] != lev )
			goto get_point;
		for ( j = 0 ; j < dim ; j ++ ) {
			p = pibot[j+1][i-1];
			d = data[j][i];
pt = 2;
			if ( p > d )
				goto get_point;
			w = width[j][i-1];
pt = 3;
			if ( p + w <= d )
				goto get_point;
			pibot[j+1][i] = p;
			width[j][i] = w;
		}
		/* same cell as the previous point: reuse its corners */
		pibot[0][i] = pibot[0][i-1];
		for ( j = 0 ; j < tot*dest ; j ++ )
			corner[j][i] = corner[j][i-1];
		continue;
	get_point:
hit[pt] ++;
		target[0] = lev;
		for ( j = 0 ; j < dim ; j ++ ) {
			target[j+1] = p = data[j][i];
			width[j][i] = w = ((INTEGER64)1)<<(lev * m->dim_divide[j]);
			pibot[j+1][i] = p & (- w);
		}
		pibot[0][i] = lev;
		for ( j = 0 ; j < tot ; j ++ ) {
			/* bit k of the corner number selects offset 0 or 1 on axis k */
			for ( k = 0 ; k < dim ; k ++ ) {
				if ( j & (1<<k) )
					target_ix[k] = 1;
				else	target_ix[k] = 0;
			}
			if ( read_mx_cache(&p1) < 0 ) {
				for ( k = 0 ; k < dest ; k ++ )
					corner[dest*j + k][i] = 0;
			}
			else {
				for ( k = 0 ; k < dest ; k ++ )
					corner[dest*j + k][i] = data_buf[k];
			}
		}
	}
ss_printf("vec-2 %i :: %i %i %i %i\n",all,hit[0],hit[1],hit[2],hit[3]);

	/* blend the corners, collapsing one axis per pass */
	for ( j = dim-1 ; j >= 0 ; j -- ) {
		width_ptr = width[j];
		data_ptr = data[j];
		pibot_ptr = pibot[j+1];
		for ( i = 0 ; i < len ; i ++ ) {
			w = width_ptr[i];
			mid = data_ptr[i];
			p = pibot_ptr[i];
			low = p;
			high = low + w;
			if ( mid == low ) {
				;
			}
			else if ( mid == high ) {
				ofs = 1<<j;
				for ( k = 0 ; k < ofs ; k ++ )
					for ( n = 0 ; n < dest ; n ++ )
						corner[dest*k + n][i]
							= corner[dest*(k+ofs) + n][i];
			}
			else {
				ofs = 1<<j;
				for ( k = 0 ; k < ofs ; k ++ )
					for ( n = 0 ; n < dest ; n ++ ) {
						corner[dest*k + n][i] =
						(corner[dest*k + n][i]*(high - mid)
						  + corner[dest*(k+ofs) + n][i]*
						  		(mid - low))/w;
					}
			}
		}
	}

	if ( cp->ret_buf == 0 ) {
		/* hand the first `dest` corner arrays over as the result */
		ret_v = mxc_alloc(c,sizeof(*ret_v));
		memset(ret_v,0,sizeof(*ret_v));
		ret_v->len = len;
		ret_v->data = mxc_alloc(c,sizeof(BASE_TYPE*)*dest);
		ret_ptr = (BASE_TYPE**)ret_v->data;
		for ( i = 0 ; i < dest ; i ++ )
			ret_ptr[i] = corner[i];
		cp->ret_buf = ret_v;
	}
ss_printf("vec-3\n");

	mxc_free(c,data_buf);
	mxc_free(c,target);
	mxc_free(c,target_ix);
	for ( i = 0 ; i < dim + 1 ; i ++ )
		mxc_free(c,pibot[i]);
	mxc_free(c,pibot);
	/* corner[0..dest) belong to the result and are not freed here */
	for ( i = dest ; i < tot*dest ; i ++ )
		mxc_free(c,corner[i]);
	mxc_free(c,corner);
	for ( i = 0 ; i < dim ; i ++ )
		mxc_free(c,width[i]);
	mxc_free(c,width);
	return 0;
}


/*
 * Interpolated partial derivatives of the cached data at one point.
 *
 * p : cache parameters.  If p->inp_buf is set it holds the query point as
 *     doubles (one per axis); otherwise the point is taken from p->dc.
 *     The level is always p->inp_level.
 *
 * All 1<<dim corner samples of the cell are read.  For every axis n the
 * per-corner difference quotient along n is formed and then blended over
 * the cell in the same way read_mx_ip_double() blends values.
 *
 * Returns a newly allocated 2-D double vector with extents
 * { m->p.dim, number of valid entries in p->data_ix }, or 0 when a
 * coordinate is negative or a corner read fails.
 *
 * The copy from p->dc is limited to the dim+1 entries `target` can hold;
 * it used to run to index dim+1 and wrote one element past the buffer.
 */
void *
read_mx_ipdiff_double2(MX_CACHE_PARAM * p)
{
MX_IP_WORK * _w, * ww;
int i,j,k;
int n;
int tot;

int * target_ix;
MX_CACHE_PARAM p1;
INTEGER64 low,high,_mid,width;
double mid;
void * ret;
double * ptr;
int ofs;
INTEGER64 * target;
MATRIX * m;
int ds_len;
int level;
BASE_TYPE * point;
MATRIX_ALLOC_VECTOR_PARAM vp;
int ix[2];
int * ix_ref;
MATRIX_DH_SET ds;
int m_ix[2];

	m = p->c->m;
	ds_len = p->c->ds_len;
	point = p->inp_buf;
	level = p->inp_level;
	ret = 0;

	/* target[0] = level, target[1..dim] = integer coordinates */
	target = mxc_alloc(p->c,sizeof(INTEGER64)*(m->p.dim+1));
	if ( point ) {
		for ( i = 0 ; i < m->p.dim ; i ++ )
			target[i+1] = point[i];
		target[0] = level;
	}
	else {
		for ( i = 0 ; i <= m->p.dim ; i ++ )
			target[i] = p->dc[i];
		target[0] = level;
	}
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		if ( target[i+1] < 0 ) {
			mxc_free(p->c,target);
			return 0;
		}
	}
	target_ix = mxc_alloc(p->c,sizeof(int)*(m->p.dim));
	p1 = *p;
	p1.dc = target;
	p1.ofs = target_ix;

	/* ix_ref lists the valid data columns, padded with -1 */
	ix_ref = mxc_alloc(p->c,sizeof(int)*ds_len);
	ix[0] = m->p.dim;
	ix[1] = 0;
	for ( i = 0 ; i < ds_len ; i ++ ) {
		if( p->data_ix[i].x == MXC_INVALID )
			continue;
		ix_ref[ix[1]] = p->data_ix[i].x;
		ix[1] ++;
	}
	for ( i = ix[1] ; i < ds_len ; i ++ )
		ix_ref[i] = -1;

	vp.dim = 2;
	vp.ix_size = &ix[0];
	vp.default_data = 0;
	vp.mxc = p->c;
	ret = mxt_alloc_vector(
			&mx_type_double_v,
			MD_CALLOC,
			&vp,0);
	get_matrix_dh_set(&ds,ret);
	ptr = ds.offset;

	/* _w holds the raw corner samples, ww the per-axis difference quotients */
	_w = mxc_alloc(p->c,sizeof(MX_IP_WORK)*(tot = 1<<m->p.dim));
	memset(_w,0,sizeof(MX_IP_WORK)*(tot));
	ww = mxc_alloc(p->c,sizeof(MX_IP_WORK)*(tot = 1<<m->p.dim));
	memset(ww,0,sizeof(MX_IP_WORK)*(tot));
	for ( i = 0 ; i < tot ; i ++ ) {
		_w[i].data = mxc_alloc(p->c,sizeof(BASE_TYPE)*ds_len);
		memset(_w[i].data,0,sizeof(BASE_TYPE)*ds_len);

		/* bit j of the corner number selects offset 0 or 1 on axis j */
		for ( j = 0 ; j < m->p.dim ; j ++ )
			if ( i & (1<<j) )
				target_ix[j] = 1;
			else	target_ix[j] = 0;

		p1.data_ptrs[0] = _w[i].data;
		if ( read_mx_cache(&p1) < 0 ) {
			mxc_free(p->c,ret);
			ret = 0;
			goto err;
		}
		ww[i].data = mxc_alloc(p->c,sizeof(BASE_TYPE)*ds_len);
		memset(ww[i].data,0,sizeof(BASE_TYPE)*ds_len);
	}
	for ( n = 0 ; n < m->p.dim ; n ++ ) {
		/* difference quotient along axis n, stored at both ends of each edge */
		width = (((INTEGER64)1)<<
			(p1.dc[0] * m->dim_divide[n]));
		for ( i = 0 ; i < tot ; i ++ ) {
			for ( k = 0 ; k < ds_len ; k ++ ) {
				if ( i & (1<<n) ) {
					ww[i].data[k] = 
						(_w[i].data[k] 
						- _w[i & (~(1<<n))].data[k])/
						width;
				}
				else {
					ww[i].data[k] = 
						(_w[i |(1<<n)].data[k]
						- _w[i].data[k])/
						width;
				}
			}
		}
		/* blend the quotients, collapsing one axis per pass into ww[0..ofs) */
		for ( i = m->p.dim-1 ; i >= 0 ; i -- ) {
			_mid = target[i+1];
			if ( point )
				mid = point[i];
			else	mid = p->dc[i+1];
			width = (((INTEGER64)1)<<
				(p1.dc[0] * m->dim_divide[i]));
			low = _mid & (- width);
			high = low + width;
			if ( mid == low ) {
				;
			}
			else if ( mid == high ) {
				ofs = 1<<i;
				for ( j = 0 ; j < ofs ; j ++ )
					for ( k = 0 ; k < ds_len ; k ++ )
						ww[j].data[k] = ww[j+ofs].data[k];
			}
			else {
				ofs = 1<<i;
				for ( j = 0 ; j < ofs ; j ++ )
					for ( k = 0 ; k < ds_len ; k ++ ) {
						ww[j].data[k] = 
						(ww[j].data[k]*(high - mid)
							+ ww[j+ofs].data[k]*
								(mid - low))/
							width;
					}
			}
		}
		/* NOTE(review): m_ix is {column, axis} while the result extents are
		   {dim, columns} - confirm against get_seq_from_ix() that this
		   ordering is intended. */
		for ( j = 0 ; j < ds_len ; j ++ ) {
			if ( ix_ref[j] < 0 )
				break;
			m_ix[0] = j;
			m_ix[1] = n;
			ptr[get_seq_from_ix(&m_ix[0],ds.ix,2)] = ww[0].data[ix_ref[j]];
		}
	}
	
err:

	for ( i = 0 ; i < tot ; i ++ ) {
		if ( ww[i].data )
			mxc_free(p->c,ww[i].data);
		if ( _w[i].data )
			mxc_free(p->c,_w[i].data);
	}
	mxc_free(p->c,_w);
	mxc_free(p->c,ww);
	mxc_free(p->c,target_ix);
	mxc_free(p->c,target);
	mxc_free(p->c,ix_ref);

	return ret;
}



/*
 * Forward-difference partial derivatives of the cached data at the origin
 * of the cell containing a point.
 *
 * p : cache parameters.  If p->inp_buf is set it holds the query point as
 *     doubles (one per axis); otherwise the point is taken from p->dc.
 *     The level is always p->inp_level.
 *
 * The coordinates are snapped down to the cell origin, the sample there is
 * read, and for every axis i the sample one cell further along i is read;
 * the difference divided by the cell width on axis i is stored.
 *
 * Returns a newly allocated 2-D double vector with extents
 * { m->p.dim, number of valid entries in p->data_ix }, or 0 when a read
 * fails.
 *
 * Two defects of the earlier form are corrected:
 *  - the copy from p->dc is limited to the dim+1 entries `target` can
 *    hold (it used to write one element past the buffer);
 *  - the step width is taken from dim_divide[i], the axis being
 *    differentiated; it used to be indexed with the data-column counter.
 *
 * NOTE(review): a negative coordinate jumps to `finish` and returns the
 * zero-filled result rather than 0 - confirm that this is intended.
 */
void *
read_mx_ipdiff_double(MX_CACHE_PARAM * p)
{
MX_CACHE_PARAM p1;
BASE_TYPE * data1, * data2;
int i,j;
MATRIX * m;
void * ret;
MATRIX_ALLOC_VECTOR_PARAM vp;
int ix[2];
int * ix_ref;
INTEGER64 delta;
BASE_TYPE diff;
MATRIX_DH_SET ds;
BASE_TYPE * ptr;
int m_ix[2];
INTEGER64 * target;
int ds_len;
BASE_TYPE * point;
int level;
int * target_ix;


	point = p->inp_buf;

	level = p->inp_level;
	m = p->c->m;
	ds_len = p->c->ds_len;

	/* target[0] = level, target[1..dim] = integer coordinates */
	target = mxc_alloc(p->c,sizeof(INTEGER64)*(m->p.dim+1));
	target_ix = mxc_alloc(p->c,sizeof(int)*(m->p.dim));

	/* data1 = sample at the cell origin, data2 = sample one step along axis i */
	data1 = mxc_alloc(p->c,sizeof(BASE_TYPE)*ds_len);
	data2 = mxc_alloc(p->c,sizeof(BASE_TYPE)*ds_len);
	ix_ref = mxc_alloc(p->c,sizeof(int)*ds_len);

	p1 = *p;
	p1.dc = target;
	p1.ofs = 0;
	p1.inp_level = level;
	p1.inp_buf = 0;

	/* ix_ref lists the valid data columns, padded with -1 */
	ix[0] = m->p.dim;
	ix[1] = 0;
	for ( i = 0 ; i < ds_len ; i ++ ) {
		if( p->data_ix[i].x == MXC_INVALID )
			continue;
		ix_ref[ix[1]] = p->data_ix[i].x;
		ix[1] ++;
	}
	for ( i = ix[1] ; i < ds_len ; i ++ )
		ix_ref[i] = -1;

	vp.dim = 2;
	vp.ix_size = &ix[0];
	vp.default_data = 0;
	vp.mxc = p->c;
	ret = mxt_alloc_vector(
			&mx_type_double_v,
			MD_CALLOC,
			&vp,0);
	get_matrix_dh_set(&ds,ret);
	ptr = ds.offset;

	if ( point ) {
		for ( j = 0 ; j < m->p.dim ; j ++ )
			target[j+1] = point[j];
		target[0] = level;
	}
	else {
		for ( j = 0 ; j <= m->p.dim ; j ++ )
			target[j] = p->dc[j];
		target[0] = level;
	}
	/* snap every coordinate down to the origin of its cell */
	for ( j = 0 ; j < m->p.dim ; j ++ ) {
		if ( target[j+1] < 0 )
			goto finish;
		delta = ((INTEGER64)1)<<
				(target[0] * m->dim_divide[j]);
		target[j+1] = ((INTEGER64)target[j+1]) & (-delta);
	}
	p1.data_ptrs[0] = data1;
	p1.ofs = 0;
	if ( read_mx_cache(&p1) < 0 ) {
		mxc_free(p->c,ret);
		ret = 0;
		goto finish;
	}

	for ( i = 0 ; i < m->p.dim ; i ++ ) {

		/* read the neighbour one cell further along axis i */
		p1.data_ptrs[0] = data2;
		p1.ofs = target_ix;
		for ( j = 0 ; j < m->p.dim ; j ++ ) {
			if ( j == i )
				target_ix[j] = 1;
			else	target_ix[j] = 0;
		}

		if ( read_mx_cache(&p1) < 0 ) {
			mxc_free(p->c,ret);
			ret = 0;
			goto finish;
		}

		/* distance between the two samples on axis i */
		delta = ((INTEGER64)1)<<
				(target[0] * m->dim_divide[i]);
		for ( j = 0 ; j < ds_len ; j ++ ) {
			if ( ix_ref[j] < 0 )
				break;
			diff = (data2[ix_ref[j]] - data1[ix_ref[j]])/delta;
			m_ix[0] = j;
			m_ix[1] = i;
			ptr[get_seq_from_ix(&m_ix[0],ds.ix,2)] = diff;
		}
	}

finish:

	mxc_free(p->c,target);
	mxc_free(p->c,data1);
	mxc_free(p->c,data2);
	mxc_free(p->c,target_ix);
	mxc_free(p->c,ix_ref);
	return ret;
}


/*
 * print_ret_buf
 *
 * Debug helper: prints the label str on a line of its own, followed by
 * every element of p->ret_buf, one tab-indented "%f" value per line.
 */
void
print_ret_buf(char * str,MX_CACHE_PARAM * p)
{
MATRIX_DH_SET buf_ds;
BASE_TYPE * values;
int n;

	get_matrix_dh_set(&buf_ds,p->ret_buf);
	values = buf_ds.offset;

	ss_printf("%s\n",str);

	n = 0;
	while ( n < buf_ds.total_element ) {
		ss_printf("\t%f\n",values[n]);
		n ++;
	}
}


/*
 * matrix_newton_double_loop
 *
 * Performs one update of the estimate stored in p->ret_buf and returns
 * the Euclidean distance between the old and the new estimate.
 *
 *   p      : cache parameters; p->ret_buf is overwritten in place,
 *            p->inp_buf and p->avg_mtx are read.
 *   detail : 0 selects read_mx_ipdiff_double, 1 selects
 *            read_mx_ipdiff_double2; any other value means that no
 *            difference table is requested.
 *   tr     : not used by this function.
 *
 * The value at the current estimate is read with read_mx_ip_double.
 * If that read fails, or no difference table is obtained, the new
 * estimate is resolve_equation_double(avg_mtx, compound(inp_buf, e_m)).
 * Otherwise it is ret_buf + resolve_equation_double(table,
 * inp_buf - y); when that solve fails the table contents are replaced
 * by the values of p->avg_mtx and the solve is repeated.
 *
 * NOTE(review): the repeat has no bound -- if the solve also fails
 * with the avg_mtx values this never terminates.  The result of the
 * solve in the "no table" branch is used without a check for 0.
 */
double
matrix_newton_double_loop(
	MX_CACHE_PARAM * p,
	int detail,
	double tr)
{
MX_CACHE_PARAM q;
MATRIX * mtx;
MATRIX_ALLOC_VECTOR_PARAM vp;
MATRIX_DH_SET y_ds;
MATRIX_DH_SET cur_ds;
MATRIX_DH_SET next_ds;
MATRIX_DH_SET tbl_ds;
MATRIX_DH_SET avg_ds;
BASE_TYPE * y_vals,* cur, * next;
void * y_data;
void * table;
void * t1,* t2,* t3;
int shape[2];
int pos[2];
int row,col,k,total;
double sum,delta;

	mtx = p->c->m;

	/* 1 x dim buffer that receives the value read at the estimate */
	shape[0] = 1;
	shape[1] = mtx->p.dim;
	vp.dim = 2;
	vp.ix_size = shape;
	vp.default_data = 0;
	vp.mxc = p->c;
	y_data = mxt_alloc_vector(
			&mx_type_double_v,
			MD_CALLOC,
			&vp,0);
	get_matrix_dh_set(&y_ds,y_data);
	y_vals = y_ds.offset;

	get_matrix_dh_set(&cur_ds,p->ret_buf);
	cur = cur_ds.offset;

	q = *p;
	q.inp_buf = cur;
	q.inp_level = p->inp_level;
	q.data_ptrs[0] = y_vals;
	q.dc = 0;

	/* the table is only requested when the value read succeeded */
	table = 0;
	if ( read_mx_ip_double(&q) >= 0 ) {
		if ( detail == 0 )
			table = read_mx_ipdiff_double(&q);
		else if ( detail == 1 )
			table = read_mx_ipdiff_double2(&q);
	}

	if ( table == 0 ) {
		t1 = mxt_e_m(&mx_type_double_v,1,p->c);
		t2 = mxt_compound_vv(p->inp_buf,t1,1,p->c);
		t3 = resolve_equation_double(p->avg_mtx,t2,p->c);
	}
	else {
		for ( ; ; ) {
			t1 = mxt_sub_vv(p->inp_buf,y_data,p->c);
			t2 = resolve_equation_double(table,t1,p->c);
			if ( t2 != 0 )
				break;

			/* solve failed: overwrite the table with the
			   avg_mtx values and try again */
			mxc_free(p->c,t1);
			get_matrix_dh_set(&tbl_ds,table);
			get_matrix_dh_set(&avg_ds,p->avg_mtx);
			for ( row = 0 ; row < tbl_ds.ix[0] ; row ++ ) {
				for ( col = 0 ; col < tbl_ds.ix[1] ; col ++ ) {
					pos[0] = row;
					pos[1] = col;
					((BASE_TYPE*)tbl_ds.offset)
					[get_seq_from_ix(&pos[0],tbl_ds.ix,2)]
					= ((BASE_TYPE*)avg_ds.offset)
					   [get_seq_from_ix(&pos[0],avg_ds.ix,2)];
				}
			}
		}
		t3 = mxt_add_vv(t2,p->ret_buf,p->c);
	}

	/* store the new estimate and accumulate the squared step */
	get_matrix_dh_set(&next_ds,t3);
	next = next_ds.offset;
	total = cur_ds.ix[0]*cur_ds.ix[1];
	sum = 0;
	for ( k = 0 ; k < total ; k ++ ) {
		delta = cur[k] - next[k];
		sum += delta*delta;
		cur[k] = next[k];
	}

	mxc_free(p->c,t1);
	mxc_free(p->c,t2);
	mxc_free(p->c,t3);
	mxc_free(p->c,y_data);
	if ( table ) {
		mxc_free(p->c,table);
	}
	return sqrt(sum);
}


/* Grid extent used by generate_rand(): every coordinate index starts at
   -G_RAND and G_RAND+1 is passed to inc_ix() as its end position.  The
   same value divides tr when the grid step is computed. */
#define G_RAND 5
/* Factor applied to tr/G_RAND to obtain the grid step in generate_rand(). */
#define G_RATE 2

/*
 * generate_rand
 *
 * Grid search around the current estimate in p->ret_buf.
 *
 * For a grid step of (int)(tr/G_RAND * G_RATE) every index combination
 * enumerated by inc_ix() between -G_RAND and the end position G_RAND+1
 * is evaluated with read_mx_ip_double().  The sample whose value is
 * closest (Euclidean distance) to p->inp_buf is remembered and the
 * estimate is moved onto it.  The pass is repeated with tr halved until
 * a pass has been run with tr below p->tolerance.
 *
 * On return p->ret_buf holds the moved estimate and
 * p->result_y_tolerance the best distance found in the last pass; it is
 * -1 when no sample of that pass could be read.
 */
void
generate_rand(MX_CACHE_PARAM * p,double tr)
{
MATRIX * mtx;
MATRIX_DH_SET est_ds;
MATRIX_DH_SET want_ds;
BASE_TYPE * est;
BASE_TYPE * want;
BASE_TYPE * probe;
BASE_TYPE * sample;
int * cursor,* stride,* lo,* hi;
int * best;
MX_CACHE_PARAM q;
double err,best_err,d;
int step;
int k;

	mtx = p->c->m;
	get_matrix_dh_set(&est_ds,p->ret_buf);
	est = est_ds.offset;
	get_matrix_dh_set(&want_ds,p->inp_buf);
	want = want_ds.offset;

	cursor = mxc_alloc(p->c,sizeof(int)*(mtx->p.dim));
	lo = mxc_alloc(p->c,sizeof(int)*(mtx->p.dim));
	hi = mxc_alloc(p->c,sizeof(int)*(mtx->p.dim));
	stride = mxc_alloc(p->c,sizeof(int)*(mtx->p.dim));
	best = mxc_alloc(p->c,sizeof(int)*(mtx->p.dim));
	probe = mxc_alloc(p->c,sizeof(BASE_TYPE)*(mtx->p.dim));
	sample = mxc_alloc(p->c,sizeof(BASE_TYPE)*mtx->p.dim);

	for ( ; ; ) {
		/* the step is truncated to an integer */
		step = tr/G_RAND * G_RATE;

		for ( k = 0 ; k < mtx->p.dim ; k ++ ) {
			cursor[k] = lo[k] = - G_RAND;
			hi[k] = G_RAND+1;
			stride[k] = 1;
		}
		q = *p;
		q.data_ptrs[0] = sample;
		q.dc = 0;

		memset(best,0,sizeof(int)*mtx->p.dim);
		best_err = -1;	/* negative: nothing found yet */

		do {
			for ( k = 0 ; k < mtx->p.dim ; k ++ )
				probe[k] = est[k] + cursor[k] * step;
			q.inp_buf = probe;
			if ( read_mx_ip_double(&q) >= 0) {
				err = 0;
				for ( k = 0 ; k < mtx->p.dim ; k ++ ) {
					d = sample[k] - want[k];
					err += d * d;
				}
				err = sqrt(err);
				if ( best_err < 0 || best_err > err ) {
					best_err = err;
					for ( k = 0 ; k < mtx->p.dim ; k ++ )
						best[k] = cursor[k];
				}
			}
		} while ( !inc_ix(cursor,lo,stride,hi,mtx->p.dim) );

		/* move the estimate onto the best grid point */
		for ( k = 0 ; k < mtx->p.dim ; k ++ )
			est[k] += best[k] * step;

		if ( !(tr >= p->tolerance) )
			break;
		tr = tr/2;
	}

	p->result_y_tolerance = best_err;

	mxc_free(p->c,cursor);
	mxc_free(p->c,lo);
	mxc_free(p->c,hi);
	mxc_free(p->c,stride);
	mxc_free(p->c,probe);
	mxc_free(p->c,best);
	mxc_free(p->c,sample);

	return;
}

/*
 * matrix_newton_double
 *
 * Repeats matrix_newton_double_loop() on p until the step length drops
 * below p->tolerance at level 0, or until a stop condition is hit.
 *
 * Level handling: after the first step the level is raised according
 * to tr/p->tolerance (shifted by block_size[0]); from then on it is
 * lowered by one per iteration and clamped to [0, total_levels-1].
 *
 * At level 0 the strategy escalates with the number of consecutive
 * level-0 iterations (lev0_retry):
 *   - normally                          : loop with detail 0
 *   - > 10 and > 1.2 * p->try_avg       : MXCS_DETAIL, loop with detail 1
 *   - > 25                              : MXCS_RAND, loop with detail 1,
 *     with generate_rand() called whenever lev0_retry is a multiple
 *     of 10; the second such call without success ends with MXCS_STOP.
 *
 * Results are reported through p->newton_status, p->result_tolerance,
 * p->result_y_tolerance and the running average p->try_avg.
 * Always returns 0.
 */
int 
matrix_newton_double(
	MX_CACHE_PARAM * p)
{
double tr;
int rate;
int retry,lev0_retry;
int f;
int gr_try;
	f = 1;
	retry = 0;
	lev0_retry = 0;
	gr_try = 0;
	tr = -1;
	p->newton_status = MXCS_NORMAL;
	p->result_y_tolerance = 0;
	for ( ; ; ) {

		if ( p->inp_level == 0 ) {

			if ( lev0_retry > 25 ) {
				p->newton_status = MXCS_RAND;
				if ( (lev0_retry % 10) == 0  ) {
					generate_rand(p,tr);
					/* generate_rand() stores -1 in
					   result_y_tolerance when it could not
					   read a single sample; a negative
					   value must not be accepted as
					   "below y_tolerance". */
					if ( p->y_tolerance &&
						p->result_y_tolerance >= 0 &&
						p->result_y_tolerance < p->y_tolerance ){
						break;
					}
					gr_try ++;
					if ( gr_try >= 2 ) {
						p->newton_status = MXCS_STOP;
						break;
					}
				}
				tr = matrix_newton_double_loop(p,1,tr);
			}
			else if ( lev0_retry > p->try_avg * 1.2 &&
				lev0_retry > 10 ) {

				p->newton_status = MXCS_DETAIL;
				tr = matrix_newton_double_loop(p,1,tr);
			}
			else {
				tr = matrix_newton_double_loop(p,0,tr);
			}

		}
		else	tr = matrix_newton_double_loop(p,0,tr);

		p->result_tolerance = tr;
		if ( tr < p->tolerance && p->inp_level == 0 ) {
			break;
		}
		if ( p->try_max && retry >= p->try_max ) {
			p->newton_status = MXCS_STOP;
			break;
		}
		retry ++;
		if ( p->inp_level == 0 )
			lev0_retry ++;
		else	lev0_retry = 0;
		if ( f ) {
			/* first iteration only: raise the level by one for
			   every remaining bit of the scaled step ratio.
			   NOTE(review): assumes p->tolerance > 0 and a
			   ratio that fits in an int -- confirm. */
			rate = tr/p->tolerance;
			rate = rate >> p->c->m->block_size[0];
			for ( ; rate ; rate = rate >> 1 , p->inp_level ++ );
		}
		f = 0;
		p->inp_level --;
		if ( p->inp_level >= p->c->m->total_levels )
			p->inp_level = p->c->m->total_levels - 1;
		if ( p->inp_level < 0 )
			p->inp_level = 0;
	}
	/* exponential moving average of the iteration count */
	p->try_avg = p->try_avg * 0.9 + retry * 0.1;
	return 0;
}

/*
 * resolve_equation_double
 *
 * Solves the linear system  a * x = v  by recursive elimination.
 *
 *   a : square 2-D matrix (size x size), element (i,j) read at
 *       offset[i*size + j].
 *   v : right-hand side of the same data type as a; accepted shapes
 *       are a 1-D vector of length size, a 2-D 1 x size object, or a
 *       2-D size x 1 object (which is transposed first).
 *   c : cache used for all temporary and result allocations.
 *
 * The diagonal element with the largest magnitude is taken as pivot;
 * the pivot row is eliminated from every other row, the reduced
 * (size-1) system is solved recursively and the pivot unknown is
 * obtained by back substitution.
 *
 * Returns a newly allocated 2-D 1 x size object holding x (released by
 * the caller with mxc_free), or 0 when the arguments do not fit or no
 * usable pivot exists (all diagonal elements zero).  Neither a nor the
 * caller's v is modified or freed.
 */
void * 
resolve_equation_double(void * a,void * v,MX_CACHE * c)
{
MATRIX_DH_SET a_ds,v_ds,ret_ds;
MATRIX_DH_SET _a_ds,_v_ds,_ret_ds;
MATRIX_ALLOC_VECTOR_PARAM vp;
void * ret;
void * _a,*_v,*_ret;
BASE_TYPE * a_ptr,* v_ptr, * ret_ptr;
BASE_TYPE * _v_ptr,* _a_ptr,* _ret_ptr;
BASE_TYPE max,d;
BASE_TYPE k,v_acc,lead;
int ix[2];
int size;
int f;		/* set when v was replaced by a temporary owned here */
int i,j;
int _i,_j;
int max_i;

	get_matrix_dh_set(&a_ds,a);
	get_matrix_dh_set(&v_ds,v);
	f = 0;
	ret = 0;
	_a = 0;
	_v = 0;
	_ret = 0;

	if ( a_ds.hd->dim != 2 )
		return 0;
	if ( a_ds.ix[0] != a_ds.ix[1] )
		return 0;
	if ( v_ds.tp != a_ds.tp )
		return 0;
	size = a_ds.ix[0];
	if ( size < 1 )
		return 0;
	switch ( v_ds.hd->dim ) {
	case 1:
		if ( v_ds.ix[0] != size )
			return 0;
		/* v_ds is deliberately left pointing at the caller's data,
		   exactly as before; the temporary is only released below */
		v = mxt_get_matrix_from_ary(
			v_ds.tp,v_ds.offset,size,2,c);
		f = 1;
		break;
	case 2:
		if ( v_ds.ix[0] == 1 && v_ds.ix[1] == size )
			break;
		if ( v_ds.ix[0] == size && v_ds.ix[1] == 1 ) {
			v = mxt_trans_m(v,c);
			get_matrix_dh_set(&v_ds,v);
			f = 1;
			break;
		}
		return 0;
	default:
		return 0;
	}

	ix[0] = 1;
	ix[1] = size;
	vp.ix_size = ix;
	vp.dim = 2;
	vp.default_data = 0;
	/* the other allocation sites in this file fill in vp.mxc; do the
	   same here so the field is never passed uninitialised */
	vp.mxc = c;
	ret = mxt_alloc_vector(
			v_ds.tp,
			MD_CALLOC,
			&vp,c);
	get_matrix_dh_set(&ret_ds,ret);
	ret_ptr = ret_ds.offset;

	a_ptr = a_ds.offset;
	v_ptr = v_ds.offset;
	if ( size == 1 ) {
		if ( *a_ptr == 0 )
			goto fail;
		*ret_ptr = (*v_ptr)/(*a_ptr);
		goto done;
	}

	/* pivot: diagonal element with the largest absolute value */
	max = -1;
	max_i = -1;
	for ( i = 0 ; i < size ; i ++ ) {
		d = a_ptr[size*i+i];
		if ( d < 0 )
			d = -d;
		if ( max < d ) {
			max = d;
			max_i = i;
		}
	}
	if ( max_i < 0 || max == 0 )
		goto fail;
	max = a_ptr[size*max_i + max_i];

	ix[0] = 1;
	ix[1] = size-1;
	vp.ix_size = ix;
	vp.dim = 2;
	vp.default_data = 0;
	_v = mxt_alloc_vector(
			v_ds.tp,
			MD_CALLOC,
			&vp,c);
	get_matrix_dh_set(&_v_ds,_v);
	_v_ptr = _v_ds.offset;

	ix[0] = size-1;
	ix[1] = size-1;
	vp.ix_size = ix;
	vp.dim = 2;
	vp.default_data = 0;
	_a = mxt_alloc_vector(
			v_ds.tp,
			MD_CALLOC,
			&vp,c);
	get_matrix_dh_set(&_a_ds,_a);
	_a_ptr = _a_ds.offset;

	/* i runs over the rows; build the reduced system without the
	   pivot row and the pivot column */
	for ( i = 0 ; i < size ; i ++ ) {
		if ( i == max_i )
			continue;
		if ( i < max_i )
			_i = i;
		else	_i = i-1;
		lead = a_ptr[i*size+max_i];
		if ( lead == 0 ) {
			/* The row already has a zero in the pivot column,
			   so it needs no elimination: carry it over
			   unchanged.  (Formerly this case divided by zero
			   and then gave up, which made e.g. the identity
			   matrix unsolvable and leaked ret, _a and _v.) */
			for ( j = 0 ; j < size ; j ++ ) {
				if ( j == max_i )
					continue;
				if ( j < max_i )
					_j = j;
				else	_j = j-1;
				_a_ptr[_i*(size-1) + _j] = a_ptr[i*size + j];
			}
			_v_ptr[_i] = v_ptr[i];
			continue;
		}
		/* scale row i so its pivot-column entry equals the pivot,
		   then subtract the pivot row */
		k = max/lead;
		for ( j = 0 ; j < size ; j ++ ) {
			if ( j == max_i )
				continue;
			if ( j < max_i )
				_j = j;
			else	_j = j-1;

			_a_ptr[_i*(size-1) + _j]
				= a_ptr[i*size + j]*k - 
					a_ptr[max_i*size + j];
		}
		_v_ptr[_i] = v_ptr[i]*k - v_ptr[max_i];
	}
	_ret = resolve_equation_double(_a,_v,c);
	if ( _ret == 0 )
		goto fail;
	get_matrix_dh_set(&_ret_ds,_ret);
	_ret_ptr = _ret_ds.offset;

	/* copy the reduced solution and back-substitute the pivot row */
	v_acc = 0;
	for ( i = 0 ; i < size ; i ++ ) {
		if ( i == max_i )
			continue;
		if ( i < max_i )
			_i = i;
		else	_i = i-1;
		ret_ptr[i] = _ret_ptr[_i];
		v_acc += _ret_ptr[_i] * a_ptr[max_i*size + i];
	}
	ret_ptr[max_i] = (v_ptr[max_i] - v_acc)/max;
	goto done;

fail:
	mxc_free(c,ret);
	ret = 0;

done:
	/* single exit: release every temporary on success and failure */
	if ( _ret )
		mxc_free(c,_ret);
	if ( _a )
		mxc_free(c,_a);
	if ( _v )
		mxc_free(c,_v);
	if ( f )
		mxc_free(c,v);
	return ret;
}

/*
 * mxt_print_double
 *
 * Formats the BASE_TYPE value stored at d and appends the text to the
 * string buffer b.
 *
 *   tp  : data type descriptor (not used here).
 *   b   : destination string buffer.
 *   d   : pointer to the value to print.
 *   fmt : printf-style format consuming one double, or 0 for "%f".
 *
 * The text is built in a fixed 1000-byte buffer; output that does not
 * fit is truncated instead of overrunning the buffer.
 */
void
mxt_print_double(MATRIX_DATA_TYPE*tp,MATRIX_STRING_BUFFER*b,void*d,char*fmt)
{
char buffer[1000];
	/* keep the buffer a valid string even if formatting fails */
	buffer[0] = 0;
	if ( fmt == 0 ) {
		snprintf(buffer,sizeof(buffer),"%f",*(BASE_TYPE*)d);
	}
	else {
		/* fmt comes from the caller, so the length of its
		   expansion is not known here: bound the write */
		snprintf(buffer,sizeof(buffer),fmt,*(BASE_TYPE*)d);
	}
	out_matrix_string_buffer(b,buffer);
}




/*
 * mxt_reverse_double
 *
 * Solves a * x = e_i with resolve_equation_double() for every unit
 * vector e_i (i = 0 .. size-1, size = first extent of a) and stores
 * solution i at offsets [i*size .. i*size + size-1] of a newly
 * allocated size x size mx_type_double_v object.
 *
 * Returns that object, or 0 as soon as one of the solves fails (the
 * partially filled object is released in that case).
 */
void *
mxt_reverse_double(void * a,MX_CACHE * c)
{
MATRIX_ALLOC_VECTOR_PARAM vp;
MATRIX_DH_SET src_ds,unit_ds,out_ds,sol_ds;
void * unit;
void * out;
void * sol;
BASE_TYPE * unit_vals;
BASE_TYPE * out_vals;
BASE_TYPE * sol_vals;
BASE_TYPE fill;
int shape[2];
int n;
int which,k;

	get_matrix_dh_set(&src_ds,a);
	n = src_ds.ix[0];
	fill = 0;

	/* 1-D right-hand side, rewritten into each unit vector in turn */
	shape[0] = n;
	vp.ix_size = shape;
	vp.dim = 1;
	vp.default_data = &fill;
	vp.mxc = c;
	unit = mxt_alloc_vector(
			&mx_type_double_v,
			MD_CALLOC,
			&vp,0);
	get_matrix_dh_set(&unit_ds,unit);
	unit_vals = unit_ds.offset;

	/* n x n result */
	shape[0] = n;
	shape[1] = n;
	vp.ix_size = shape;
	vp.dim = 2;
	vp.default_data = &fill;
	out = mxt_alloc_vector(
			&mx_type_double_v,
			MD_CALLOC,
			&vp,0);
	get_matrix_dh_set(&out_ds,out);
	out_vals = out_ds.offset;

	for ( which = 0 ; which < n ; which ++ ) {
		unit_vals[which] = 1;
		sol = resolve_equation_double(a,unit,c);
		if ( sol == 0 ) {
			mxc_free(c,out);
			out = 0;
			break;
		}
		unit_vals[which] = 0;

		get_matrix_dh_set(&sol_ds,sol);
		sol_vals = sol_ds.offset;
		k = 0;
		while ( k < n ) {
			out_vals[which * n + k] = sol_vals[k];
			k ++;
		}
		mxc_free(c,sol);
	}

	mxc_free(c,unit);
	return out;
}

/*
 * convert_ex_matrix_double
 *
 * Copies an EX_MATRIX into a newly allocated 2-D mx_type_double_v
 * object with extents { ex->h_size, ex->v_size }.  Element XD(ex,i,j)
 * is written to offset j + i * h_size for i < v_size and j < h_size.
 * The object is allocated through the cache c and returned.
 */
void *
convert_ex_matrix_double(EX_MATRIX * ex,MX_CACHE * c)
{
MATRIX_ALLOC_VECTOR_PARAM vp;
MATRIX_DH_SET out_ds;
void * out;
double * cells;
double fill;
int shape[2];
int row,col;

	fill = 0;
	shape[0] = ex->h_size;
	shape[1] = ex->v_size;

	vp.dim = 2;
	vp.ix_size = shape;
	vp.default_data = &fill;
	vp.mxc = c;
	out = mxt_alloc_vector(
			&mx_type_double_v,
			MD_CALLOC,
			&vp,0);
	get_matrix_dh_set(&out_ds,out);
	cells = out_ds.offset;

	for ( row = 0 ; row < ex->v_size ; row ++ ) {
		col = 0;
		while ( col < ex->h_size ) {
			cells[row * ex->h_size + col] = XD(ex,row,col);
			col ++;
		}
	}
	return out;
}


