/*************************************************************************************************/
/*!
   	@file		pp_cpp_texture_weight16.h
	@author 	Fanzo
 	@date 		2008/4/20
*/
/*************************************************************************************************/
#pragma		once

///////////////////////////////////////////////////////////////////////////////////////////////////
//include files


#pragma pack( push , 8 )		//set align

namespace icubic
{
//=================================================================================================
// weight16
//=================================================================================================
//=================================================================================================
//! Weighted sum of four frgb pixels , weights picked from two tables by d.
/*!
	*c = weighttbl2[ d ]*c1 + weighttbl1[ d ]*c2 + weighttbl1[ 256-d ]*c3 + weighttbl2[ 256-d ]*c4

	c1..c4 are received by value and used as scratch.
	With d in 0..255 both tables are read at d and at 256 - d , i.e. within 0..256.
*/
cb_inline
void weight16_frgb
		(
		pp_pixel_calc*	c , 
		pp_pixel_calc	c1 , 
		pp_pixel_calc	c2 , 
		pp_pixel_calc	c3 , 
		pp_pixel_calc	c4 , 
		uint8			d , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// fetch the four tap weights up front
	const int	mirror	= 256 - d;
	const float	w_c1	= weighttbl2[ d ];
	const float	w_c2	= weighttbl1[ d ];
	const float	w_c3	= weighttbl1[ mirror ];
	const float	w_c4	= weighttbl2[ mirror ];

	// scale every tap by its weight
	MulPixel_frgb( &c1 , w_c1 );
	MulPixel_frgb( &c2 , w_c2 );
	MulPixel_frgb( &c3 , w_c3 );
	MulPixel_frgb( &c4 , w_c4 );

	// sum into c1 ( order c2 , c3 , c4 ) and hand the result back
	AddPixel_frgb( &c1 , c2 );
	AddPixel_frgb( &c1 , c3 );
	AddPixel_frgb( &c1 , c4 );
	*c	= c1;
}
//=================================================================================================
//! Weighted sum of four frgba pixels , weights picked from two tables by d.
/*!
	*c = weighttbl2[ d ]*c1 + weighttbl1[ d ]*c2 + weighttbl1[ 256-d ]*c3 + weighttbl2[ 256-d ]*c4

	c1..c4 are received by value and used as scratch.
	With d in 0..255 both tables are read at d and at 256 - d , i.e. within 0..256.
*/
cb_inline
void weight16_frgba
		(
		pp_pixel_calc*	c , 
		pp_pixel_calc	c1 , 
		pp_pixel_calc	c2 , 
		pp_pixel_calc	c3 , 
		pp_pixel_calc	c4 , 
		uint8			d , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// fetch the four tap weights up front
	const int	mirror	= 256 - d;
	const float	w_c1	= weighttbl2[ d ];
	const float	w_c2	= weighttbl1[ d ];
	const float	w_c3	= weighttbl1[ mirror ];
	const float	w_c4	= weighttbl2[ mirror ];

	// scale every tap by its weight
	MulPixel_frgba( &c1 , w_c1 );
	MulPixel_frgba( &c2 , w_c2 );
	MulPixel_frgba( &c3 , w_c3 );
	MulPixel_frgba( &c4 , w_c4 );

	// sum into c1 ( order c2 , c3 , c4 ) and hand the result back
	AddPixel_frgba( &c1 , c2 );
	AddPixel_frgba( &c1 , c3 );
	AddPixel_frgba( &c1 , c4 );
	*c	= c1;
}
//=================================================================================================
//! Weighted sum of four fa pixels , weights picked from two tables by d.
/*!
	*c = weighttbl2[ d ]*c1 + weighttbl1[ d ]*c2 + weighttbl1[ 256-d ]*c3 + weighttbl2[ 256-d ]*c4

	c1..c4 are received by value and used as scratch.
	With d in 0..255 both tables are read at d and at 256 - d , i.e. within 0..256.
*/
cb_inline
void weight16_fa
		(
		pp_pixel_calc*	c , 
		pp_pixel_calc	c1 , 
		pp_pixel_calc	c2 , 
		pp_pixel_calc	c3 , 
		pp_pixel_calc	c4 , 
		uint8			d , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// fetch the four tap weights up front
	const int	mirror	= 256 - d;
	const float	w_c1	= weighttbl2[ d ];
	const float	w_c2	= weighttbl1[ d ];
	const float	w_c3	= weighttbl1[ mirror ];
	const float	w_c4	= weighttbl2[ mirror ];

	// scale every tap by its weight
	MulPixel_fa( &c1 , w_c1 );
	MulPixel_fa( &c2 , w_c2 );
	MulPixel_fa( &c3 , w_c3 );
	MulPixel_fa( &c4 , w_c4 );

	// sum into c1 ( order c2 , c3 , c4 ) and hand the result back
	AddPixel_fa( &c1 , c2 );
	AddPixel_fa( &c1 , c3 );
	AddPixel_fa( &c1 , c4 );
	*c	= c1;
}
//=================================================================================================
// texture weight16
//=================================================================================================

//=================================================================================================
/*!
	Writes a span of len rgb pixels to dest , each one a weighted sum of a 4x4 block of
	rgb source pixels addressed through WrapNear4_repeat.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgb layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgb layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_repeat
	@param	src_h			source size in y , forwarded to WrapNear4_repeat
	@param	b_color			forwarded to rgb_to_frgb
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_repeat_rgb_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgb( &dc );
		
		// convert back and store		
		frgb_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgb pixels to dest , each one a weighted sum of a 4x4 block of
	rgba source pixels addressed through WrapNear4_repeat.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgb layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgba layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_repeat
	@param	src_h			source size in y , forwarded to WrapNear4_repeat
	@param	b_color			forwarded to rgba_to_frgba
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_repeat_rgb_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgba( &dc );
		
		// convert back and store		
		frgba_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgb pixels to dest , each one a weighted sum of a 4x4 block of
	a-format source pixels addressed through WrapNear4_repeat.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgb layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( a layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_repeat
	@param	src_h			source size in y , forwarded to WrapNear4_repeat
	@param	b_color			forwarded to a_to_frgb
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_repeat_rgb_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgb( &dc );
		
		// convert back and store		
		frgb_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgba pixels to dest , each one a weighted sum of a 4x4 block of
	rgb source pixels addressed through WrapNear4_repeat.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgba layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgb layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_repeat
	@param	src_h			source size in y , forwarded to WrapNear4_repeat
	@param	b_color			forwarded to rgb_to_frgb
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_repeat_rgba_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgb( &dc );
		
		// convert back and store		
		frgb_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgba pixels to dest , each one a weighted sum of a 4x4 block of
	rgba source pixels addressed through WrapNear4_repeat.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgba layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgba layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_repeat
	@param	src_h			source size in y , forwarded to WrapNear4_repeat
	@param	b_color			forwarded to rgba_to_frgba
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_repeat_rgba_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgba( &dc );
		
		// convert back and store		
		frgba_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgba pixels to dest , each one a weighted sum of a 4x4 block of
	a-format source pixels addressed through WrapNear4_repeat.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgba layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( a layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_repeat
	@param	src_h			source size in y , forwarded to WrapNear4_repeat
	@param	b_color			forwarded to a_to_frgba
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_repeat_rgba_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgba( &dc );
		
		// convert back and store		
		frgba_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len a-format pixels to dest , each one a weighted sum of a 4x4 block of
	rgb source pixels addressed through WrapNear4_repeat.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( a layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgb layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_repeat
	@param	src_h			source size in y , forwarded to WrapNear4_repeat
	@param	b_color			forwarded to rgb_to_fa
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_repeat_a_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_fa( &dc );
		
		// convert back and store		
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len a-format pixels to dest , each one a weighted sum of a 4x4 block of
	rgba source pixels addressed through WrapNear4_repeat.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( a layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgba layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_repeat
	@param	src_h			source size in y , forwarded to WrapNear4_repeat
	@param	b_color			forwarded to rgba_to_fa
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_repeat_a_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_fa( &dc );
		
		// convert back and store		
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len a-format pixels to dest , each one a weighted sum of a 4x4 block of
	a-format source pixels addressed through WrapNear4_repeat.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( a layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( a layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_repeat
	@param	src_h			source size in y , forwarded to WrapNear4_repeat
	@param	b_color			forwarded to a_to_fa
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_repeat_a_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_fa( &dc );
		
		// convert back and store		
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}	
}

//=================================================================================================
/*!
	Writes a span of len rgb pixels to dest , each one a weighted sum of a 4x4 block of
	rgb source pixels addressed through WrapNear4_clamp.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgb layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgb layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_clamp
	@param	src_h			source size in y , forwarded to WrapNear4_clamp
	@param	b_color			forwarded to rgb_to_frgb
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_clamp_rgb_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgb( &dc );
		
		// convert back and store		
		frgb_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgb pixels to dest , each one a weighted sum of a 4x4 block of
	rgba source pixels addressed through WrapNear4_clamp.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgb layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgba layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_clamp
	@param	src_h			source size in y , forwarded to WrapNear4_clamp
	@param	b_color			forwarded to rgba_to_frgba
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_clamp_rgb_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgba( &dc );
		
		// convert back and store		
		frgba_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgb pixels to dest , each one a weighted sum of a 4x4 block of
	a-format source pixels addressed through WrapNear4_clamp.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgb layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( a layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_clamp
	@param	src_h			source size in y , forwarded to WrapNear4_clamp
	@param	b_color			forwarded to a_to_frgb
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_clamp_rgb_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgb( &dc );
		
		// convert back and store		
		frgb_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgba pixels to dest , each one a weighted sum of a 4x4 block of
	rgb source pixels addressed through WrapNear4_clamp.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgba layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgb layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_clamp
	@param	src_h			source size in y , forwarded to WrapNear4_clamp
	@param	b_color			forwarded to rgb_to_frgb
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_clamp_rgba_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgb( &dc );
		
		// convert back and store		
		frgb_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgba pixels to dest , each one a weighted sum of a 4x4 block of
	rgba source pixels addressed through WrapNear4_clamp.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgba layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( rgba layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_clamp
	@param	src_h			source size in y , forwarded to WrapNear4_clamp
	@param	b_color			forwarded to rgba_to_frgba
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_clamp_rgba_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgba( &dc );
		
		// convert back and store		
		frgba_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
/*!
	Writes a span of len rgba pixels to dest , each one a weighted sum of a 4x4 block of
	a-format source pixels addressed through WrapNear4_clamp.

	( ssu , ssv ) and ( ttu , ttv ) are the texture coordinates at the two ends of the span.
	They are converted to 16.16 fixed point and stepped linearly , sampling half a step
	into each pixel interval.

	@param	dest			first destination pixel ( rgba layout )
	@param	len				pixel count ; a value <= 0 writes nothing
	@param	ssu				start u
	@param	ssv				start v
	@param	ttu				end u
	@param	ttv				end v
	@param	src				source image base address ( a layout )
	@param	src_pitchbyte	byte stride multiplied by the y tap index
	@param	src_w			source size in x , forwarded to WrapNear4_clamp
	@param	src_h			source size in y , forwarded to WrapNear4_clamp
	@param	b_color			forwarded to a_to_frgba
	@param	weighttbl1		weights of the two middle taps , read at indices 0..256
	@param	weighttbl2		weights of the two outer taps , read at indices 0..256
*/
cb_inline
void pp_cpp_texture_weight16f_m_clamp_rgba_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// Empty span : nothing to write , and the step computation below would divide by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the first sample sits half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// four x / four y tap indices plus the 8-bit weight selectors
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load the 4x4 taps row by row and convert them to the float format
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}			
		}
		// weight each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		// explicit clipping is left disabled
//		Clip_frgba( &dc );
		
		// convert back and store		
		frgba_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}	
}
//=================================================================================================
//! Texture span with 4x4 weighted sampling ( WrapNear4_clamp ) : rgb source -> a destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_clamp are loaded, converted with rgb_to_fa, weighted row by
//! row with dx and then across the rows with dy ( weight16_fa ), and written through
//! fa_to_a / StoreAddr_a.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgb()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_clamp
//! @param b_color			color handed to rgb_to_fa
//! @param weighttbl1		first weight table forwarded to weight16_fa
//! @param weighttbl2		second weight table forwarded to weight16_fa
cb_inline
void pp_cpp_texture_weight16f_m_clamp_a_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
//		Clip_fa( &dc );

		// store
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Texture span with 4x4 weighted sampling ( WrapNear4_clamp ) : rgba source -> a destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_clamp are loaded, converted with rgba_to_fa, weighted row by
//! row with dx and then across the rows with dy ( weight16_fa ), and written through
//! fa_to_a / StoreAddr_a.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgba()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_clamp
//! @param b_color			color handed to rgba_to_fa
//! @param weighttbl1		first weight table forwarded to weight16_fa
//! @param weighttbl2		second weight table forwarded to weight16_fa
cb_inline
void pp_cpp_texture_weight16f_m_clamp_a_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
//		Clip_fa( &dc );

		// store
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Texture span with 4x4 weighted sampling ( WrapNear4_clamp ) : a source -> a destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_clamp are loaded, converted with a_to_fa, weighted row by
//! row with dx and then across the rows with dy ( weight16_fa ), and written through
//! fa_to_a / StoreAddr_a.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_a()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_clamp
//! @param b_color			color handed to a_to_fa
//! @param weighttbl1		first weight table forwarded to weight16_fa
//! @param weighttbl2		second weight table forwarded to weight16_fa
cb_inline
void pp_cpp_texture_weight16f_m_clamp_a_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
//		Clip_fa( &dc );

		// store
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}

//=================================================================================================
// texture weight16 alpha
//=================================================================================================

//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_repeat ) : rgb source -> rgb destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_repeat are loaded, converted with rgb_to_frgb, passed through
//! MulAlpha_frgb, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgb ), and written through frgb_to_rgb / StoreAddr_rgb.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgb()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_repeat
//! @param b_color			color handed to rgb_to_frgb
//! @param alpha			alpha handed to MulAlpha_frgb for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_repeat_rgb_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgb( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgb_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_repeat ) : rgba source -> rgb destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_repeat are loaded, converted with rgba_to_frgba, passed through
//! MulAlpha_frgba, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgba ), and written through frgba_to_rgb / StoreAddr_rgb.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgba()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_repeat
//! @param b_color			color handed to rgba_to_frgba
//! @param alpha			alpha handed to MulAlpha_frgba for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgba at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgba at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_repeat_rgb_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgba( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgba_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_repeat ) : a source -> rgb destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_repeat are loaded, converted with a_to_frgb, passed through
//! MulAlpha_frgb, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgb ), and written through frgb_to_rgb / StoreAddr_rgb.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_a()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_repeat
//! @param b_color			color handed to a_to_frgb
//! @param alpha			alpha handed to MulAlpha_frgb for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_repeat_rgb_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgb( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgb_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_repeat ) : rgb source -> rgba destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_repeat are loaded, converted with rgb_to_frgb, passed through
//! MulAlpha_frgb, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgb ), and written through frgb_to_rgba / StoreAddr_rgba.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgb()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_repeat
//! @param b_color			color handed to rgb_to_frgb
//! @param alpha			alpha handed to MulAlpha_frgb for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_repeat_rgba_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgb( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgb_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_repeat ) : rgba source -> rgba destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_repeat are loaded, converted with rgba_to_frgba, passed through
//! MulAlpha_frgba, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgba ), and written through frgba_to_rgba / StoreAddr_rgba.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgba()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_repeat
//! @param b_color			color handed to rgba_to_frgba
//! @param alpha			alpha handed to MulAlpha_frgba for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgba at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgba at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_repeat_rgba_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgba( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgba_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_repeat ) : a source -> rgba destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_repeat are loaded, converted with a_to_frgba, passed through
//! MulAlpha_frgba, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgba ), and written through frgba_to_rgba / StoreAddr_rgba.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_a()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_repeat
//! @param b_color			color handed to a_to_frgba
//! @param alpha			alpha handed to MulAlpha_frgba for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgba at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgba at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_repeat_rgba_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgba( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgba_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_repeat ) : rgb source -> a destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_repeat are loaded, converted with rgb_to_fa, passed through
//! MulAlpha_fa, weighted row by row with dx and then across the rows with dy
//! ( weight16_fa ), and written through fa_to_a / StoreAddr_a.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgb()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_repeat
//! @param b_color			color handed to rgb_to_fa
//! @param alpha			alpha handed to MulAlpha_fa for every loaded texel
//! @param weighttbl1		first weight table forwarded to weight16_fa
//! @param weighttbl2		second weight table forwarded to weight16_fa
cb_inline
void pp_cpp_texture_weight16f_m_alpha_repeat_a_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_fa( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_repeat ) : rgba source -> a destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_repeat are loaded, converted with rgba_to_fa, passed through
//! MulAlpha_fa, weighted row by row with dx and then across the rows with dy
//! ( weight16_fa ), and written through fa_to_a / StoreAddr_a.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgba()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_repeat
//! @param b_color			color handed to rgba_to_fa
//! @param alpha			alpha handed to MulAlpha_fa for every loaded texel
//! @param weighttbl1		first weight table forwarded to weight16_fa
//! @param weighttbl2		second weight table forwarded to weight16_fa
cb_inline
void pp_cpp_texture_weight16f_m_alpha_repeat_a_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_fa( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_repeat ) : a source -> a destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_repeat are loaded, converted with a_to_fa, passed through
//! MulAlpha_fa, weighted row by row with dx and then across the rows with dy
//! ( weight16_fa ), and written through fa_to_a / StoreAddr_a.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_a()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_repeat
//! @param b_color			color handed to a_to_fa
//! @param alpha			alpha handed to MulAlpha_fa for every loaded texel
//! @param weighttbl1		first weight table forwarded to weight16_fa
//! @param weighttbl2		second weight table forwarded to weight16_fa
cb_inline
void pp_cpp_texture_weight16f_m_alpha_repeat_a_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_fa( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}

//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_clamp ) : rgb source -> rgb destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_clamp are loaded, converted with rgb_to_frgb, passed through
//! MulAlpha_frgb, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgb ), and written through frgb_to_rgb / StoreAddr_rgb.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgb()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_clamp
//! @param b_color			color handed to rgb_to_frgb
//! @param alpha			alpha handed to MulAlpha_frgb for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_clamp_rgb_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgb( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgb_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_clamp ) : rgba source -> rgb destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_clamp are loaded, converted with rgba_to_frgba, passed through
//! MulAlpha_frgba, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgba ), and written through frgba_to_rgb / StoreAddr_rgb.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgba()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_clamp
//! @param b_color			color handed to rgba_to_frgba
//! @param alpha			alpha handed to MulAlpha_frgba for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgba at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgba at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_clamp_rgb_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgba( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgba_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_clamp ) : a source -> rgb destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_clamp are loaded, converted with a_to_frgb, passed through
//! MulAlpha_frgb, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgb ), and written through frgb_to_rgb / StoreAddr_rgb.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_a()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_clamp
//! @param b_color			color handed to a_to_frgb
//! @param alpha			alpha handed to MulAlpha_frgb for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_clamp_rgb_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgb( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgb_to_rgb( &dc , dc );
		StoreAddr_rgb( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//! Alpha-scaled texture span with 4x4 weighted sampling ( WrapNear4_clamp ) : rgb source -> rgba destination.
//!
//! The texture coordinate is walked from ( ssu , ssv ) toward ( ttu , ttv ) in 16.16 fixed
//! point, one step per destination pixel, starting half a step in. For every pixel the 4x4
//! texels selected by WrapNear4_clamp are loaded, converted with rgb_to_frgb, passed through
//! MulAlpha_frgb, weighted row by row with dx and then across the rows with dy
//! ( weight16_frgb ), and written through frgb_to_rgba / StoreAddr_rgba.
//!
//! @param dest				destination span ; len pixels are written
//! @param len				pixel count ; nothing is done when len <= 0
//! @param ssu,ssv			texture coordinate at the start of the span
//! @param ttu,ttv			texture coordinate at the end of the span
//! @param src				source base address ; texel offset is y * src_pitchbyte + x * PixelSize_rgb()
//! @param src_pitchbyte	byte distance between source rows
//! @param src_w,src_h		source size handed to WrapNear4_clamp
//! @param b_color			color handed to rgb_to_frgb
//! @param alpha			alpha handed to MulAlpha_frgb for every loaded texel
//! @param weighttbl1		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
//! @param weighttbl2		weight table read by weight16_frgb at [ d ] and [ 256 - d ]
cb_inline
void pp_cpp_texture_weight16f_m_alpha_clamp_rgba_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// An empty span writes nothing. Returning here also keeps the step
	// computation below from dividing by zero when len == 0.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel step ; the first sample sits half a step into the span
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// uv : four column indices , four row indices and the dx / dy fractions
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : 4x4 texels , row by row , each scaled by alpha
		int		cnt = 0;
		for( int yoff = 0 ; yoff < 4 ; yoff++ )
		{
			for( int xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_frgb( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgb( &c[ cnt ] , alpha );
				cnt++;
			}
		}

		// weight : each row by dx , then the four row results by dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgb( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgb( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );

		// store
		frgb_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );

		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//!	texture weight16f scanline : alpha , clamp , dest = rgba , src = rgba
//!
//!	Writes len destination pixels. For every pixel a 4x4 block of source texels is
//!	loaded , converted with b_color ( rgba_to_frgba ) , multiplied by alpha
//!	( MulAlpha_frgba ) and reduced to one color by weight16_frgba : the four rows are
//!	weighted with dx , then the four row results with dy.
//!	The texture coordinate is stepped in 16.16 fixed point from ( ssu , ssv ) towards
//!	( ttu , ttv ) , starting half a step in.
//!
//!	@param [out]	dest			destination scanline , written with StoreAddr_rgba
//!	@param [in]	len				number of destination pixels ; len <= 0 writes nothing
//!	@param [in]	ssu				texture u at the start of the span
//!	@param [in]	ssv				texture v at the start of the span
//!	@param [in]	ttu				texture u at the end of the span
//!	@param [in]	ttv				texture v at the end of the span
//!	@param [in]	src				source image ( rgba ) , read with LoadAddr_rgba
//!	@param [in]	src_pitchbyte	byte offset between two source rows
//!	@param [in]	src_w			passed to WrapNear4_clamp ( presumably the source width - confirm )
//!	@param [in]	src_h			passed to WrapNear4_clamp ( presumably the source height - confirm )
//!	@param [in]	b_color			color handed to rgba_to_frgba
//!	@param [in]	alpha			alpha handed to MulAlpha_frgba
//!	@param [in]	weighttbl1		weight table , indexed by weight16_frgba with d and 256 - d
//!	@param [in]	weighttbl2		weight table , indexed by weight16_frgba with d and 256 - d
//!	@retval			---
//-------------------------------------------------------------------------------------------------
cb_inline
void pp_cpp_texture_weight16f_m_alpha_clamp_rgba_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// nothing to draw ; this also keeps the divisions by len below away from len == 0
	if( len <= 0 )
		return;

	// texture coordinates as 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// step per destination pixel ( integer division ) ; sampling starts half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// uv : four column / row indices plus the weight table indices dx , dy
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : c[ 0..15 ] holds the 4x4 block row by row
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgba( &c[ cnt ] , alpha );
				cnt++;
			}
		}
		// weight : each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		
		// store
		frgba_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//!	texture weight16f scanline : alpha , clamp , dest = rgba , src = a
//!
//!	Writes len destination pixels. For every pixel a 4x4 block of source texels is
//!	loaded , converted with b_color ( a_to_frgba ) , multiplied by alpha
//!	( MulAlpha_frgba ) and reduced to one color by weight16_frgba : the four rows are
//!	weighted with dx , then the four row results with dy.
//!	The texture coordinate is stepped in 16.16 fixed point from ( ssu , ssv ) towards
//!	( ttu , ttv ) , starting half a step in.
//!
//!	@param [out]	dest			destination scanline , written with StoreAddr_rgba
//!	@param [in]	len				number of destination pixels ; len <= 0 writes nothing
//!	@param [in]	ssu				texture u at the start of the span
//!	@param [in]	ssv				texture v at the start of the span
//!	@param [in]	ttu				texture u at the end of the span
//!	@param [in]	ttv				texture v at the end of the span
//!	@param [in]	src				source image ( a ) , read with LoadAddr_a
//!	@param [in]	src_pitchbyte	byte offset between two source rows
//!	@param [in]	src_w			passed to WrapNear4_clamp ( presumably the source width - confirm )
//!	@param [in]	src_h			passed to WrapNear4_clamp ( presumably the source height - confirm )
//!	@param [in]	b_color			color handed to a_to_frgba
//!	@param [in]	alpha			alpha handed to MulAlpha_frgba
//!	@param [in]	weighttbl1		weight table , indexed by weight16_frgba with d and 256 - d
//!	@param [in]	weighttbl2		weight table , indexed by weight16_frgba with d and 256 - d
//!	@retval			---
//-------------------------------------------------------------------------------------------------
cb_inline
void pp_cpp_texture_weight16f_m_alpha_clamp_rgba_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// nothing to draw ; this also keeps the divisions by len below away from len == 0
	if( len <= 0 )
		return;

	// texture coordinates as 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// step per destination pixel ( integer division ) ; sampling starts half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// uv : four column / row indices plus the weight table indices dx , dy
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : c[ 0..15 ] holds the 4x4 block row by row
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_frgba( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_frgba( &c[ cnt ] , alpha );
				cnt++;
			}
		}
		// weight : each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_frgba( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_frgba( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		
		// store
		frgba_to_rgba( &dc , dc );
		StoreAddr_rgba( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//!	texture weight16f scanline : alpha , clamp , dest = a , src = rgb
//!
//!	Writes len destination pixels. For every pixel a 4x4 block of source texels is
//!	loaded , converted with b_color ( rgb_to_fa ) , multiplied by alpha
//!	( MulAlpha_fa ) and reduced to one value by weight16_fa : the four rows are
//!	weighted with dx , then the four row results with dy.
//!	The texture coordinate is stepped in 16.16 fixed point from ( ssu , ssv ) towards
//!	( ttu , ttv ) , starting half a step in.
//!
//!	@param [out]	dest			destination scanline , written with StoreAddr_a
//!	@param [in]	len				number of destination pixels ; len <= 0 writes nothing
//!	@param [in]	ssu				texture u at the start of the span
//!	@param [in]	ssv				texture v at the start of the span
//!	@param [in]	ttu				texture u at the end of the span
//!	@param [in]	ttv				texture v at the end of the span
//!	@param [in]	src				source image ( rgb ) , read with LoadAddr_rgb
//!	@param [in]	src_pitchbyte	byte offset between two source rows
//!	@param [in]	src_w			passed to WrapNear4_clamp ( presumably the source width - confirm )
//!	@param [in]	src_h			passed to WrapNear4_clamp ( presumably the source height - confirm )
//!	@param [in]	b_color			color handed to rgb_to_fa
//!	@param [in]	alpha			alpha handed to MulAlpha_fa
//!	@param [in]	weighttbl1		weight table handed to weight16_fa
//!	@param [in]	weighttbl2		weight table handed to weight16_fa
//!	@retval			---
//-------------------------------------------------------------------------------------------------
cb_inline
void pp_cpp_texture_weight16f_m_alpha_clamp_a_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// nothing to draw ; this also keeps the divisions by len below away from len == 0
	if( len <= 0 )
		return;

	// texture coordinates as 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// step per destination pixel ( integer division ) ; sampling starts half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// uv : four column / row indices plus the weight table indices dx , dy
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : c[ 0..15 ] holds the 4x4 block row by row
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgb();
				LoadAddr_rgb( &c[ cnt ] , psrc + soff );
				rgb_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_fa( &c[ cnt ] , alpha );
				cnt++;
			}
		}
		// weight : each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		
		// store
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//!	texture weight16f scanline : alpha , clamp , dest = a , src = rgba
//!
//!	Writes len destination pixels. For every pixel a 4x4 block of source texels is
//!	loaded , converted with b_color ( rgba_to_fa ) , multiplied by alpha
//!	( MulAlpha_fa ) and reduced to one value by weight16_fa : the four rows are
//!	weighted with dx , then the four row results with dy.
//!	The texture coordinate is stepped in 16.16 fixed point from ( ssu , ssv ) towards
//!	( ttu , ttv ) , starting half a step in.
//!
//!	@param [out]	dest			destination scanline , written with StoreAddr_a
//!	@param [in]	len				number of destination pixels ; len <= 0 writes nothing
//!	@param [in]	ssu				texture u at the start of the span
//!	@param [in]	ssv				texture v at the start of the span
//!	@param [in]	ttu				texture u at the end of the span
//!	@param [in]	ttv				texture v at the end of the span
//!	@param [in]	src				source image ( rgba ) , read with LoadAddr_rgba
//!	@param [in]	src_pitchbyte	byte offset between two source rows
//!	@param [in]	src_w			passed to WrapNear4_clamp ( presumably the source width - confirm )
//!	@param [in]	src_h			passed to WrapNear4_clamp ( presumably the source height - confirm )
//!	@param [in]	b_color			color handed to rgba_to_fa
//!	@param [in]	alpha			alpha handed to MulAlpha_fa
//!	@param [in]	weighttbl1		weight table handed to weight16_fa
//!	@param [in]	weighttbl2		weight table handed to weight16_fa
//!	@retval			---
//-------------------------------------------------------------------------------------------------
cb_inline
void pp_cpp_texture_weight16f_m_alpha_clamp_a_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// nothing to draw ; this also keeps the divisions by len below away from len == 0
	if( len <= 0 )
		return;

	// texture coordinates as 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// step per destination pixel ( integer division ) ; sampling starts half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// uv : four column / row indices plus the weight table indices dx , dy
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : c[ 0..15 ] holds the 4x4 block row by row
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_rgba();
				LoadAddr_rgba( &c[ cnt ] , psrc + soff );
				rgba_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_fa( &c[ cnt ] , alpha );
				cnt++;
			}
		}
		// weight : each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		
		// store
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//!	texture weight16f scanline : alpha , clamp , dest = a , src = a
//!
//!	Writes len destination pixels. For every pixel a 4x4 block of source texels is
//!	loaded , converted with b_color ( a_to_fa ) , multiplied by alpha
//!	( MulAlpha_fa ) and reduced to one value by weight16_fa : the four rows are
//!	weighted with dx , then the four row results with dy.
//!	The texture coordinate is stepped in 16.16 fixed point from ( ssu , ssv ) towards
//!	( ttu , ttv ) , starting half a step in.
//!
//!	@param [out]	dest			destination scanline , written with StoreAddr_a
//!	@param [in]	len				number of destination pixels ; len <= 0 writes nothing
//!	@param [in]	ssu				texture u at the start of the span
//!	@param [in]	ssv				texture v at the start of the span
//!	@param [in]	ttu				texture u at the end of the span
//!	@param [in]	ttv				texture v at the end of the span
//!	@param [in]	src				source image ( a ) , read with LoadAddr_a
//!	@param [in]	src_pitchbyte	byte offset between two source rows
//!	@param [in]	src_w			passed to WrapNear4_clamp ( presumably the source width - confirm )
//!	@param [in]	src_h			passed to WrapNear4_clamp ( presumably the source height - confirm )
//!	@param [in]	b_color			color handed to a_to_fa
//!	@param [in]	alpha			alpha handed to MulAlpha_fa
//!	@param [in]	weighttbl1		weight table handed to weight16_fa
//!	@param [in]	weighttbl2		weight table handed to weight16_fa
//!	@retval			---
//-------------------------------------------------------------------------------------------------
cb_inline
void pp_cpp_texture_weight16f_m_alpha_clamp_a_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// nothing to draw ; this also keeps the divisions by len below away from len == 0
	if( len <= 0 )
		return;

	// texture coordinates as 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// step per destination pixel ( integer division ) ; sampling starts half a step in
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	int32			x[ 4 ] , y[ 4 ];
	pp_pixel_calc	c[ 16 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// uv : four column / row indices plus the weight table indices dx , dy
		uint8	dx , dy;
		WrapNear4_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load color : c[ 0..15 ] holds the 4x4 block row by row
		int			cnt = 0;
		int			yoff;
		for( yoff = 0 ; yoff < 4 ; yoff++ )
		{
			int		xoff;
			for( xoff = 0 ; xoff < 4 ; xoff++ )
			{
				int32	soff = y[ yoff ] * src_pitchbyte + x[ xoff ] * PixelSize_a();
				LoadAddr_a( &c[ cnt ] , psrc + soff );
				a_to_fa( &c[ cnt ] , b_color , c[ cnt ] );
				MulAlpha_fa( &c[ cnt ] , alpha );
				cnt++;
			}
		}
		// weight : each row with dx , then the four row results with dy
		pp_pixel_calc	dc , cc[ 4 ];
		weight16_fa( &cc[ 0 ] , c[ 0 ] , c[ 1 ] , c[ 2 ] , c[ 3 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 1 ] , c[ 4 ] , c[ 5 ] , c[ 6 ] , c[ 7 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 2 ] , c[ 8 ] , c[ 9 ] , c[ 10 ] , c[ 11 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &cc[ 3 ] , c[ 12 ] , c[ 13 ] , c[ 14 ] , c[ 15 ] , dx , weighttbl1 , weighttbl2 );
		weight16_fa( &dc , cc[ 0 ] , cc[ 1 ] , cc[ 2 ] , cc[ 3 ] , dy , weighttbl1 , weighttbl2 );
		
		// store
		fa_to_a( &dc , dc );
		StoreAddr_a( dest , dc );
		
		// next destination pixel / next texture coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}

//=================================================================================================
//!	texture weight16f : dispatch to the scanline routine for wrap / dest format / src format
//!
//!	Looks the routine up in a table indexed [ wrap ][ destformat ][ srcformat ] and forwards
//!	all remaining arguments unchanged. When alpha.a == 256 the routines without a pp_alpha
//!	parameter are used , otherwise the "_alpha_" routines ( presumably 256 means fully
//!	opaque - confirm ).
//!
//!	@param [in]	destformat		second table index ; rows are listed in the order rgb , rgba , a
//!	@param [out]	dest			forwarded to the scanline routine
//!	@param [in]	len				forwarded to the scanline routine
//!	@param [in]	ssu				forwarded to the scanline routine
//!	@param [in]	ssv				forwarded to the scanline routine
//!	@param [in]	ttu				forwarded to the scanline routine
//!	@param [in]	ttv				forwarded to the scanline routine
//!	@param [in]	srcformat		third table index ; entries are listed in the order rgb , rgba , a
//!	@param [in]	src				forwarded to the scanline routine
//!	@param [in]	src_pitchbyte	forwarded to the scanline routine
//!	@param [in]	src_w			forwarded to the scanline routine
//!	@param [in]	src_h			forwarded to the scanline routine
//!	@param [in]	b_color			forwarded to the scanline routine
//!	@param [in]	alpha			selects the table ; forwarded only to the "_alpha_" routines
//!	@param [in]	wrap			first table index ; blocks are listed in the order repeat , clamp
//!	@param [in]	weighttbl1		forwarded to the scanline routine
//!	@param [in]	weighttbl2		forwarded to the scanline routine
//!	@retval			---
//-------------------------------------------------------------------------------------------------
cb_inline
void pp_cpp_texture_weight16f_m
		(
		pp_format		destformat , 
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		pp_format		srcformat , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		pp_wraptype		wrap , 
		const float*	weighttbl1 , 
		const float*	weighttbl2
		)
{
	// scanline routine without / with the pp_alpha argument
	typedef void (*scan_fn)( void* , int32 , float , float , float , float , const void* , int32 , int16 , int16 , const pp_color& , const float* , const float* );
	typedef void (*scan_alpha_fn)( void* , int32 , float , float , float , float , const void* , int32 , int16 , int16 , const pp_color& , const pp_alpha& , const float* , const float* );

	// Both tables are declared [2][4][4] but list only 3 x 3 routines per wrap type ;
	// the slots without an initializer are null pointers.
	static
	scan_fn	plain_tbl[2][4][4] = 
	{
		// repeat
		{
			{	// dest rgb
				pp_cpp_texture_weight16f_m_repeat_rgb_rgb , 
				pp_cpp_texture_weight16f_m_repeat_rgb_rgba , 
				pp_cpp_texture_weight16f_m_repeat_rgb_a
			} , 
			{	// dest rgba
				pp_cpp_texture_weight16f_m_repeat_rgba_rgb , 
				pp_cpp_texture_weight16f_m_repeat_rgba_rgba , 
				pp_cpp_texture_weight16f_m_repeat_rgba_a
			} , 
			{	// dest a
				pp_cpp_texture_weight16f_m_repeat_a_rgb , 
				pp_cpp_texture_weight16f_m_repeat_a_rgba , 
				pp_cpp_texture_weight16f_m_repeat_a_a
			}
		} , 
		// clamp
		{
			{	// dest rgb
				pp_cpp_texture_weight16f_m_clamp_rgb_rgb , 
				pp_cpp_texture_weight16f_m_clamp_rgb_rgba , 
				pp_cpp_texture_weight16f_m_clamp_rgb_a
			} , 
			{	// dest rgba
				pp_cpp_texture_weight16f_m_clamp_rgba_rgb , 
				pp_cpp_texture_weight16f_m_clamp_rgba_rgba , 
				pp_cpp_texture_weight16f_m_clamp_rgba_a
			} , 
			{	// dest a
				pp_cpp_texture_weight16f_m_clamp_a_rgb , 
				pp_cpp_texture_weight16f_m_clamp_a_rgba , 
				pp_cpp_texture_weight16f_m_clamp_a_a
			}
		}
	};
	static
	scan_alpha_fn	alpha_tbl[2][4][4] = 
	{
		// repeat
		{
			{	// dest rgb
				pp_cpp_texture_weight16f_m_alpha_repeat_rgb_rgb , 
				pp_cpp_texture_weight16f_m_alpha_repeat_rgb_rgba , 
				pp_cpp_texture_weight16f_m_alpha_repeat_rgb_a
			} , 
			{	// dest rgba
				pp_cpp_texture_weight16f_m_alpha_repeat_rgba_rgb , 
				pp_cpp_texture_weight16f_m_alpha_repeat_rgba_rgba , 
				pp_cpp_texture_weight16f_m_alpha_repeat_rgba_a
			} , 
			{	// dest a
				pp_cpp_texture_weight16f_m_alpha_repeat_a_rgb , 
				pp_cpp_texture_weight16f_m_alpha_repeat_a_rgba , 
				pp_cpp_texture_weight16f_m_alpha_repeat_a_a
			}
		} , 
		// clamp
		{
			{	// dest rgb
				pp_cpp_texture_weight16f_m_alpha_clamp_rgb_rgb , 
				pp_cpp_texture_weight16f_m_alpha_clamp_rgb_rgba , 
				pp_cpp_texture_weight16f_m_alpha_clamp_rgb_a
			} , 
			{	// dest rgba
				pp_cpp_texture_weight16f_m_alpha_clamp_rgba_rgb , 
				pp_cpp_texture_weight16f_m_alpha_clamp_rgba_rgba , 
				pp_cpp_texture_weight16f_m_alpha_clamp_rgba_a
			} , 
			{	// dest a
				pp_cpp_texture_weight16f_m_alpha_clamp_a_rgb , 
				pp_cpp_texture_weight16f_m_alpha_clamp_a_rgba , 
				pp_cpp_texture_weight16f_m_alpha_clamp_a_a
			}
		}
	};

	// alpha.a == 256 : use the routine that takes no alpha argument
	if( alpha.a == 256 )
	{
		scan_fn	plain = plain_tbl[ wrap ][ destformat ][ srcformat ];
		plain( dest , len , ssu , ssv , ttu , ttv , src , src_pitchbyte , src_w , src_h , b_color , weighttbl1 , weighttbl2 );
		return;
	}

	// any other alpha : the routine applies alpha itself
	scan_alpha_fn	blended = alpha_tbl[ wrap ][ destformat ][ srcformat ];
	blended( dest , len , ssu , ssv , ttu , ttv , src , src_pitchbyte , src_w , src_h , b_color , alpha , weighttbl1 , weighttbl2 );
}

};	//namespace

//using namespace icubic;		

#pragma pack( pop )			//release align

