#------------------------------------------------------------------------------
#    59Tracker, weblog software for personal publisher.
#    Copyright (C) 2004-2010 Kaga, Hiroaki
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#------------------------------------------------------------------------------

package Lib::String;

use strict;
use warnings;

use Encode;
use Encode::Guess qw/euc-jp shiftjis 7bit-jis/;
use Encode::JP::H2Z;

use Lib::Logger;
use Lib::Util;

# File-scoped logger instance, created once when the module is loaded.
# NOTE(review): none of the subs in this file reference $logger -- confirm
# whether it is still needed before removing it.
my $logger = Lib::Logger->new();

# Constructor.
#
# Creates a string helper object holding:
#   system_dir - the value returned by Lib::Conf->new()->get_system_dir()
#   tail       - the suffix cut() appends to truncated text (default '...')
#
# Returns the new object, blessed into the invoking class so that
# subclasses get objects of their own type.
sub new {
    my $class = shift;

    # Accept Class->new, $object->new and (as the original one-argument
    # bless did) a plain function call with no invocant at all.
    $class = ref($class) || $class || __PACKAGE__;

    use Lib::Conf;
    my $conf = Lib::Conf->new();

    # Fetch in scalar context before building the hash so that a
    # list-returning getter cannot inject extra key/value pairs.
    my $system_dir = $conf->get_system_dir();

    my $self = {
        system_dir => $system_dir,
        tail       => '...',
    };

    return bless $self, $class;
}

# Set the tail: the suffix that cut() appends to text it has shortened.
# Takes the new suffix string; the value of the assignment is returned.
sub set_tail {
    my ($self, $tail) = @_;
    $self->{tail} = $tail;
}

# Truncate a string to a given width, measured in Shift_JIS (CP932) bytes.
#
# Arguments:
#   $srcstr - UTF-8 encoded text. Line breaks and HTML tags are removed
#             before measuring.
#   $cutlen - maximum length in CP932 bytes: single-byte characters count
#             as 1, double-byte characters as 2.
#
# Returns UTF-8 encoded text. If the text had to be shortened, the tail
# ($self->{tail}, '...' by default) is appended. A double-byte character
# that straddles the limit is kept whole, so the kept part can be one byte
# longer than $cutlen.
sub cut {
    my $self   = shift;
    my $srcstr = shift;
    my $cutlen = shift;

    $srcstr = '' unless defined $srcstr;
    $srcstr =~ s/\r?\n//g;    # strip line breaks
    $srcstr =~ s/<.*?>//g;    # strip HTML tags

    # Measure in CP932 so that half-width characters cost 1 and full-width
    # characters cost 2.
    my $sjis   = Encode::encode('cp932', Encode::decode_utf8($srcstr));
    my $srclen = length($sjis);

    # Number of CP932 bytes to keep; the whole string unless it is too long.
    my $keep = $srclen;
    if ($srclen > $cutlen) {
        $keep = 0;
        while ($keep < $cutlen) {
            # CP932 lead bytes run up to 0xFC (vendor extensions included);
            # step over both bytes so a character is never split.
            my $is_lead = substr($sjis, $keep, 1) =~ /[\x81-\x9F\xE0-\xFC]/;
            $keep += $is_lead ? 2 : 1;
        }
        $keep = $srclen if $keep > $srclen;
    }

    # Decode with the same encoding used above; decoding CP932 bytes as
    # plain Shift_JIS alters or loses CP932-specific characters.
    my $deststr = Encode::encode_utf8(
        Encode::decode('cp932', substr($sjis, 0, $keep))
    );

    # Append the tail only when text was really dropped, and only after the
    # conversion back to UTF-8 so that a non-ASCII tail is not garbled.
    if ($keep < $srclen) {
        $deststr .= defined $self->{tail} ? $self->{tail} : '';
    }

    return $deststr;
}

# Convert text whose encoding is unknown into UTF-8 bytes.
#
# The text is first decoded to Perl's internal character form via
# decode_string(), then encoded as UTF-8, which yields a byte string
# without the UTF8 flag.
sub any2utf8 {
    my ($self, $text) = @_;

    my $decoded = $self->decode_string($text);
    my $utf8    = Encode::encode_utf8($decoded);

    return $utf8;
}

# Convert text from one encoding to another.
#
# Arguments: ($text, $srccode, $destcode)
#   The short names 'euc', 'sjis' and 'jis' are accepted for either
#   encoding and are mapped to the Encode names 'euc-jp', 'shiftjis' and
#   '7bit-jis'. Any other name is handed to Encode unchanged.
#
# Returns the converted text.
sub convert {
    my ($self, $text, $srccode, $destcode) = @_;

    my %encoding_for = (
        euc  => 'euc-jp',
        sjis => 'shiftjis',
        jis  => '7bit-jis',
    );

    # The loop variable aliases each argument, so assigning to it rewrites
    # $srccode / $destcode in place.
    for my $code ($srccode, $destcode) {
        $code = $encoding_for{$code} if exists $encoding_for{$code};
    }

    # from_to converts the contents of $text in place.
    Encode::from_to($text, $srccode, $destcode);

    return $text;
}

# Convert full-width alphanumerics to their half-width (ASCII) form.
# The input is treated as EUC-JP bytes.
#
# A full-width digit or letter is the byte 0xA3 followed by a byte in
# 0xB0-0xB9, 0xC1-0xDA or 0xE1-0xFA; subtracting 0x80 from that second
# byte gives the ASCII character.
#
# Returns the converted string.
sub zen_han {
    my ($self, $alnum_str) = @_;

    # The 0xA3 must not directly follow 0x8F (the three-byte prefix).
    my $not_after_ss3 = qr{(?<!\x8F)};

    # What follows must be whole two-byte pairs up to the next ASCII byte,
    # 0x8E/0x8F or end of string; this rejects a match that starts on the
    # second byte of another character.
    my $aligned = qr{(?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\x7F\x8E\x8F]|\z))}x;

    $alnum_str =~ s/$not_after_ss3\xA3([\xB0-\xB9\xC1-\xDA\xE1-\xFA])$aligned/chr(ord($1) - 0x80)/eg;

    return $alnum_str;
}

# Convert half-width katakana to full-width katakana.
# The input is treated as EUC-JP bytes.
#
# Returns the converted string.
sub kana_han_zen {
    my ($self, $katakana) = @_;

    # h2z is given a reference and rewrites the local copy in place.
    Encode::JP::H2Z::h2z(\$katakana);

    return $katakana;
}

# Decode a string into Perl's internal character form (UTF8 flag on).
#
# A string that already carries the UTF8 flag is returned unchanged.
# Anything else is decoded with Encode's 'Guess' encoding, which picks
# among the suspects registered through Encode::Guess at the top of this
# file.
#
# Returns the decoded string.
sub decode_string {
    my ($self, $src) = @_;

    # Already decoded: hand it back as-is instead of returning undef.
    return $src if Encode::is_utf8($src);

    my $dest = Encode::decode('Guess', $src);
    return $dest;
}

# Encode a string held in Perl's internal character form into the given
# encoding.
#
# Arguments: ($codetype, $src)
#   $codetype - encoding name understood by Encode
#   $src      - the string to encode
#
# Returns the encoded string.
sub encode_string {
    my ($self, $codetype, $src) = @_;

    my $encoded = encode($codetype, $src);
    return $encoded;
}

# Encode text for use in a MIME header, using Encode's
# 'MIME-Header-ISO_2022_JP' encoding.
#
# Takes the text to encode and returns the encoded result.
sub encode_mime {
    my ($self, $text) = @_;

    return scalar encode('MIME-Header-ISO_2022_JP', $text);
}

1;
# End of String.pm
