# WWW::SourceForgeJP::Wiki::Html2Wiki
# by hylom
# This code is under GPL.
#

use lib 'C:\Users\hirom\Documents\Dev\sfwiki';
package WWW::SourceForgeJP::Wiki::Html2Wiki;

use warnings;
use strict;

use utf8;
use open IN => ':utf8';
#use open OUT => ':encoding(Shift_JIS)';
use open OUT => ':utf8';
use open ':std';

use IO::File;
use HTML::Entities;


use Carp;

### Images are embedded with markup such as [[Embed(prefix:hoge.png)]]

######## Settings #############
# Prefix that convert() puts in front of link targets it treats as relative
# (non-absolute) URLs.  Empty by default, i.e. such links are left unprefixed.
my $hyperlink_prefix = "";
# Usage message; the only reference to it in this file is the commented-out
# command-line handling below (the script name in it is "faq_gen.pl").
my $usage = "usage: faq_gen.pl <source.html>.";
#my $src_file = shift @ARGV or die $usage;

# new( image_prefix )
#
# Constructor.  Builds a converter object that remembers the prefix to be
# placed in front of every image source.
#
# Parameters:
#   $image_prefix - string stored under the 'image_prefix' key; it must be a
#                   true value (so undef, "" and "0" are all rejected).
#
# Returns a hash reference blessed into the invoking class.
# Croaks with "image_prefix is not defined" when the prefix is false.
sub new {
    my ( $class, $image_prefix ) = @_;

    if ( !$image_prefix ) {
        croak "image_prefix is not defined\n";
    }

    my %fields = ( 'image_prefix' => $image_prefix );

    return bless \%fields, $class;
}
    

# convert( HTML-string )
#
# Converts an HTML document or fragment into wiki markup and prints the
# result with print(), i.e. to the currently selected output handle.
#
# Parameters:
#   $html - the HTML source as one string; undef is treated as "".
#
# Returns:
#   the return value of print() (true on success).
#
# Side effects:
#   For every tag without a conversion rule a line "<tag>: unimplemented!"
#   is written to STDERR.
#
# Notes:
#   * This is a regex-driven tokenizer, not a full HTML parser: the input is
#     consumed as a sequence of "text followed by one tag" pairs.
#   * <rawhtml> and <footnote> are h2w-specific pseudo tags whose bodies are
#     copied verbatim up to the matching close tag (or to the end of the
#     input when the close tag is missing).
#   * Relative link targets lose a ".html"/".shtml" suffix and are prefixed
#     with the file-level $hyperlink_prefix.
#   * Image sources are mapped through $self->get_img_location().
sub convert {
    my $self = shift @_;
    my $html = shift @_;
    $html = "" unless defined $html;

    my $anchor_name = "";    # " #name" taken from <a name>, appended to the next heading
    my @list_markers;        # one entry ("*" or "1.") per currently open <ul>/<ol>
    my $trim_crlf = 1;       # set to 0 inside <pre> so whitespace is kept as is

    my $work = "";           # output buffer (holds only the link text inside <a href>)

    # While inside <a href>, inline markup (''', '', ` ...) is collected in
    # $work_pre / $work_post so that it ends up outside the "[url text]"
    # brackets; outside of a link both references point at $work itself.
    my $work_pre      = "";
    my $work_post     = "";
    my $work_pre_ref  = \$work;
    my $work_post_ref = \$work;
    my %link          = ( href => "", before => "" );

    # Tags that produce no output at all.
    my %is_ignored = map { $_ => 1 } qw(
        meta head /head body /body html /html !doctype script /script
        tbody /tbody col div /div font /font /td tr /li /table
    );

    # Inline elements that are wrapped in the same marker on both sides.
    my %inline_markup = (
        'i'       => q(''),
        'em'      => q(''),
        'address' => q(''),
        'b'       => q('''),
        'strong'  => q('''),
        'big'     => q('''),
        'tt'      => q(`),
        'small'   => '__',
    );

    # Decode entities and, unless inside <pre>, drop newlines and squeeze
    # runs of whitespace.
    my $clean_text = sub {
        my $text = shift;
        $text = HTML::Entities::decode_entities($text);
        if ( $trim_crlf == 1 ) {
            $text =~ s/\n//mg;
            $text =~ s/\s{2,}/ /mg;
        }
        return $text;
    };

    # Remove and return everything in $html up to "</NAME>".  The body may
    # span several lines; without a close tag the rest of the input is taken.
    my $take_until_close = sub {
        my $name = shift;
        if ( $html =~ s/\A(.*?)<\/\Q$name\E\s*>//si ) {
            my $inner = $1;
            return $inner;
        }
        my $rest = $html;
        $html = "";
        return $rest;
    };

    # Finish the current <a href> link: emit "[url text]" surrounded by the
    # markup collected while inside the link and switch back to normal mode.
    my $close_link = sub {
        # ASCII [] would be read as link brackets, so use the fullwidth forms.
        $work =~ s/\[/\x{FF3B}/g;
        $work =~ s/\]/\x{FF3D}/g;
        $work = $link{before} . $work_pre
            . "[" . $link{href} . " " . $work . "]"
            . $work_post;
        %link          = ( href => "", before => "" );
        $work_pre_ref  = \$work;
        $work_post_ref = \$work;
    };

    while ( $html =~ s/^([^<]*)<\s*(\/{0,1}[A-Za-z0-9!-]+\s*[^>]*)\s*>//m ) {
        my ( $leading_text, $tag_all ) = ( $1, $2 );
        $work .= $clean_text->($leading_text);

        # Parse the tag: first token is the tag name, the rest are attributes.
        next unless $tag_all =~ s/\A(\S+)\s*//;
        my $tag_raw = $1;
        my $tag     = lc $tag_raw;

        # Treat self-closing forms such as <br/> like the plain tag.
        $tag =~ s{(?<=.)/\z}{} unless $tag =~ m/\A!--/;

        # Raw remainder of the tag; only needed for comment bodies.
        my $string = $tag_all;

        # Collect key=value attributes; attributes without a value are skipped.
        my %attr;
        while ( $tag_all =~ s/\A\s*([^\s=]+)// ) {
            my $key = lc $1;
            next unless $tag_all =~ s/\A\s*=\s*//;
            my $val = "";
            if ( $tag_all =~ s/\A"([^"]*)"// ) {
                $val = $1;
            } elsif ( $tag_all =~ s/\A'([^']*)'// ) {
                $val = $1;
            } elsif ( $tag_all =~ s/\A(\S+)// ) {
                $val = $1;
            }
            $attr{$key} = $val;
        }
        # From here on $tag holds the tag name and %attr its attributes.

        # Split "/name" into a closing flag and the bare element name.
        my ( $closing, $name ) = $tag =~ m{\A(/?)(.*)\z}s;

        if ( $is_ignored{$tag} ) {
            # Ignored tag: nothing to emit.
        } elsif ( $tag eq "title" ) {
            # <title> is kept as a wiki comment block.
            $work .= "{{{ comment\nh2w-title:";
        } elsif ( $tag eq "/title" ) {
            $work .= "\n}}}\n";
        } elsif ( $tag eq "rawhtml" ) {
            # <rawhtml></rawhtml> (h2w-specific tag): pass HTML through.
            my $raw_html = $take_until_close->("rawhtml");
            $work .= "{{{ html\n";
            $work .= $raw_html;
            $work .= "\n}}}\n";
        } elsif ( $tag eq "footnote" ) {
            # <footnote></footnote> (h2w-specific tag).
            my $raw_text = $take_until_close->("footnote");
            $work .= "[[Footnote($raw_text)]]";
        } elsif ( $tag =~ m/\A!--/ ) {
            # <!-- --> becomes a wiki comment block; the original case of
            # the comment text is preserved.
            my $comment = $tag_raw;
            $comment =~ s/\A!--//;
            $comment .= " $string";
            $comment =~ s/--\s*\z//;

            $work .= "\n\n{{{ comment\n";
            $work .= $comment;
            $work .= "\n}}}\n\n";
        } elsif ( $tag eq "br" ) {
            $work .= "[[BR]]";
        } elsif ( $name eq "p" ) {
            # <p> and </p> both become a paragraph break.
            $work .= "\n\n";
        } elsif ( $tag eq "pre" ) {
            $work .= "\n{{{\n";
            $trim_crlf = 0;
        } elsif ( $tag eq "/pre" ) {
            $work .= "\n}}}\n";
            $trim_crlf = 1;
        } elsif ( exists $inline_markup{$name} ) {
            # <i>, <em>, <address>, <b>, <strong>, <big>, <tt>, <small>
            if ($closing) {
                $$work_post_ref .= $inline_markup{$name};
            } else {
                $$work_pre_ref .= $inline_markup{$name};
            }
        } elsif ( $tag eq "td" ) {
            $work .= '||';
        } elsif ( $tag eq "/tr" ) {
            $work .= "||\n";
        } elsif ( $name eq "ul" || $name eq "ol" ) {
            # Track nesting with a stack so that closing an inner list
            # restores the marker of the enclosing one.
            if ($closing) {
                pop @list_markers;
            } else {
                push @list_markers, ( $name eq "ul" ? "*" : "1." );
            }
            $work .= "\n" if $name eq "ol";
        } elsif ( $tag eq "li" ) {
            $work .= "\n";
            if (@list_markers) {
                # Two spaces of indent per nesting level, then the marker.
                $work .= ( "  " x scalar @list_markers ) . $list_markers[-1] . " ";
            }
        } elsif ( $tag eq "table" ) {
            $work .= "\n\n";
        } elsif ( $tag eq "hr" ) {
            $work .= "\n\n----\n\n";
        } elsif ( $tag eq "a" ) {
            my $handled = 0;
            if ( exists $attr{'name'} ) {
                # <a name>: remember the anchor for the next heading.
                $anchor_name = " #$attr{'name'}";
                $handled     = 1;
            }
            if ( exists $attr{'href'} ) {
                # <a href="">: start collecting the link text separately.
                my $link_to = $attr{'href'};
                if ( $link_to !~ m{\A(?:https?://|ftp://|mailto:)}i ) {
                    # Relative link: drop the .html/.shtml suffix and add
                    # the configured prefix.
                    $link_to =~ s/\.s{0,1}html(?=[#?]|\z)//;
                    $link_to = $hyperlink_prefix . $link_to;
                }
                $link{href}    = $link_to;
                $link{before}  = $work;
                $work          = "";
                $work_pre      = "";
                $work_post     = "";
                $work_pre_ref  = \$work_pre;
                $work_post_ref = \$work_post;
                $handled       = 1;
            }
            print STDERR "$tag: unimplemented!\n" unless $handled;
        } elsif ( $tag eq "/a" ) {
            # Only an open <a href> needs work; </a> of <a name> is a no-op.
            $close_link->() if $work_pre_ref == \$work_pre;
        } elsif ( $name =~ m/\Ah([0-9])\z/ ) {
            # <hN> / </hN>: N "=" characters on both sides of the title.
            my $bar = "=" x $1;
            if ($closing) {
                $work .= " $bar$anchor_name\n\n";
                $anchor_name = "";
            } else {
                $work .= "\n\n$bar ";
            }
        } elsif ( $tag eq "img" ) {
            # <img>: always embedded; align=left/right adds a float option.
            if ( exists $attr{'src'} ) {
                my $img_location = $self->get_img_location( $attr{'src'} );
                my $align = exists $attr{'align'} ? lc $attr{'align'} : "";
                my $opt   = "";
                if ( $align eq 'left' ) {
                    $opt = " float=left";
                } elsif ( $align eq 'right' ) {
                    $opt = " float=right";
                }
                $work .= "[[Embed($img_location$opt)]]\n";
            }
        } else {
            print STDERR "$tag: unimplemented!\n";
        }
    }

    # Keep text that follows the last tag, and do not lose the document when
    # an <a href> was never closed.
    $work .= $clean_text->($html) if $html =~ /\S/;
    $close_link->() if $work_pre_ref == \$work_pre;

    # Strip leading newlines and collapse runs of three or more.
    $work =~ s/\A\n+//;
    $work =~ s/\n{3,}/\n\n/mg;

    # Literal "\n" and "\\" escape sequences in the text are expanded last.
    $work =~ s/\\n/\n/g;
    $work =~ s/\\\\/\\/g;

    return print $work;
}

# get_img_location( src )
#
# Builds the location string used for an embedded image.
#
# Parameters:
#   $src - the image source (convert() passes the value of the src attribute).
#
# Returns the object's 'image_prefix' value immediately followed by $src.
sub get_img_location {
    my ( $self, $src ) = @_;

    my $prefix = $self->{image_prefix};

    return $prefix . $src;
}

