#!/usr/bin/perl -w

##### BEGIN LICENSE BLOCK #####
#
# cdae2-epw
# Copyright (c) 2013  kusagame
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##### END LICENSE BLOCK #####

use strict;
use Cwd;
use Encode;
use FileHandle;
use Compress::Raw::Zlib;
use XML::TreeBuilder;
use encoding 'utf-8';
# Console streams carry UTF-8 text.
binmode(STDOUT, ":utf8");
binmode(STDIN,  ":utf8");

# Encoding objects used by the UTF-8 -> Shift_JIS conversion near the end
# of the script.
my ($utf8, $sjis) = map { find_encoding( $_ ) } ( "utf8", "sjis" );

# Start-up banner: a horizontal rule above and below the copyright and
# warranty notice.
my $banner_rule = "----------------------------------------------------------------------\n";
print $banner_rule,
	"    cdae2-epw Copyright (C) 2013  kusagame\n",
	"\t-This program comes with ABSOLUTELY NO WARRANTY.\n",
	"\t-This is free software, and you are welcome to redistribute it\n",
	"\t-under certain conditions.\n",
	$banner_rule;
# Stage 1: extract the compressed dictionary text into tmp.txt.
#
# CONTENT.tda.tdz is read as a sequence of 8-byte records.  The first four
# bytes of each record are skipped; the next four are a little-endian
# unsigned 32-bit integer giving the byte length of the next compressed
# chunk in CONTENT.tda.  Chunks are read sequentially, inflated with
# Compress::Raw::Zlib, stripped of NUL bytes and appended to tmp.txt.
print " > Extracting Data...\n";
my $content_filename = 'CONTENT.tda';
my $content_index_filename = 'CONTENT.tda.tdz';

my $content_handle = FileHandle->new;
if (!$content_handle->open($content_filename, 'r')) {
	die "$content_filename: $^E\n";
}
binmode $content_handle;

my $content_index_handle = FileHandle->new;
if (!$content_index_handle->open($content_index_filename, 'r')) {
	die "$content_index_filename: $^E\n";
}
binmode $content_index_handle;

# The inflated data is handled as raw octets, so the output handle must not
# carry an encoding layer (binmode without a layer selects raw bytes).
open(my $extract_out, ">", "tmp.txt") or die "tmp.txt: $!\n";
binmode($extract_out);

for (;;) {
	# Fetch the next index record; anything shorter than a full record
	# marks the end of the index.
	my $index_record;
	my $index_len = read($content_index_handle, $index_record, 8);
	if (!defined $index_len) {
		die "File reading error: $content_index_filename\n";
	}
	last if $index_len != 8;

	my ($size) = unpack("x4V", $index_record);

	my $zipped_contents;
	my $chunk_len = read($content_handle, $zipped_contents, $size);
	if (!defined $chunk_len || $chunk_len != $size) {
		die "File reading error: $content_filename\n";
	}

	# A fresh inflater per chunk: every chunk is an independent stream.
	my ($inflater, $status) = Compress::Raw::Zlib::Inflate->new();
	if (!defined $inflater || $status != Z_OK) {
		die "Failed to initialize inflater\n";
	}

	# Keep the status returned by inflate() itself; testing the
	# constructor's status again would never detect a corrupt chunk.
	my $contents = '';
	$status = $inflater->inflate($zipped_contents, $contents);
	if ($status != Z_OK && $status != Z_STREAM_END) {
		die "Failed to inflate\n";
	}

	# Drop the NUL separators/padding and write the remaining pieces.
	foreach my $content (split(/\0+/, $contents)) {
		print {$extract_out} $content;
	}
}
close($extract_out) or die "tmp.txt: $!\n";
$content_index_handle->close;
$content_handle->close;

# Stage 2: turn the extracted text into one XML document (tmp.xml).
# The text is wrapped in a single <xml> root element, <g>...</g> markup is
# flattened to "(=...)", and whitespace-only lines are dropped.  tmp.txt is
# removed once it has been consumed.
print " > Formatting Data...\n";
my $data = "tmp.txt";
my $tree;	# assigned later, when the XML is parsed

open (my $text_in, "<:utf8", $data) or die "$!";
open (my $xml_out, ">:utf8", 'tmp.xml') or die "$!";

print {$xml_out} "<xml>\n";
while (my $line = <$text_in>){
	# Rewrite the <g> markup first, then decide whether the line is kept.
	for ($line) {
		s|<g>|(=|g;
		s|</g>|)|g;
	}
	next if $line =~ /^\s*$/;
	print {$xml_out} $line;
}
print {$xml_out} "\n</xml>";

close $xml_out;
close $text_in;
unlink("tmp.txt");

# Stage 3: parse tmp.xml and write the entries as an HTML definition list
# (cdae2_utf8.html, UTF-8).  For every <entry>:
#   - each <w> inside a <head> opens a <DT>/<DD> pair and repeats the
#     headword, followed by <gw>, <pos> and <gram> annotations;
#   - each <sbody> lists its <def> (numbered per entry), <examp> and
#     <cat-xref> elements;
#   - each <rhead> lists its <word> and <pos> elements.
# tmp.xml is removed afterwards.
open (my $html_out, ">:utf8", 'cdae2_utf8.html') or die "$!";
$data = "tmp.xml";
$tree = XML::TreeBuilder->new();
$tree->parse_file($data);

print {$html_out} "<HTML>\n";
print {$html_out} "<HEAD>\n";
print {$html_out} "</HEAD>\n";
print {$html_out} "<BODY>\n";
print {$html_out} "<DL>\n";

# Without the root element there is nothing to iterate over; fail with a
# clear message instead of a method call on an undefined value.
my $xml = $tree->find_by_tag_name( 'xml' )
	or die "$data: no <xml> root element found\n";

# Optional parts of the headword line.  Both are switched off, so the
# generated HTML does not include them.
my $show_pronunciation = 0;	# <pron> elements, printed as " /.../"
my $show_inflections   = 0;	# <inf> elements, printed as " ..."

foreach my $entry_tree ( $xml->find_by_tag_name( 'entry' )) {

	foreach my $head_tree ( $entry_tree->find_by_tag_name( 'head' )){
		# Headword: once as the <DT> title, once as the first text of <DD>.
		foreach my $word ( $head_tree->find_by_tag_name( 'w' )) {
			my $headword = $word->as_text;
			print {$html_out} "<DT>\n" . $headword . "\n</DT>\n" . "<DD>\n";
			print {$html_out} $headword;
		}
		foreach my $guide_word ( $head_tree->find_by_tag_name( 'gw' )) {
			print {$html_out} "(" . $guide_word->as_text . ")";
		}
		if ($show_pronunciation) {
			foreach my $pron ( $head_tree->find_by_tag_name( 'pron' )) {
				print {$html_out} " /" . $pron->as_text . "/";
			}
		}
		foreach my $pos ( $head_tree->find_by_tag_name( 'pos' )) {
			print {$html_out} " " . $pos->as_text;
		}
		foreach my $gram ( $head_tree->find_by_tag_name( 'gram' )) {
			print {$html_out} " [" . $gram->as_text . "]";
		}
		if ($show_inflections) {
			foreach my $inf ( $head_tree->find_by_tag_name( 'inf' )) {
				print {$html_out} " " . $inf->as_text;
			}
		}
	}
	print {$html_out} "<BR>\n";

	# Definitions are numbered continuously across all <sbody> elements
	# of one entry.
	my $def_count = 1;
	foreach my $sbody_tree ( $entry_tree->find_by_tag_name( 'sbody' )){
		foreach my $def ( $sbody_tree->find_by_tag_name( 'def' )) {
			print {$html_out} "[" . $def_count++ ."]";
			print {$html_out} " " . $def->as_text . "<BR>\n";
		}
		foreach my $example ( $sbody_tree->find_by_tag_name( 'examp' )) {
			print {$html_out} "\t◇" . $example->as_text . "<BR>\n";
		}
		foreach my $xref ( $sbody_tree->find_by_tag_name( 'cat-xref' )) {
			print {$html_out} "《" . $xref->as_text . "》<BR>\n";
		}
		print {$html_out} "<BR>\n";
	}

	foreach my $rhead_tree ( $entry_tree->find_by_tag_name( 'rhead' )){
		foreach my $related_word ( $rhead_tree->find_by_tag_name( 'word' )) {
			print {$html_out} "\t-" . $related_word->as_text;
		}
		foreach my $related_pos ( $rhead_tree->find_by_tag_name( 'pos' )) {
			print {$html_out} " (" . $related_pos->as_text . ")<BR>\n";
		}
	}
	print {$html_out} "</DD>\n";
}
print {$html_out} "</DL>\n";
print {$html_out} "</BODY>\n";
print {$html_out} "</HTML>";
# Buffered write errors only surface at close time.
close($html_out) or die "cdae2_utf8.html: $!\n";
# The parsed tree is no longer needed; release it explicitly.
$tree->delete;
unlink("tmp.xml");

# Stage 4: re-encode the UTF-8 HTML line by line into Shift_JIS
# (cdae2.html) and remove the UTF-8 intermediate file.
print " > Creating cdae2.html...\n";
open(my $utf8_html, "<", "cdae2_utf8.html") or die "cdae2_utf8.html: $!\n";
open(my $sjis_html, ">", "cdae2.html") or die "cdae2.html: $!\n";
# Test for definedness so that only end-of-file ends the loop, never a
# line whose content happens to be false.
while (defined(my $octets = <$utf8_html>)) {
	my $string = decode( $utf8, $octets );
	print {$sjis_html} encode( $sjis, $string );
}
close $utf8_html;
# Buffered write errors only surface at close time.
close($sjis_html) or die "cdae2.html: $!\n";
unlink("cdae2_utf8.html");

# Stage 5: write the cdae2.ebs settings file.  InPath is the current
# working directory with forward slashes turned into backslashes and a
# trailing backslash appended; the remaining lines are fixed text.
print " > Creating cdae2.ebs...\n";
my $wd = Cwd::getcwd();
defined $wd or die "Cannot determine current directory: $!\n";
$wd =~ s|/|\\|g;

open(my $ebs_out, ">:utf8", "cdae2.ebs") or die "cdae2.ebs: $!\n";
print {$ebs_out} "InPath=" . $wd. "\\\n";
print {$ebs_out} "OutPath=CDAE2\\\n";
print {$ebs_out} "Book=CAMBRIDGE Dictionary of American English 2nd Edition;CDAE2;英和辞典;_;_;GAI16H00;GAI16F00;_;_;_;_;_;_;\n";
# Single quotes keep "$(BASE)" and the backslash literal.
print {$ebs_out} "Source=".'$(BASE)\cdae2.html;CDAE2;_;HTML';
# Buffered write errors only surface at close time.
close($ebs_out) or die "cdae2.ebs: $!\n";
