#!/usr/local/bin/perl
use strict;
use Encode qw(from_to);

=pod
use Time::HiRes qw/time/;
my $time = time;
END{ printf STDERR "time-cost: %.1lf s\n",time - $time }
=cut

# new($file) - open one n-gram list and position it on its first record.
#
# Called as a plain function (not as Class->new): the only argument is the
# path of the file to read.  Returns a hash-based object blessed into the
# current package, already advanced to its first record through next().
# Returns undef when no path is given or the file cannot be opened; in the
# latter case $! is left as set by open so the caller can report it.
#
# Object fields: -file (the path), -fh (the read handle, undef once the
# stream is exhausted) and -freq / -str / -gram (filled in by next()).
sub new{
	my $file = shift;
	# Test for definedness and length, not truth, so that a file literally
	# named "0" is accepted.
	return undef unless defined $file and length $file;

	my $self = {};
	$self->{-file} = $file;
	if($file eq '-'){
		# Keep the conventional "-" meaning of "read standard input".
		open($self->{-fh}, '<&', \*STDIN) or return undef;
	}else{
		# Three-argument open: the name is always taken as a plain path to
		# read, so leading/trailing whitespace is kept and names such as
		# ">out" or "cmd |" can neither clobber a file nor run a command.
		open($self->{-fh}, '<', $file) or return undef;
	}
	bless $self, __PACKAGE__;
	$self->next();
	return $self;
}

# next() - advance this stream to its next record.
#
# Reads one line from the object's -fh handle.  A line of the form
# "freq<TAB>string<TAB>gram" (freq and gram being digit runs) is stored in
# -freq, -str and -gram.  At end of file, or on the first line that does not
# have that form, the fields are reset to 0 / "" / 0, the handle is closed
# and -fh is set to undef, which marks the stream as exhausted.  Calling
# next() on an exhausted stream is a no-op.  Always returns the object;
# dies with "internal error" when invoked without one.
sub next{
	my $self = shift or die qq/internal error/;
	my $fh = $self->{-fh} or return $self;
	my $line = <$fh>;
	# The line terminator is optional so that a final record without a
	# trailing newline is not lost, and an optional CR lets files with CRLF
	# line endings parse instead of looking empty.
	if(defined $line and $line =~ m/^(\d+)\t(.+)\t(\d+)\r?\n?\z/){
		$self->{-freq} = $1;
		$self->{-str}  = $2;
		$self->{-gram} = $3;
	}else{
		$self->{-freq} = 0;
		$self->{-str}  = "";
		$self->{-gram} = 0;
		close $fh;
		$self->{-fh} = undef;
	}
	return $self;
}

# Main program: walk two or more n-gram lists in step and report every
# n-gram that is found in more than one of them.
#
# Assumes each input is already sorted by gram (descending) and then by
# string (ascending, plain cmp order) - that is the order the selection
# below follows; confirm against whatever produces the input files.
my @file = @ARGV;
die qq/usage: $0 file1 file2 [..fileN]\n/ if @file < 2;

# One stream per input file, kept in command-line order so that the
# per-file frequency columns of the output line up with the arguments.
my @streams;
for my $path (@file){
	my $opened = new($path) or die qq/$path:$!\n/;
	push @streams, $opened;
}

# $head/@rest is the working set of streams to pick the next record from.
# It is only re-filtered when a record is skipped, so it may briefly hold
# exhausted streams; those carry gram 0 and an empty string and therefore
# lose every comparison against a live record.
my($head,@rest) = @streams;
while(@rest){
	# Select the record that comes first: larger gram wins, ties are broken
	# by the smaller string.
	my $pick = $head;
	for my $cand (@rest){
		my $order = ($pick->{-gram} == $cand->{-gram})
			? ($pick->{-str} cmp $cand->{-str})
			: ($cand->{-gram} <=> $pick->{-gram});
		$pick = $cand if $order > 0;
	}
	# Copy the values out now: the selected stream is advanced below.
	my($freq,$str,$gram) = @{$pick}{qw(-freq -str -gram)};

	# Collect the frequency of that string in every file (0 where the file
	# is not currently positioned on it) and advance the files that had it.
	my @counts;
	my $total = 0;
	for my $stream (@streams){
		my $count = 0;
		if($stream->{-str} eq $str){
			$count = $stream->{-freq};
			$stream->next();
		}
		push @counts, $count;
		$total += $count;
	}

	# The sum equals the selected record's own frequency, i.e. no other file
	# contributed: print nothing, drop exhausted streams from the working
	# set and stop once fewer than two remain.
	if($total == $freq){
		($head,@rest) = grep { $_->{-fh} } @streams;
		last unless @rest;
		next;
	}

	# sqrt(N * sum(f^2) / total^2 - 1) over the N per-file frequencies,
	# which is their coefficient of variation (std deviation / mean).
	my $squares = 0;
	for my $count (@counts){
		$squares += $count * $count;
	}
	my $eva = sqrt(scalar(@counts) * $squares / $total / $total - 1);
#	from_to($str,"utf8" => "sjis");
	# Output: total, string, gram, <per-file frequencies>, variation.
	printf "%d\t%s\t%d\t<%s>\t%.3lf\n",
		$total, $str, $gram, join(" ", @counts), $eva;
}

__END__
