#!/usr/bin/perl
## Copyright (c) 1991-2004 Doshita Lab. Speech Group, Kyoto University
##   All rights reserved

## setup
# mkfa executable location.
# Perl interpolates $0 here but does NOT run the backquotes (they are inside
# a double-quoted string); the `dirname ...` part is expanded by the shell
# when this string is later passed to system().
$mkfabin = "`dirname $0`/mkfa";
# tmpdir
$tmpdir = "/tmp";

# No arguments at all, or -h as the first argument: print usage and exit.
if ($#ARGV < 0 || $ARGV[0] eq "-h") {
    usage();
}

# Defaults: generate both the .dict and the .term file.
$make_dict = 1;
$make_term = 1;

# Scan the command line.  "-t" and "-n" are switches; any other argument is
# taken as the grammar prefix (the last such argument wins).
$gramprefix = "";
foreach $arg (@ARGV) {
    if ($arg eq "-t") {
        $make_term = 1;
    } elsif ($arg eq "-n") {
        $make_dict = 0;
    } else {
        $gramprefix = $arg;
    }
}
if ($gramprefix eq "") {
    usage();
}

# Build every file name from the parsed prefix, not from the last element of
# @ARGV: with "prefix -n" the last element is the switch, not the prefix.
$gramfile = "${gramprefix}.grammar";
$vocafile = "${gramprefix}.voca";
$dfafile  = "${gramprefix}.dfa";
$dictfile = "${gramprefix}.dict";
$termfile = "${gramprefix}.term";

# Temporary files are named after the process id.
$tmpprefix = "$tmpdir/g$$";
$tmpvocafile = "${tmpprefix}.voca";
$rgramfile = "${tmpprefix}.grammar";

# generate reverse grammar file
# Each non-empty rule line "LEFT: sym1 sym2 ..." of $gramfile is written to
# $rgramfile with its right-hand symbols in reverse order.  Text from '#'
# to end of line is dropped, and blank lines are skipped.
# Three-argument open keeps characters in the file name from being read as
# an open mode.
open(my $gram_in, '<', $gramfile)
    || die "cannot open \"$gramfile\": $!";
open(my $rgram_out, '>', $rgramfile)
    || die "cannot open \"$rgramfile\": $!";
$n = 0;
while (my $rule = <$gram_in>) {
    chomp $rule;
    $rule =~ s/#.*//;
    next if $rule =~ /^[ \t]*$/;
    my ($left, $right) = split(/\:/, $rule);
    $right = '' unless defined $right;
    # split ' ' separates on any run of whitespace, so symbols separated by
    # tabs or several blanks are reversed too (a single-space split would
    # keep "A<TAB>B" as one token and leave it unreversed).
    my @symbols = reverse(split(' ', $right));
    print {$rgram_out} $left, ': ', join(' ', @symbols), "\n";
    $n++;
}
close($gram_in);
# Buffered write errors only surface at close time.
close($rgram_out) || die "cannot write \"$rgramfile\": $!";
print "$gramfile has $n rules\n";

# make temporary voca for mkfa (include only category info)
# Reads $vocafile once.  Every category line ("%NAME") is copied to
# $tmpvocafile as "#NAME"; when $make_term is set it is also written to
# $termfile as "<id><TAB>NAME", with ids counted from 0 in file order.
# All other non-empty lines are only counted as words.
open(my $voca_in, '<', $vocafile)
    || die "cannot open voca file $vocafile: $!";
open(my $tmpvoca_out, '>', $tmpvocafile)
    || die "cannot open temporary file $tmpvocafile: $!";
my $term_out;
if ($make_term == 1) {
    # Fail loudly instead of silently dropping the term file output.
    open($term_out, '>', $termfile)
        || die "cannot open $termfile for writing: $!";
}
$n1 = 0;        # number of categories
$n2 = 0;        # number of words
$termid = 0;    # next terminal id to assign
while (my $entry = <$voca_in>) {
    chomp $entry;
    $entry =~ s/#.*//;
    next if $entry =~ /^[ \t]*$/;
    if ($entry =~ /^%[ \t]*([A-Za-z0-9_]*)/) {
        my $category = $1;
        printf {$tmpvoca_out} "#%s\n", $category;
        if ($make_term == 1) {
            printf {$term_out} "%d\t%s\n", $termid, $category;
            $termid++;
        }
        $n1++;
    } else {
        $n2++;
    }
}
close($voca_in);
close($tmpvoca_out) || die "cannot write temporary file $tmpvocafile: $!";
if ($make_term == 1) {
    close($term_out) || die "cannot write $termfile: $!";
}
print "$vocafile    has $n1 categories and $n2 words\n";

# call mkfa and make .dfa
# mkfa is run through the shell on purpose: $mkfabin contains a backquoted
# `dirname ...` that only the shell expands.
print "---\n";
$status = system("$mkfabin -e1 -f $tmpprefix");
# The temporary inputs (and the .h file removed here) are no longer needed,
# whether or not mkfa succeeded.
unlink($rgramfile, $tmpvocafile, "${tmpprefix}.h");
print "---\n";
if ($status != 0) {
    # error: drop any partial output and report failure to the caller
    # through a non-zero exit status (a bare "exit" would report success).
    unlink("${tmpprefix}.dfa");
    print "no .dfa or .dict file generated\n";
    exit 1;
}
# List-form system() bypasses the shell, so file names containing blanks or
# shell metacharacters are passed to mv unchanged.
if (system("mv", "${tmpprefix}.dfa", $dfafile) != 0) {
    unlink("${tmpprefix}.dfa");
    die "cannot move ${tmpprefix}.dfa to $dfafile\n";
}
# remove temporary files
unlink("${tmpprefix}.dfa");

# convert .voca -> .dict
# terminal number should be ordered by voca at mkfa output
# Each category line ("%...") advances the category id, starting from 0.
# Every other non-empty line "NAME rest..." is written to $dictfile as
# "<id><TAB>[NAME]<TAB>rest", with the rest re-joined by single spaces.
if ($make_dict == 1) {
    $nowid = -1;
    open(my $voca_in, '<', $vocafile)
        || die "No vocafile \"$vocafile\" found: $!\n";
    open(my $dict_out, '>', $dictfile)
        || die "cannot open $dictfile for writing: $!\n";
    while (my $entry = <$voca_in>) {
        chomp $entry;
        $entry =~ s/#.*//;
        next if $entry =~ /^[ \t]*$/;
        if ($entry =~ /^%/) {
            $nowid++;
            next;
        }
        # First whitespace-separated field is the word name, the remainder
        # is copied through as-is.
        my ($name, @rest) = split(' ', $entry);
        printf {$dict_out} "%d\t[%s]\t%s\n", $nowid, $name, join(' ', @rest);
    }
    close($voca_in);
    # Buffered write errors only surface at close time.
    close($dict_out) || die "cannot write $dictfile: $!\n";
}

# Show the generated files with "ls -ld".  The file names are passed as
# separate arguments (list-form system), so no shell is involved and names
# containing blanks or shell metacharacters are listed correctly.
my @generated = ($dfafile);
if ($make_term == 1) {
    push(@generated, $termfile);
}
if ($make_dict == 1) {
    push(@generated, $dictfile);
}
system("ls", "-ld", @generated);

# Print the command-line help to standard output and terminate the script.
# Never returns to the caller.
sub usage {
    my @help_lines = (
        "mkdfa.pl --- DFA compiler\n",
        "usage: $0 [-n] prefix\n",
        "\t-n ... keep current dict, not generate\n",
    );
    foreach my $help_line (@help_lines) {
        print $help_line;
    }
    exit;
}
