#!/usr/bin/env perl

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001 - 2003 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Codon.pm,v 1.3 2002/07/30 17:40:56 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#

package G::Seq::Codon;

use SubOpt;
use GD;
use G::Messenger;
use G::Seq::Util;
use GD;

use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

use SelfLoader;

require Exporter;

@ISA = qw(Exporter AutoLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
	     codon_compiler
	     _codon_usage_table
	     entropy_cu
	     entropy_scu
	     enc
	     cbi
	     icdi
	     fop
	     w_value
	     cai
	     phx
	     cui
	     aaui
	     bui
	     dinuc
	     codon_pca

	     codon_counter
	     amino_counter
	     codon_usage
	     _codon_table
	     _codon_amino_printer
	     _codon_usage_printer
);
$VERSION = '0.01';

my %Table = ('gca'=>'A','gcc'=>'A','gcg'=>'A','gct'=>'A',
	     'tgc'=>'C','tgt'=>'C',
	     'gac'=>'D','gat'=>'D',
	     'gaa'=>'E','gag'=>'E',
	     'ttc'=>'F','ttt'=>'F',
	     'gga'=>'G','ggc'=>'G','ggg'=>'G','ggt'=>'G',
	     'cac'=>'H','cat'=>'H',
	     'ata'=>'I','atc'=>'I','att'=>'I',
	     'aaa'=>'K','aag'=>'K',
	     'cta'=>'L','ctc'=>'L','ctg'=>'L','ctt'=>'L','tta'=>'L','ttg'=>'L',
	     'atg'=>'M',
	     'aac'=>'N','aat'=>'N',
	     'cca'=>'P','ccc'=>'P','ccg'=>'P','cct'=>'P',
	     'caa'=>'Q','cag'=>'Q',
	     'aga'=>'R','agg'=>'R','cga'=>'R','cgc'=>'R','cgg'=>'R','cgt'=>'R',
	     'agc'=>'S','agt'=>'S','tca'=>'S','tcc'=>'S','tcg'=>'S','tct'=>'S',
	     'aca'=>'T','acc'=>'T','acg'=>'T','act'=>'T', 
	     'gta'=>'V','gtc'=>'V','gtg'=>'V','gtt'=>'V', 
	     'tgg'=>'W',
	     'tac'=>'Y','tat'=>'Y',
	     'taa'=>'/','tag'=>'/','tga'=>'/');

my %_1to3 = ('A'=>'Ala', 'C'=>'Cys', 'D'=>'Asp', 'E'=>'Glu', 'F'=>'Phe',
	     'G'=>'Gly', 'H'=>'His', 'I'=>'Ile', 'K'=>'Lys', 'L'=>'Leu',
	     'M'=>'Met', 'N'=>'Asn', 'P'=>'Pro', 'Q'=>'Gln', 'R'=>'Arg',
	     'S'=>'Ser', 'T'=>'Thr', 'V'=>'Val', 'W'=>'Trp', 'Y'=>'Tyr',
	     '/'=>'TER', 'U'=>'Sec');

my %Mwt = ('A'=>89.09, 'C'=>121.16, 'D'=>133.10, 'E'=>147.13, 'F'=>165.19,
           'G'=>75.07, 'H'=>155.16, 'I'=>131.17, 'K'=>146.19, 'L'=>131.17,
           'M'=>149.21, 'N'=>132.12, 'P'=>115.13, 'Q'=>146.15, 'R'=>174.20,
           'S'=>105.09, 'T'=>119.12, 'V'=>117.15, 'W'=>204.22, 'Y'=>181.19);

# hydropathy index (Kyte and Doolittle, 1982, J.Mol.Biol. 157:105-132)
my %KyteDoolittle = ('A'=> 1.8, 'C'=> 2.5, 'D'=>-3.5, 'E'=>-3.5, 'F'=> 2.8,
		     'G'=>-0.4, 'H'=>-3.2, 'I'=> 4.5, 'K'=>-3.9, 'L'=> 3.8,
		     'M'=> 1.9, 'N'=>-3.5, 'P'=>-1.6, 'Q'=>-3.5, 'R'=>-4.5,
		     'S'=>-0.8, 'T'=>-0.7, 'V'=> 4.2, 'W'=>-0.9, 'Y'=>-1.3);

# hydrophilicity value (Hopp and Woods, 1981, Proc.Natl.Acad.Sci.USA 78:3824)
my %HoppWoods = ('A'=>-0.5, 'C'=>-1.0, 'D'=> 3.0, 'E'=> 3.0, 'F'=>-2.5,
		 'G'=> 0.0, 'H'=>-0.5, 'I'=>-1.8, 'K'=> 3.0, 'L'=>-1.8,
		 'M'=>-1.3, 'N'=> 0.2, 'P'=> 0.0, 'Q'=> 0.2, 'R'=> 3.0,
		 'S'=> 0.3, 'T'=>-0.4, 'V'=>-1.5, 'W'=>-3.4, 'Y'=>-2.3);

# hydrophilicity scale (Parker et al., 1986, Biochem. 25:5425)
my %Parkeretal = ('A'=> 2.1, 'C'=> 1.4, 'D'=> 10 , 'E'=> 7.8, 'F'=>-9.2,
		  'G'=> 5.7, 'H'=> 2.1, 'I'=>-8.0, 'K'=> 5.7, 'L'=>-9.2,
		  'M'=>-4.2, 'N'=> 7.0, 'P'=> 2.1, 'Q'=> 6.0, 'R'=> 4.2,
		  'S'=> 6.5, 'T'=> 5.2, 'V'=>-3.7, 'W'=>-10 , 'Y'=>-1.9);

#  -index     hydropathic indices of amino acids (default:1)
#              1: Kyte & Doolittle, 2: Hopp & Woods, 3: Parker et al


__DATA__

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::


=head2 codon_compiler
    
 codon_compiler ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: pointer HASH = &codon_compiler(pointer G instance);
 Options:
  -output    output option (default: 'show')
  -filename  output filename (default: 'usage.csv')
  -id        ID of a group of genes or a single gene (default:'All')
  -translate 1 when translates using regular codon table (default: 0)
  -del_key   regular expression to delete amino acid and codon keys
             (default:'[^ACDEFGHIKLMNPQRSTVWYacgtU]')
  -data      type of amino acid and codon usage (default:'C0')
   'A0': Absolute amino acid frequency
   'A1': Relative amino acid usage ('RAAU')
   'C0': Absolute codon frequency
   'C1': Relative use of codon normalized by total number of codons
   'C2': Relative use of codon normalized by each amino acid totals
   'C3': Relative synonymous codon usage (RSCU) 
   'C4': Ratio to maximum of minor codon
   'C5': Maximum (1) or minor (0) codon
 Description:
  This program characterizes various types of amino acid and codon
  usage, and inputs in the G instance. e.g. usage of Met codon ATG
  will be accessible at $gb->{$id}->{$data}->{'Matg'}
 Informations stored in the G instance:
  $gb->{$id}->{tna}: total number of amino acids excluding formylmethionine
  $gb->{$id}->{tnc}: total number of codons excluding start & stop codons
  $gb->{$id}->{ndc}: number of distinct codons
  $gb->{$id}->{ndaa}: number of distinct amino acids
  $gb->{$id}->{nltk}: number of amino acids with less than k residues
  $gb->{$id}->{naa1}: number of amino acids with one residue in the gene
  $gb->{degeneracy}->{*}: degree of codon degeneracy for amino acid '*'
  $gb->{"legend_codon_compiler_$data"}
  $gb->{errormsg_codon_compiler}
 Normalization procedures:
  Five types of normalization procedures, C1, C2, C3, C4, and C5,
  are applied to describe codon usage. First, to exclude the effect 
  of the gene size, data are normalized for the pooled totals (C1).
  Second, to exclude the effect of the amino acid composition, 
  data are normalized for each individual amino acid (C2). Third,
  to exclude the effect of the synonymous codon number in each amino
  acid box, the relative synonymous codon usage (RSCU) is calculated.
  The RSCU is the observed codon frequency divided by that expected
  when all codons for the same amino acid are used equally (C3).
  Fourth, the relative adaptiveness (W) of each codon is calculated.
  The W is the ratio of the usage of each codon, to that of the most
  abundant codon for the same amino acid (C4). Fifth, binary number,
  1 or 0 refering to maximum or minor codon, is assigned (C5).

=cut

sub codon_compiler {
    &opt_default(output=>'show',filename=>'usage.csv',id=>'All',translate=>0,
		 del_key=>'[^ACDEFGHIKLMNPQRSTVWYacgtU]',data=>'C0');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $id = opt_val("id");
    my $data = opt_val("data");
    my $translate = opt_val("translate");
    my $del_key = opt_val("del_key");

    if($data !~ /A[01]|[CR][0-5]/){ &msg_error("$data: undefined type of amino acid and codon usage.\n"); return; }

    my $seq;
    my $i;
    my $aaseq;
    my $aa;
    my $codon;
    my %count;
    my $omit;
    
    if($id =~ /FEATURE/){ # for a single gene
	$seq = substr(lc($gb->get_geneseq($id)), $gb->{$id}->{codon_start} - 1 + 3, -3); # excluding start and stop codons

	if($translate){
	    $i = 0;
	    while($i <= length($seq) - 3){
		my $codon = substr($seq, $i, 3);
		$aaseq .= ($codon =~ /[^acgt]/) ? '?' : $Table{$codon};
		$i += 3;
	    }
	}
	else { $aaseq = substr($gb->{$id}->{translation}, 1); }

	$i = 0;
	while($aaseq){
	    my $aa = substr($aaseq, 0, 1);
	    if($data =~ /A/){ $count{$aa} ++; }
	    if($data =~ /[CR]/){ $count{$aa.substr($seq, $i, 3)} ++; }       
	    substr($aaseq, 0, 1) = '';
	    $i += 3;
	}
    }
    else { # for a group of genes
	my $errormsg;

	foreach my $cds ($gb->cds()){
	    next if( $id ne 'All' && !($gb->{$cds}->{$id}) );

	    $seq = substr(lc($gb->get_geneseq($cds)), $gb->{$cds}->{codon_start} - 1 + 3, -3);

	    # omitting CDS with illegal characteristics
	    if($data =~ /C/ && $id eq 'All'){
		if(length($seq) % 3){
		    unless($gb->{errormsg_codon_compiler}){ $errormsg ++; &msg_error("$cds:\tillegal length\n"); }
		    $omit .= "$cds|";
		    next;
		}
		if($gb->{$cds}->{note} =~ /Protein sequence is in conflict with the conceptual translation$/){
		    unless($gb->{errormsg_codon_compiler}){ $errormsg ++; &msg_error("$cds:\t", $gb->{$cds}->{note}, "\n"); }
		    $omit .= "$cds|";
		    next;
		}
	    }
	    # result of 'A0' is not necessarily equal to that of 'A0C0'

	    if($translate){
		$i = 0;
		while($i <= length($seq) - 3){
		    my $codon = substr($seq, $i, 3);
		    $aaseq .= ($codon =~ /[^acgt]/) ? '?' : $Table{$codon};
		    $i += 3;
		}
	    }
	    else {
		$aaseq = substr($gb->{$cds}->{translation}, 1);
		if($data =~ /C/ && $id eq 'All' && length($seq) / 3 != length($aaseq) && !($gb->{errormsg_codon_compiler})){
		    $errormsg ++; &msg_error("$cds:\t", length($aaseq), " amino acids, but ", length($seq)/3, " codons\n");
		}
	    }
	    
	    $i = 0;
	    while($aaseq){
		my $aa = substr($aaseq, 0, 1);
		if($data =~ /A/){ $count{$aa} ++; }
		if($data =~ /[CR]/){ $count{$aa.substr($seq, $i, 3)} ++; }
		substr($aaseq, 0, 1) = '';
		$i += 3;
	    }
	}
	$gb->{errormsg_codon_compiler} = 1 if($errormsg);
    }

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    #foreach (keys %count){ if($_ =~ /[A-Z][a-z]{3}/ && $_ !~ /Agc|Ctg|Dga|Ega|Ftt|Ggg|Hca|Iat|Kaa|Lct|Ltt|M|Naa|Pcc|Qca|Rag|Rcg|Sag|Stc|Tac|Vgt|Wtg|Yta|Tct|Qta|Sct/){ print "|", $gb->{$id}->{gene}; last; } }

    my $tna;  # total number of amino acids
    my $tnc;  # total number of codons in a gene
    my %tbox; # total number of codons in each amino acid box
    my $ndc;  # number of distinct codons
    my $ndaa; # number of distinct amino acids
    foreach (keys %count){
	if($del_key && $_ =~ /$del_key/){ delete $count{$_}; next; }
	if($id eq 'All' && $count{$_} < 2){ delete $count{$_}; next; }
	if($_ =~ /[A-Z]$/){ $tna += $count{$_}; $ndaa ++; }
	if($_ =~ /[A-Z][a-z]{3}/){ $tnc += $count{$_}; $tbox{substr($_, 0, 1)} += $count{$_}; $ndc ++; }
    }
    $gb->{$id}->{tna} = $tna; # input in the G instance
    $gb->{$id}->{tnc} = $tnc;
    $gb->{$id}->{ndc} = $ndc;
    $gb->{$id}->{ndaa} = $ndaa ? $ndaa : scalar keys %tbox;
    
    my %usage = %count;
    $gb->{$id}->{$data} = \%usage; # input in the G instance
    &codon_compiler($gb, -output=>'n', -id=>'All', -data=>$data, -translate=>$translate, -del_key=>$del_key) unless($gb->{All}->{$data});    
    my @keysAll = sort keys %{$gb->{All}->{$data}};
    
    if($data =~ /[CR]/){
	if(!$gb->{degeneracy}){
	    my %degeneracy;
	    foreach (@keysAll){
		$degeneracy{substr($_, 0, 1)} ++  if($_ =~ /[A-Z][a-z]{3}/);
	    }
	    $gb->{degeneracy} = \%degeneracy; # input in the G instance
	}
	
	my $nltk; # number of amino acids with less than k residues
	my $naa1; # number of amino acids with one residue in the gene
	foreach (keys %tbox){
	    $nltk ++ if($tbox{$_} < $gb->{degeneracy}->{$_});
	    $naa1 ++ if($tbox{$_} == 1);
	}
	$gb->{$id}->{nltk} = sprintf "%d", $nltk; # input in the G instance
	$gb->{$id}->{naa1} = sprintf "%d", $naa1; # input in the G instance
    }
    
    ########## normalization
    my %max;
    foreach (keys %count){
	if($_ =~ /[A-Z]$/){
	    if($data =~ /A1/){ $usage{$_} = $count{$_} / $tna; }
	}
	if($_ =~ /[A-Z][a-z]{3}/){
	    $aa = substr($_, 0, 1);
	    if($data =~ /C1|R1/){ $usage{$_} = $count{$_} / $tnc; }
	    if($data =~ /C2|R2/){ $usage{$_} = $count{$_} / $tbox{$aa}; }
	    if($data =~ /C3|R3/){ $usage{$_} = $count{$_} / $tbox{$aa} * $gb->{degeneracy}->{$aa}; }
	    if($data =~ /C4|R4|C5|R5/){
		$max{$aa} = $count{$_} if($count{$_} > $max{$aa});
	    }
	}
    }
    if($data =~ /C4|R4|C5|R5/){
	foreach (keys %count){
	    if($_ =~ /[A-Z][a-z]{3}/){
		$usage{$_} = $count{$_} / $max{substr($_, 0, 1)};
		$usage{$_} = 0 if($data =~ /C5|R5/ && $usage{$_} < 1);
	    }
	}
    }
    if($data =~ /R2|R3|R4/){
	foreach (@keysAll){
	    if($_ =~ /[A-Z][a-z]{3}/){
		$aa = substr($_, 0, 1);
		next if($tbox{$aa});
		if($data =~ /R2/){ $usage{$_} = 1 / $gb->{degeneracy}->{$aa}; }
		if($data =~ /R3|R4/){ $usage{$_} = 1; }
	    }
	}
    }
    
    ########## output    
    my $gene = ($id =~ /FEATURE/) ? $gb->{$id}->{gene} : "{$id}";
    my $total;
    if($output =~ /stdout|show/){
	unless($gb->{"legend_codon_compiler_$data"}){
	    msg_send("\n\n");
	    foreach(@keysAll){ msg_send("$_\t"); }
	    msg_send("#\ttotal\tgene");
	    $gb->{"legend_codon_compiler_$data"} = 1;
	}
	msg_send("\n\n");
	my $value;
	foreach(@keysAll){
	    $total += $usage{$_};
	    $value = sprintf((int($usage{$_}) - $usage{$_}) ? "%.3f" : "%d", $usage{$_});
	    $value = '-' if($data =~ /C2|C3|C4|R2|R3|R4/ && $tbox{substr($_, 0, 1)} == '');
	    msg_send("$value\t");
	}
	msg_send(scalar keys %usage, "\t$total\t$gene");
	if($omit){ chop $omit; msg_send("\t(omitting /$omit/)"); }
    }    
    if($output eq 'f'){
	open(FILE,">>$filename");
	unless($gb->{"legend_codon_compiler_$data"}){
	    if($data =~ /A/){
		print FILE "\nAmino acid (3-letter code),";
		foreach(@keysAll){ print FILE "$_1to3{$_},"; }
	    }
	    if($data =~ /[CR]/){
		if(1){ ##################################################
		    print FILE "\ndegeneracy,";
		    foreach(@keysAll){ print FILE "$gb->{degeneracy}->{substr($_, 0, 1)},"; }
		    print FILE "\nAmino acid (3-letter code),";
		    foreach(@keysAll){ print FILE "$_1to3{substr($_, 0, 1)},"; }
		    print FILE "\nAmino acid (1-letter code),";
		    foreach(@keysAll){ print FILE substr($_, 0, 1), ","; }
		    print FILE "\nDNA codon,";
		    foreach(@keysAll){ print FILE uc(substr($_, 1, 3)), ","; }
		    print FILE "\nRNA codon,";
		    foreach(@keysAll){
			my $codon = uc(substr($_, 1, 3));
			$codon =~ s/T/U/g;
			print FILE "$codon,";
		    }
		    print FILE "\nAmino acid (codon),";
		    foreach(@keysAll){
			my $codon = uc(substr($_, 1, 3));
			$codon =~ s/T/U/g;
			print FILE "$_1to3{substr($_, 0, 1)} ($codon),";
		    }
		} #######################################################
	    }
	    print FILE "\nkeys,";
	    foreach(@keysAll){ print FILE "$_,"; }
	    print FILE ",richness,total,gene,product";	    
	    $gb->{"legend_codon_compiler_$data"} = 1;
	}
    	print FILE "\n$id,";
	foreach(@keysAll){
	    $total += $usage{$_};
	    print FILE (exists $usage{$_}) ? "$usage{$_}," : "0,";
	}
	print FILE
	    ",", scalar keys %usage,
	    ",$total,$gene,",
	    $gb->{$id}->{product};
	if($omit){ chop $omit; print FILE "(omitting /$omit/)"; }
	close(FILE);
    }

    if($output eq 'f2'){
	open(FILE,">>$filename");
	foreach my $pos1 (qw(t c a g)){
	    print FILE "\n"; my %cnt;
	    foreach my $pos3 (qw(t c a g)){
		print FILE "\n";
		foreach my $pos2 (qw(t c a g)){
		    my $codon = $pos1.$pos2.$pos3;
		    my $aa = $Table{$codon};
		    my $CODON = uc($codon); $CODON =~ s/T/U/g;
		    print FILE $cnt{$aa} ? "   ," : "$_1to3{$aa},";
		    $cnt{$aa} ++ if($aa ne "/");
		    my $value = ($aa ne "/") ? sprintf("%.3f", $usage{$aa.$codon}) : "     ";
		    print FILE "$CODON,$value,,";
		}
	    }
	}
	print FILE "\n\n$gene,", $gb->{$id}->{product}, ",(used universal genetic code),,";

	# exception
	foreach(sort keys %usage){
	    next if($_ =~ /Agca|Agcc|Agcg|Agct|Ctgc|Ctgt|Dgac|Dgat|Egaa|Egag|Fttc|Fttt|Ggga|Gggc|Gggg|Gggt|Hcac|Hcat|Iata|Iatc|Iatt|Kaaa|Kaag|Lcta|Lctc|Lctg|Lctt|Ltta|Lttg|Matg|Naac|Naat|Pcca|Pccc|Pccg|Pcct|Qcaa|Qcag|Raga|Ragg|Rcga|Rcgc|Rcgg|Rcgt|Sagc|Sagt|Stca|Stcc|Stcg|Stct|Taca|Tacc|Tacg|Tact|Vgta|Vgtc|Vgtg|Vgtt|Wtgg|Ytac|Ytat/);
	    my $CODON = uc(substr($_, 1, 3)); $CODON =~ s/T/U/g;
	    my $value = sprintf((int($usage{$_}) - $usage{$_}) ? "%.3f" : "%d", $usage{$_});
	    print FILE "$_1to3{substr($_, 0, 1)},$CODON,$value,,";
	}
	close(FILE);
    }
    
    &_codon_usage_table(\%usage, -output=>$output, -filename=>"codon_usage_table_$id\_$data.png")
	if($output =~ /g|show/ && $id !~ /FEATURE/ && $data !~ /A/);

    return \%usage;
}


=head2 _codon_usage_table

 _codon_usage_table ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &_codon_usage_table(pointer HASH);
 Options:
  -output    output option (default: 'show')
  -filename  output filename (default: 'codon_usage_table.png')
 Description: This program constructs codon use table
 History:
  20030401 minor change
  20010721 first release (Author: Koya Mori)

=cut

sub _codon_usage_table {
    &opt_default(output=>'show', filename=>'codon_usage_table.png');
    my @args = opt_get(@_);
    my $usage = shift @args;
    my $output = opt_val("output");
    my $filename = opt_val("filename");

    my $im = new GD::Image(500, 550);
    my $white = $im->colorAllocate(255, 255, 255);
    my $black = $im->colorAllocate(0, 0, 0);
    my $blue = $im->colorAllocate(0, 0, 255);
    my $red = $im->colorAllocate(255, 0, 0);
    my $yellow = $im->colorAllocate(200, 200, 0);
    my $green = $im->colorAllocate(0, 150, 0);

    my $x;
    my $y;
    foreach $x ((10,50,450,490)){ $im->line($x,10,$x,450,$black); }
    foreach $x ((150,250,350)){ $im->line($x,30,$x,450,$black); }
    $im->line(50,30,450,30,$black); 
    foreach $y ((10,50,150,250,350,450)){ $im->line(10,$y,490,$y,$black); }

    $im->string(gdSmallFont, 15, 25, "first", $black);
    $im->string(gdSmallFont, 233, 15, "second", $black);
    $im->string(gdSmallFont, 455, 25, "third", $black);

    $y = 95;
    foreach(qw(U C A G)){ $im->string(gdSmallFont,30,$y,$_,$black);$y += 100; }
    $x = 90;
    foreach(qw(U C A G)){ $im->string(gdSmallFont,$x,35,$_,$black);$x += 100; }
    $y = 65;
    foreach(1..4){ foreach(qw(U C A G)){ $im->string(gdSmallFont,470,$y,$_,$black);$y += 20; } $y += 20; }

    my ($codon, $p1st, $p2nd, $p3rd, $aa, $value, $color);
    foreach $codon (keys %Table){
	$p1st = substr($codon, 0, 1);
	$y = 65 if($p1st eq 't');
	$y = 165 if($p1st eq 'c');
	$y = 265 if($p1st eq 'a');
	$y = 365 if($p1st eq 'g');
	$p2nd = substr($codon, 1, 1);
	$x = 60 if($p2nd eq 't');
	$x = 160 if($p2nd eq 'c');
	$x = 260 if($p2nd eq 'a');
	$x = 360 if($p2nd eq 'g');
	$p3rd = substr($codon, 2, 1);
	$y += 20 if($p3rd eq 'c');
	$y += 40 if($p3rd eq 'a');
	$y += 60 if($p3rd eq 'g');
	$aa = $Table{$codon};
	if($codon eq 'taa'){ $value = 'Ochre'; } 
	elsif($codon eq 'tag'){ $value = 'Amber'; } 
	elsif($codon eq 'tga'){ $value = 'Opal'; } 
	else{
	    $value = $usage->{$aa.$codon};
	    $value = sprintf( (int($value) - $value) ? "%.3f" : "%d", $value );
	}

	$codon = uc($codon);
	$codon =~ s/T/U/g;
	if($aa =~ /[DE]/){ $color = $blue; }
	elsif($aa =~ /[RKH]/){ $color = $red; }
	elsif($aa =~ /[NQSTY]/){ $color = $yellow; }
	elsif($aa =~ /[AGVLIPFMWC]/){ $color = $green; }
	else { $color = $black; } 
	$im->string(gdSmallFont,$x,$y,"$_1to3{$aa} $codon $value",$color);
    }
    $im->string(gdSmallFont,20,465,"blue acidic",$blue);
    $im->string(gdSmallFont,100,465,"red basic",$red);
    $im->string(gdSmallFont,170,465,"yellow neutral & polar",$yellow);
    $im->string(gdSmallFont,315,465,"green neutral & hydrophobic",$green);
    
    $im->string(gdSmallFont,20,485,"exception",$black);
    my $h = 100;
    my $v = 485;
    foreach(sort keys %{$usage}){
	next if($_ =~ /Agca|Agcc|Agcg|Agct|Ctgc|Ctgt|Dgac|Dgat|Egaa|Egag|Fttc|Fttt|Ggga|Gggc|Gggg|Gggt|Hcac|Hcat|Iata|Iatc|Iatt|Kaaa|Kaag|Lcta|Lctc|Lctg|Lctt|Ltta|Lttg|Matg|Naac|Naat|Pcca|Pccc|Pccg|Pcct|Qcaa|Qcag|Raga|Ragg|Rcga|Rcgc|Rcgg|Rcgt|Sagc|Sagt|Stca|Stcc|Stcg|Stct|Taca|Tacc|Tacg|Tact|Vgta|Vgtc|Vgtg|Vgtt|Wtgg|Ytac|Ytat/);
	$codon = uc(substr($_, 1, 3));
	$codon =~ s/T/U/g;
	$value = sprintf((int($usage->{$_}) - $usage->{$_}) ? "%.3f" : "%d", $usage->{$_});
	$aa = substr($_, 0, 1);
	$im->string(gdSmallFont,$h,$v,"$_1to3{$aa} $codon $value",$black);
	$v += 20;
	if($v == 545){ $h += 100; $v = 485; }
    }

    mkdir('graph', 0777);
    open(OUT, ">graph/$filename");
    binmode OUT;
    print OUT $im->png;
    close(OUT);
    
    msg_gimv("graph/$filename") if($output eq 'show');
}


=head2 entropy_cu

 entropy_cu ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &entropy_cu(pointer G instance);
 Options:
  -output    output option (default: 'stdout')
  -filename  output filename (default: "entropy_cu.csv")
  -id        ID of a group of genes or a single gene (default:'All')
  -translate 1 when translates using regular codon table (default: 0)
  -del_key   regular expression to delete amino acid and codon keys
             (default:'[^ACDEFGHIKLMNPQRSTVWYacgtU]')
 Description:
  This program calculates Shannon entropy functions of amino acid
  and codon usage, and inputs in the G instance.
 Informations stored in the G instance:
  $gb->{$id}->{Haau}: entropy of amino acid usage
  $gb->{$id}->{Hcu}: entropy of codon usage
  $gb->{$id}->{Dcu}: D value (Konopka, 1984)
  $gb->{$id}->{Dsyn}: Dsyn value (Konopka, 1984)
  $gb->{legend_entropy_cu}
 Reference: Konopka (1984) J.Theor.Biol. 107:697-705.
 Requirements: sub codon_compiler();

=cut

sub entropy_cu {
    &opt_default(output=>'stdout', filename=>"entropy_cu.csv", id=>'All', translate=>0, del_key=>'[^ACDEFGHIKLMNPQRSTVWYacgtU]');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $id = opt_val("id");
    my $translate = opt_val("translate");
    my $del_key = opt_val("del_key");
    my $aacu = &codon_compiler($gb, -output=>'n', -id=>$id, -data=>'A1C1',
			       -translate=>$translate, -del_key=>$del_key);
    my $gene = ($id =~ /FEATURE/) ? $gb->{$id}->{gene} : "{$id}";
    
    my ($ndaa, $Haau, $He, $ndc, $Hcu);
    foreach (keys %{$aacu}){
	if($_ =~ /^[A-Z]$/){
	    $ndaa ++;
	    $Haau += - $aacu->{$_} * log($aacu->{$_}) / log(2);
	    $He += - $aacu->{$_} * log($aacu->{$_} / $gb->{degeneracy}->{$_}) / log(2);
	}
	if($_ =~ /^[A-Z][a-z]{3}$/){
	    $ndc ++;
	    $Hcu += - $aacu->{$_} * log($aacu->{$_}) / log(2);
	}
    }
    
    if($ndaa){ # input in the G instance
	$gb->{$id}->{Haau} = sprintf "%.4f", $Haau;
	$gb->{$id}->{Hcu} = sprintf "%.4f", $Hcu;
	$gb->{$id}->{Dcu} = sprintf "%.4f", (6 - $Hcu) / (6 - $Haau);
	$gb->{$id}->{Dsyn} = sprintf "%.4f", ($He - $Hcu) / ($He - $Haau);
    }
    
    if($output eq 'stdout'){
	unless($gb->{"legend_entropy_cu"}){
	    
	    msg_send("\n\n",
		     "Shannon information theoretic computation of amino acid and codon usage\n",
		     " Laa: length in amino acids\n",
		     " ndaa: number of distinct amino acids\n",
		     " ndc: number of distinct codons\n",
		     " Haau: entropy of amino acid usage\n",
		     " Hcu: entropy of codon usage\n",
		     " Dcu: Relative deviation from equiprobability in codon usage\n",
		     " Dsyn: Relative deviation from equiprobability in synonymous codon usage\n",
		     "-----------------------------------------------------------------------\n",
		     "Laa\tndaa\tndc\tHaau\tHcu\tDcu\tDsyn\tgene\n",
		     "-----------------------------------------------------------------------\n");
	    $gb->{"legend_entropy_cu"} = 1;
	}
	msg_send(
		 $gb->{$id}->{tnc}, "\t",
		 "$ndaa\t$ndc\t",
		 $gb->{$id}->{Haau}, "\t",
		 $gb->{$id}->{Hcu}, "\t",
		 $gb->{$id}->{Dcu}, "\t",
		 $gb->{$id}->{Dsyn}, "\t",
		 $gene, "\n");
    }
    
    if($output eq 'f'){
	open(FILE,">>$filename");
	unless($gb->{"legend_entropy_cu"}){
	    print FILE "\n\n",
	    "Shannon information theoretic computation of amino acid and codon usage\n",
	    " Laa: length in amino acids\n",
	    " ndaa: number of distinct amino acids\n",
	    " ndc: number of distinct codons\n",
	    " Haau: entropy of amino acid usage\n",
	    " Hcu: entropy of codon usage\n",
	    " Dcu: Relative deviation from equiprobability in codon usage\n",
	    " Dsyn: Relative deviation from equiprobability in synonymous codon usage\n",
	    "Laa,ndaa,ndc,Haau,Hcu,Dcu,Dsyn,gene,product\n";
	    $gb->{"legend_entropy_cu"} = 1;
	}
	print FILE
	    $gb->{$id}->{tnc}, ",",
	    "$ndaa,$ndc,",
	    $gb->{$id}->{Haau}, ",",
	    $gb->{$id}->{Hcu}, ",",
	    $gb->{$id}->{Dcu}, ",",
	    $gb->{$id}->{Dsyn}, ",",
	    $gene, ",",
	    $gb->{$id}->{product}, "\n";
	close(FILE);
    }
}

    
=head2 entropy_scu

 entropy_scu ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &entropy_scu(pointer G instance);
 Options:
  -output    output option (default: 'stdout')
  -filename  output filename (default: "entropy_scu.csv")
  -id        ID of a group of genes or a single gene (default:'All')
  -translate 1 when translates using regular codon table (default: 0)
  -del_key   regular expression to delete amino acid and codon keys
             (default:'[^ACDEFGHIKLMNPQRSTVWYacgtU]')
 Description:
  This program calculates Shannon entropy from information theory of
  synonymous codon usage, and inputs in the G instance.
 Informations stored in the G instance:
  $gb->{$id}->{Hs}: simple sum of entropy (Zeeberg, 2002)
  $gb->{$id}->{Hw}: weighted sum of entropy (Wang et al., 2001)
  $gb->{$id}->{Ew}: weighted sum of relative entropy
  $gb->{"legend_entropy_scu"}
 Reference:
  Wang et al. (2001) Mol.Biol.Evol. 18:792-800.
  Zeeberg (2002) Genome Res. 12(6):944-955.
 Requirements: sub codon_compiler();

=cut

sub entropy_scu {
    &opt_default(output=>'stdout', filename=>"entropy_scu.csv", id=>'All', translate=>0, del_key=>'[^ACDEFGHIKLMNPQRSTVWYacgtU]');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $id = opt_val("id");
    my $translate = opt_val("translate");
    my $del_key = opt_val("del_key");

    my $aacu = &codon_compiler($gb, -output=>'n', -id=>$id, -data=>'A1C2',
			       -translate=>$translate, -del_key=>$del_key);
    my $gene = ($id =~ /FEATURE/) ? $gb->{$id}->{gene} : "{$id}";
    
    my $aaseq = $gb->{$id}->{translation};
    my $nk6 = $aaseq =~ tr/LRS/LRS/;             # 6-fold degenerate aa
    my $nk4 = $aaseq =~ tr/AGPTV/AGPTV/;         # 4-fold degenerate aa
    my $nk2 = $aaseq =~ tr/CDEFHKNQY/CDEFHKNQY/; # 2-fold degenerate aa
    $gb->{$id}->{p46} = sprintf "%.4f", ($nk4 + $nk6) / (length($aaseq) - 1);
    $gb->{$id}->{pk2} = sprintf "%.4f", $nk2 / (length($aaseq) - 1);
    $gb->{$id}->{pk4} = sprintf "%.4f", $nk4 / (length($aaseq) - 1);
    $gb->{$id}->{pk6} = sprintf "%.4f", $nk6 / (length($aaseq) - 1);
    
    my ($ndaa, $ndc, $Hs, $Hw, $Ew);
    my %Hi; # entropy for each amino acid
    my %Ei; # relative entropy for each amino acid
    
    foreach (keys %{$aacu}){
	$ndaa ++ if(length($_) == 1);
	if(length($_) == 4){
	    $ndc ++;
	    $Hi{substr($_, 0, 1)} += - $aacu->{$_} * log($aacu->{$_}) / log(2);
	}
    }
    
    foreach (keys %Hi){
	$Ei{$_} = $Hi{$_} / ( log($gb->{degeneracy}->{$_}) / log(2) ) if($gb->{degeneracy}->{$_} > 1);
    }
    $gb->{$id}->{Hi} = \%Hi;
    $gb->{$id}->{Ei} = \%Ei;
    
    foreach (keys %Hi){
	$Hs += $Hi{$_};
	$Hw += $Hi{$_} * $aacu->{$_};
	if($Ei{$_} > 1){ $Ew = -0.1; }
	else { $Ew += $Ei{$_} * $aacu->{$_}; }
    }
    
    if($ndaa){ # input in the G instance
	$gb->{$id}->{Hs} = sprintf "%.3f", $Hs;
	$gb->{$id}->{Hw} = sprintf "%.4f", $Hw;
	$gb->{$id}->{Ew} = sprintf "%.4f", $Ew;
    }
    
    my @aakeys = qw(A C D E F G H I K L M N P Q R S T V W Y);
    if($output eq 'stdout'){
	unless($gb->{"legend_entropy_scu"}){
	    msg_send("\n\n",
		     "Shannon information theoretic computation of synonymous codon usage\n",
		     " H(***): entropy for amino acid '***'\n",
		     " E(***): relative entropy for amino acid '***'\n",
		     " Laa: length in amino acids\n",
		     " ndaa: number of distinct amino acids\n",
		     " ndc: number of distinct codons\n",
		     " Hs: simple sum of entropy\n",
		     " Hw: weighted sum of entropy\n",
		     " Ew: weighted sum of relative entropy\n");
	    
	    foreach(@aakeys){ msg_send("H($_1to3{$_})\t"); }
	    foreach(@aakeys){ msg_send("E($_1to3{$_})\t"); }
	    msg_send("Laa\tndaa\tndc\tHs\tHw\tEw\tgene\n");
	    $gb->{"legend_entropy_scu"} = 1;
	}	
	msg_send("\n");
	foreach(@aakeys){
	    if(exists $Hi{$_}){
		$Hi{$_} = sprintf "%.4f", $Hi{$_};
		msg_send("$Hi{$_}\t");
	    }
	    else { msg_send("absent\t"); }
	}
	foreach(@aakeys){
	    if(exists $Hi{$_}){ 
		if($gb->{degeneracy}->{$_} > 1){ 
		    $Ei{$_} = sprintf "%.4f", $Ei{$_};
		    msg_send("$Ei{$_}\t");
		}
		else { msg_send("undef\t"); }
	    }
	    else { msg_send("absent\t"); }
	}	
	msg_send($gb->{$id}->{tnc}, "\t",
		 "$ndaa\t$ndc\t",
		 $gb->{$id}->{Hs}, "\t",
		 $gb->{$id}->{Hw}, "\t",
		 $gb->{$id}->{Ew}, "\t",
		 "$gene\n");	
    }
    
    if($output eq 'f'){
	open(FILE,">>$filename");
	unless($gb->{"legend_entropy_scu"}){
	    print FILE "\n\n",
	    " H(***): entropy for amino acid '***'\n",
	    " E(***): relative entropy for amino acid '***'\n",
	    " Laa: length in amino acids\n",
	    " ndaa: number of distinct amino acids\n",
	    " ndc: number of distinct codons\n",
	    " Hs: simple sum of entropy\n",
	    " Hw: weighted sum of entropy\n",
	    " Ew: weighted sum of relative entropy\n";
	    
	    print FILE "\ndegeneracy,";
	    foreach(@aakeys){ print FILE "$gb->{degeneracy}->{$_},"; }
	    print FILE "\n3-letter abbreviations,";
	    foreach(@aakeys){ print FILE "$_1to3{$_},"; }
	    print FILE "\n1-letter abbreviations,";
	    foreach(@aakeys){ print FILE "$_,"; }
	    print FILE "\nID,";
	    foreach(@aakeys){ print FILE "H($_1to3{$_}),"; }
	    print FILE ",ID,";
	    foreach(@aakeys){ print FILE "E($_1to3{$_}),"; }
	    print FILE ",ID,";
	    foreach(@aakeys){ print FILE "w($_1to3{$_}),"; }
	    print FILE ",ID,Laa,ndaa,ndc,Hs,Hw,Ew,gene,product";
	    $gb->{"legend_entropy_scu"} = 1;
	}
	
	printf FILE "\n$id,";
	foreach(@aakeys){
	    if(exists $Hi{$_}){ print FILE "$Hi{$_},"; }
	    else { print FILE "absent,"; }
	}
	print FILE ",$id,";
	foreach(@aakeys){
	    if(exists $Hi{$_}){ 
		if($gb->{degeneracy}->{$_} == 1){ print FILE "undef,"; }
		else { print FILE  "$Ei{$_},"; }
	    }
	    else { print FILE "absent,"; }
	}
	print FILE ",$id,";
	foreach(@aakeys){ printf FILE ("$aacu->{$_},"); }
	print FILE ",$id,",
	$gb->{$id}->{tnc}, ",",
	"$ndaa,$ndc,",
	$gb->{$id}->{Hs}, ",",
	$gb->{$id}->{Hw}, ",",
	$gb->{$id}->{Ew}, ",",
	$gene, ",",
	$gb->{$id}->{product};
	close(FILE);
    }
}


=head2 enc

 enc ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: scalar = &enc(pointer G instance);
 Options:
  -id        ID of a group of genes or a single gene (default:'All')
  -translate 1 when translates using regular codon table (default: 0)
  -del_key   regular expression to delete amino acid and codon keys
             (default:'[^ACDEFGHIKLMNPQRSTVWYacgtU]')
 Description:
  This program calculates the effective number of codons (Nc)
 Reference: Wright F (1990) Gene 87:23-29.
 Requirements: sub codon_compiler();

=cut

sub enc {
    &opt_default(id=>'All', translate=>0, del_key=>'[^ACDEFGHIKLMNPQRSTVWYacgtU]');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $id = opt_val("id");
    my $translate = opt_val("translate");
    my $del_key = opt_val("del_key");
    
    my %tbox;     # total number of codons in each amino acid box
    my $aa;       # amino acid
    my %relative; # relative use of each codon
    my %F;        # homozygosity (F) for each aa
    my %cntaa;    # Number of members (aa) for each SF type in the gene
    my %numaa;    # Number of members (aa) for each SF type in all genes
    my %sumF;     # sum of F value in the SF type
    my %aveF;     # average F value in the SF type
    my $enc;      # effective number of codons
    
    my $count = &codon_compiler($gb, -output=>'n', -id=>$id, -data=>'C0', 
				-translate=>$translate, -del_key=>$del_key);

    foreach (keys %{$count}){ $tbox{substr($_, 0, 1)} += $count->{$_}; }
    foreach (keys %{$count}){
	$aa = substr($_, 0, 1);
	if($tbox{$aa} - 1 == 0){ delete $tbox{$aa}; }
	else { $relative{$_} = $count->{$_} / $tbox{$aa}; }
    }
    
    foreach (keys %relative){ $F{substr($_, 0, 1)} += $relative{$_} ** 2; }
    foreach $aa (keys %F){
	$F{$aa} = ($tbox{$aa} * $F{$aa} - 1) / ($tbox{$aa} - 1);
    }
    
    foreach $aa (keys %F){
	if($F{$aa} > 0.0000001){
	    $sumF{$gb->{degeneracy}->{$aa}} += $F{$aa};
	    $cntaa{$gb->{degeneracy}->{$aa}} ++;
	}
    }
    
    foreach (values %{$gb->{degeneracy}}){ $numaa{$_} ++; }
    foreach (keys %numaa){
	if($_ == 1){ $aveF{$_} = 1; }
	elsif($cntaa{$_} && $sumF{$_}){
	    $aveF{$_} = $sumF{$_} / $cntaa{$_};
	}
	elsif($_ == 3 && $sumF{2} && $sumF{4}){
	    $aveF{3} = ($sumF{2} / $cntaa{2} + $sumF{4} / $cntaa{4}) / 2;
	}
	else { $enc = -999; }
    }
    unless($enc){ foreach (keys %numaa){ $enc += $numaa{$_} / $aveF{$_}; }}
    return ($enc > 61) ? 61.00 : sprintf "%.2f", $enc;
}


=head2 cbi

 cbi ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: scalar = &cbi(pointer G instance);
 Options:
  -id        ID of a group of genes or a single gene (default:'All')
  -translate 1 when translates using regular codon table (default: 0)
  -del_key   regular expression to delete amino acid and codon keys
             (default:'[^ACDEFGHIKLMNPQRSTVWYacgtU]')
 Description:
  This program calculates the codon bias index (CBI)
 Reference: Morton BR (1993) J.Mol.Evol. 37:273-280.
 Requirements: sub codon_compiler();

=cut

sub cbi {
    &opt_default(id=>'All', translate=>0, del_key=>'[^ACDEFGHIKLMNPQRSTVWYacgtU]');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $id = opt_val("id");
    my $translate = opt_val("translate");
    my $del_key = opt_val("del_key");
    
    my $aacu = &codon_compiler($gb, -output=>'n', -id=>$id, -data=>'A1C4',
				  -translate=>$translate, -del_key=>$del_key);
    
    my %bias; # biases of individual amino acids
    my $cbi;
    foreach (keys %{$gb->{All}->{A1C4}}){
	my $aa = substr($_, 0, 1);
	$bias{$aa} += ($aacu->{$_} - 1) ** 2 / ($gb->{degeneracy}->{$aa} - 1) if($aacu->{$aa} && length($_) == 4 && $gb->{degeneracy}->{$aa} > 1);
    }
    foreach (keys %bias){ $cbi += $aacu->{$_} * $bias{$_}; }
    return sprintf "%.4f", $cbi;
}


=head2 icdi

 icdi ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: scalar = &icdi(pointer G instance);
 Options:
  -id        ID of a group of genes or a single gene (default:'All')
  -translate 1 when translates using regular codon table (default: 0)
  -del_key   regular expression to delete amino acid and codon keys
             (default:'[^ACDEFGHIKLMNPQRSTVWYacgtU]')
 Description:
  This program calculates the intrinsic codon deviation index (ICDI)
 Reference: Freire-Picos MA et al. (1994) Gene 139:43-49.
 Requirements: sub codon_compiler();

=cut

sub icdi {
    &opt_default(id=>'All', translate=>0, del_key=>'[^ACDEFGHIKLMNPQRSTVWYacgtU]');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $id = opt_val("id");
    my $translate = opt_val("translate");
    my $del_key = opt_val("del_key");

    my $rscu;    # relative synonymous codon usage (RSCU) values
    my %tbox;    # total in each amino acid box
    my %Sk;      # Sk values of individual amino acids
    my $icdi;    # intrinsic codon deviation index (ICDI)
    my $ndaa;    # number of distinct amino acids
    
    $rscu = &codon_compiler($gb, -output=>'n', -id=>$id, -data=>'C3', 
			    -translate=>$translate, -del_key=>$del_key);
    foreach (keys %{$gb->{All}->{C3}}){
	$tbox{substr($_, 0, 1)} += $rscu->{$_};
    }
    foreach (keys %{$gb->{All}->{C3}}){
	my $aa = substr($_, 0, 1);
	$Sk{$aa} += ($rscu->{$_} - 1) ** 2 if($tbox{$aa} > 1);
	# exclude absent or non-degenerate amino acids
    }
    #next unless(%Sk); # e.g. NSP1 in S_cerevisiae/X.gbk
    foreach (keys %Sk){
	my $k = $gb->{degeneracy}->{$_};
	$icdi += $Sk{$_} / ($k * ($k - 1));
	$ndaa ++;
    }
    return sprintf "%.4f", $icdi /= $ndaa if($ndaa);
}


=head2 fop

 fop ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &fop(pointer G instance);
 Options:
  -output    output option (default: 'stdout')
  -filename  output filename (default: 'fop.csv')
  -binary    pointer HASH of vectors of binary number, 1 or 0 refering to
             the optimal or nonoptimal codon. By default, four amino acids
             (Ile, Asn, Phe, and Tyr), where the optimal codons seem to be
              the same in all species, are used.
  -translate 1 when translates using regular codon table (default: 0)
 Description:
  This program calculates the frequency of optimal codons (Fop), and
  inputs in the G instance. i.e. Fop values will be accessible at 
  $gb->{"FEATURE$i"}->{fop}
 Reference:
  Ikemura (1981) J.Mol.Biol. 151:389-409.
  Ikemura (1985) Mol.Biol.Evol. 2(1):13-34.
  Kanaya et al. (1999) Gene 238:143-155.
 Requirements: none.

=cut

sub fop {
    &opt_default(output=>'stdout', filename=>'fop.csv', translate=>0);
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $translate = opt_val("translate");

    # S.cerevisiae
    my %Sc = ('Lcta'=>0,'Lctc'=>0,'Lctg'=>0,'Lctt'=>0,'Ltta'=>0,'Lttg'=>1,
	      'Raga'=>1,'Ragg'=>0,'Rcga'=>0,'Rcgc'=>0,'Rcgg'=>0,'Rcgt'=>0, 
	      'Sagc'=>0,'Sagt'=>0,'Stca'=>0,'Stcc'=>1,'Stcg'=>0,'Stct'=>1,
	      'Agca'=>0,'Agcc'=>1,'Agcg'=>0,'Agct'=>1,
	      'Ggga'=>0,'Gggc'=>1,'Gggg'=>0,'Gggt'=>1,
	      'Pcca'=>0,'Pccc'=>0,'Pccg'=>0,'Pcct'=>0,
	      'Taca'=>0,'Tacc'=>1,'Tacg'=>0,'Tact'=>1,
	      'Vgta'=>0,'Vgtc'=>1,'Vgtg'=>0,'Vgtt'=>1,
	      'Iata'=>0,'Iatc'=>1,'Iatt'=>1,
	      'Naac'=>1,'Naat'=>0,
	      'Dgac'=>1,'Dgat'=>0,
	      'Ctgc'=>0,'Ctgt'=>0,
	      'Qcaa'=>0,'Qcag'=>0,
	      'Egaa'=>1,'Egag'=>0,
	      'Hcac'=>1,'Hcat'=>0,
	      'Kaaa'=>0,'Kaag'=>1,
	      'Fttc'=>1,'Fttt'=>0, 
	      'Ytac'=>1,'Ytat'=>0);
  
    # E.coli
    my %Ec = ('Lcta'=>0,'Lctc'=>0,'Lctg'=>1,'Lctt'=>0,'Ltta'=>0,'Lttg'=>0,
	      'Raga'=>0,'Ragg'=>0,'Rcga'=>0,'Rcgc'=>1,'Rcgg'=>0,'Rcgt'=>1, 
	      'Sagc'=>0,'Sagt'=>0,'Stca'=>0,'Stcc'=>0,'Stcg'=>0,'Stct'=>0,
	      'Agca'=>1,'Agcc'=>0,'Agcg'=>1,'Agct'=>1,
	      'Ggga'=>0,'Gggc'=>1,'Gggg'=>0,'Gggt'=>1,
	      'Pcca'=>0,'Pccc'=>0,'Pccg'=>1,'Pcct'=>0,
	      'Taca'=>0,'Tacc'=>1,'Tacg'=>0,'Tact'=>1, 
	      'Vgta'=>1,'Vgtc'=>0,'Vgtg'=>1,'Vgtt'=>1, # Ikemura (1985)
	      'Vgta'=>1,'Vgtc'=>0,'Vgtg'=>0,'Vgtt'=>1, # Kanaya et al.(1999)
	      'Iata'=>0,'Iatc'=>1,'Iatt'=>0,
	      'Naac'=>1,'Naat'=>0,
	      'Dgac'=>1,'Dgat'=>0,
	      'Ctgc'=>0,'Ctgt'=>0,
	      'Qcaa'=>0,'Qcag'=>1,
	      'Egaa'=>1,'Egag'=>0,
	      'Hcac'=>1,'Hcat'=>0,
	      'Kaaa'=>1,'Kaag'=>0,
	      'Fttc'=>1,'Fttt'=>0, 
	      'Ytac'=>1,'Ytat'=>0);

    my %four = ('Iatc'=>1,'Iatt'=>0, # 'Iata' is ignored
		'Naac'=>1,'Naat'=>0,
		'Fttc'=>1,'Fttt'=>0, 
		'Ytac'=>1,'Ytat'=>0);
        
    my $binary = opt_val("binary") ? opt_val("binary") : \%four;
    my %degeneracy;
    foreach (keys %{$binary}){ $degeneracy{substr($_, 0, 1)} ++; }
    
    if($output eq 'stdout'){
	msg_send("\n\n",
		 "optimal (1) and nonoptimal (0) codons\n",
		 "-------------------------------------\n"
		 );
	foreach (sort keys %$binary){ msg_send("$_\t"); }
	print "\n";
	foreach (sort keys %$binary){ msg_send("$binary->{$_}\t"); }
	msg_send("\n\n",
		 "Frequency of optimal codons (Fop; ratio of optimal codons to synonymous codons)\n",
		 " Laa: length in amino acids\n",
		 " Ls: length in silent sites (number of synonymous codons)\n",
		 "--------------------------------------------\n",
		 "Laa\tLs\tstart\tstop\tFop\tgene\n",
		 "--------------------------------------------\n");
    }
    if($output eq 'f'){
	open(FILE,">>$filename");
	print FILE
	    "Frequency of optimal codons (Fop; ratio of optimal codons to synonymous codons)\n",
	    " Laa: length in amino acids\n",
	    " Ls: length in silent sites (number of synonymous codons)\n",
	    "Laa,Ls,start codon,stop codon,Fop,gene,product\n";
    }

    foreach my $cds ($gb->cds()){
	#my $seq = substr(lc($gb->get_geneseq($cds)), $gb->{$cds}->{codon_start} - 1 + 3, -3);
	my $seq = substr($gb->get_geneseq($cds), $gb->{$cds}->{codon_start} - 1 + 3, -3);
	my $aaseq;
	my $i;

	if($translate){
	    while($i <= length($seq) - 3){
		my $codon = substr($seq, $i, 3);
		$aaseq .= ($codon =~ /[^acgt]/) ? '?' : $Table{$codon};
		$i += 3;
	    }
	}
	else { $aaseq = substr($gb->{$cds}->{translation}, 1); }

	my $Laa = length($aaseq);
	my $Ls;  # length in silent sites
	my $fop; # frequency of optimal codons
	
	while($aaseq){
	    my $aa = substr($aaseq, 0, 1);
	    substr($aaseq, 0, 1) = '';
	    my $codon = substr($seq, $i, 3);
	    if($degeneracy{$aa} > 1 && exists $binary->{$aa.$codon}){
		$Ls ++;
		$fop += $binary->{$aa.$codon};
	    }
	    $i += 3;
	}
	$gb->{$cds}->{fop} = sprintf "%.4f", $fop / $Ls if($Ls > 1);

	if($output eq 'stdout'){
	    msg_send("$Laa\t$Ls\t",
		     $gb->startcodon($cds), "\t",
		     $gb->stopcodon($cds), "\t",
		     $gb->{$cds}->{fop}, "\t",
		     $gb->{$cds}->{gene}, "\n");
	}
	if($output eq 'f'){
	    print FILE
		"$Laa,$Ls,",
		$gb->startcodon($cds), ",",
		$gb->stopcodon($cds), ",",
		$gb->{$cds}->{fop}, ",",
		$gb->{$cds}->{gene}, ",",
		$gb->{$cds}->{product}, "\n";
	}
    }
    close(FILE);
}


=head2 w_value

 w_value ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: pointer HASH_w_values = &w_value(pointer G instance); 
 Options:
  -in  regular expression to include genes in a reference set
       a reference set in several studies are in-built
       1: Nakamura & Tabata, 2: Sharp & Li, 3: Sakai et al.
       (default: 1) 
  -ex  regular expression to exclude genes from a reference set
       (default: '[Mm]itochondrial')
 Description:
  Calculates the W value necessary for CAI analysis.
  Returned value is a pointer HASH of W values.
 Reference:
  Sharp and Li (1987) Nucleic Acids Res. 15:1281-1295.
  Nakamura and Tabata (1997) Microb.Comp.Genomics 2:299-312.
  Sakai et al. (2001) J.Mol.Evol. 52:164-170.
 History:
  20030401 addition of -in & -out option
  20010830 first release (Author: Kazuharu "Gaou" Arakawa)
 Requirements: none.

=cut

sub w_value {
    &opt_default(output=>'stdout', filename=>"w_value.csv",
		 in=>'ribosomal.*protein', ex=>'[Mm]itochondrial');
    my @args=opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $include;
    my $exclude = opt_val("ex");
    
    #&msg_error("CAI bases its measure on reference codon usage, i.e. usually that of ribosomoal proteins, and therefore is not usually applicable to organisms other than prokaryotes.") if($gb->{HEADER} !~ /Archaea|Bacteria/);

    if(opt_val("in") == 1){ $include = 'ribosomal.*protein'; }
    elsif(opt_val("in") == 2){ $include = 'ribosomal.*protein|outer membrane protein|^elongation factor'; }
    elsif(opt_val("in") == 3){ $include = 'ribosom.*protein|outer.*membrane|elongation|heat.*shock|RNA.*polymerase.*subunit'; }    
    else{ $include = opt_val("in"); }
    
    if($output eq 'stdout'){
	msg_send("\n",
		 "Reference set of highly expressed genes\n",
		 "-----------------------------------------\n",
		 "length\tgene\tproduct\n",
		 "-----------------------------------------\n");
    }
    if($output eq 'f'){
	open(FILE,">>$filename");
	print FILE 
	    "Reference set of highly expressed genes\n",
	    "length,gene,product\n";
    }

    my $aaseq;
    my $seq;
    my $aa;
    my $codon;
    my %count;
    my $i;
    my %max;
    my %w_val;
    my $cnt;

    foreach my $cds ($gb->cds()){
	next if ($gb->{$cds}->{note} !~ /$include/i &&
		 $gb->{$cds}->{product} !~ /$include/i); 
	next if ($gb->{$cds}->{product} =~ /$exclude/);

	$cnt ++;
	$aaseq = substr($gb->{$cds}->{translation}, 1);
	$seq = substr(lc($gb->get_geneseq($cds)), $gb->{$cds}->{codon_start} - 1 + 3, -3);
	while($aaseq){
	    $aa = substr($aaseq, 0, 1);
	    substr($aaseq, 0, 1) = '';
	    $codon = substr($seq, $i, 3);
	    $count{$aa.$codon} ++;
	    $i += 3;
	}
	$i = 0;

	if($output eq 'stdout'){
	    msg_send(length($gb->{$cds}->{translation}) - 1, "\t",
		     $gb->{$cds}->{gene}, "\t", 
		     $gb->{$cds}->{product}, "\n");
	}
	if($output eq 'f'){
	    print FILE 
		length($gb->{$cds}->{translation}) - 1, ",",
		$gb->{$cds}->{gene}, ",",
		$gb->{$cds}->{product}, "\n";
	}
    }

    #die("\nCould not define w values. No reference proteins found.") unless($cnt);
    &msg_error("\ncai: No reference proteins found (could not define W values).") unless($cnt);
    
    foreach (keys %count){
	$aa = substr($_, 0, 1);
	$max{$aa} = $count{$_} if($count{$_} > $max{$aa});
    }
    foreach (keys %count){
	$w_val{$_} = sprintf "%.4f", $count{$_} / $max{substr($_, 0, 1)};
    }
    
    if($output eq 'stdout'){
	msg_send("\nRelative adaptiveness (W) of each codon\n",
		 "-----------------------------------------\n",
		 "aa\tcodon\tW value\n",
		 "-----------------------------------------\n");
	foreach (sort keys %count){
	    $aa = substr($_, 0, 1);
	    $codon = substr($_, 1, 3);
	    msg_send("$aa\t$codon\t$w_val{$_}\n");
	}
    }
    if($output eq 'f'){
	print FILE
	    "\nRelative adaptiveness (W) of each codon\n",
	    "amino acid,codon,W value\n";
	foreach (sort keys %count){
	    $aa = substr($_, 0, 1);
	    $codon = substr($_, 1, 3);
	    print FILE "$aa,$codon,$w_val{$_}\n";
	}
	close(FILE);
    }

    return \%w_val;
}


=head2 cai

 cai ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &cai(pointer G instance); 
 Options:
  -output      output option (default: stdout)
  -filename    output filename (default: "cai.csv")
  -translate   1 when translates using regular codon table (default: 0)
  -w_output    output option for W value (default: stdout)
  -w_filename  filename to output the W values (default: "w_value.csv")
  -w_values    pointer HASH of W values used in CAI calculation
  -w_absent    W value of codons absent from a reference set (default: "-") 
               a value of 0.5 was suggested by Sharp and Li (1987)
               "-" when excludes such codons from the calculation
 Description:
  This program calculates codon adaptation index (CAI) for each genes, 
  and inputs in the G instance. i.e. CAI values will be accessible at
  $gb->{"FEATURE$i"}->{cai};
 History:
  20030401 addition of -w_absent option
  20010830 first release (Author: Kazuharu "Gaou" Arakawa)
 Requirements: sub w_value()

=cut

sub cai {
    &opt_default(output=>'stdout', filename=>"cai.csv", translate=>0,
		 w_output=>'stdout', w_filename=>"w_value.csv", w_absent=>"-");
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $translate = opt_val("translate");
    my $w_output = opt_val("w_output");
    my $w_fname = opt_val("w_filename");
    my $w_abs = opt_val("w_absent");
    my $w_val;

    $w_output = $output if($w_output eq 'stdout' && $output ne 'stdout');

    if(opt_val("w_values")){
	$w_val = opt_val("w_values");
    }else{
	$w_val = &w_value($gb, -output=>$w_output, -filename=>$w_fname);
    }
    
    if($output eq 'stdout'){
	msg_send("\n",
		 "Codon Adaptation Index (CAI; geometric mean of W values)\n",
		 "--------------------------------------------------------\n",
		 "length\tstart\tstop\tCAI\tgene\n",
		 "--------------------------------------------------------\n");
    }
    if($output eq 'f'){
	open(FILE,">>$filename");
	print FILE
	    "Codon Adaptation Index (CAI; geometric mean of W values)\n",
	    "length,start codon,stop codon,CAI,gene,product\n";
    }
    
    foreach my $cds ($gb->cds()){
	my $seq = substr(lc($gb->get_geneseq($cds)), $gb->{$cds}->{codon_start} - 1 + 3, -3); # excluding start and stop codons
	my $aaseq;
	my $i;
	my $codon;
	
	if($translate){
	    while($i <= length($seq) - 3){
		$codon = substr($seq, $i, 3);
		$aaseq .= ($codon =~ /[^acgt]/) ? '?' : $Table{$codon};
		$i += 3;
	    }
	    $i = 0;
	}
	else { $aaseq = substr($gb->{$cds}->{translation}, 1); }
	
	my $cai;
	my $L;
	for($i = 0; $i * 3 <= length($seq) - 3; $i ++){
	    my $aa = substr($aaseq, $i, 1);
	    $codon = substr($seq, $i * 3, 3);
	    if($w_val->{$aa.$codon} > 0){ $cai += log($w_val->{$aa.$codon}); }
	    elsif($w_abs){ next if($w_abs eq '-'); $cai += log($w_abs); }
	    else { $cai = 999; last; }
	    $L ++;
	}
	$gb->{$cds}->{cai} = ($cai == 999) ? 0 : sprintf "%.4f", exp($cai/$L) if($L);

	if($output eq 'stdout'){
	    msg_send("$L\t",
		     $gb->startcodon($cds), "\t",
		     $gb->stopcodon($cds), "\t",
		     $gb->{$cds}->{cai}, "\t",
		     $gb->{$cds}->{gene}, "\n");
	}
	if($output eq 'f'){
	    print FILE
		"$L,",
		$gb->startcodon($cds), ",",
		$gb->stopcodon($cds), ",",
		$gb->{$cds}->{cai}, ",",
		$gb->{$cds}->{gene}, ",",
		$gb->{$cds}->{product}, "\n";
	}
    }
    close(FILE);
}


=head2 phx

 phx ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &phx(pointer G instance);
 Options:
  -output    output option (default: 'stdout')
  -filename  output filename (default: "phx.csv")
  -usage     pointer HASH of vectors of amino acid and codon usage
             of a set of highly expressed genes. By default,
             the ribosomal protein genes are used.
  -translate 1 when translates using regular codon table (default: 0)
  -del_key   regular expression to delete amino acid and codon keys
             (default:'[^ACDEFGHIKLMNPQRSTVWYacgtU]')
 Description:
  Calculates codon usage differences between gene classes for 
  characterizing predicted highly expressed (PHX) genes, and 
  inputs in the G instance. e.g. predicted expression level,  
  E(g), will be accessible at $gb->{"FEATURE$i"}->{pel}
 Informations stored in the G instance:
  $gb->{"FEATURE$i"}->{cudC}: B(g|C)
  $gb->{"FEATURE$i"}->{cudG}: B(g|G) 
  $gb->{"FEATURE$i"}->{pel}: E(g) [B(g|C) / B(g|G)]
  $gb->{"FEATURE$i"}->{phx}: 1 if a gene is PHX [i.e. E(g) > 1.05]
 References:
  Karlin and Mrazek (2000) J.Bacteriol. 182(18):5238-5250.
  Jansen et al. (2003) Nucleic Acids Res. 31(8):2242-2251.
 Requirements: sub codon_compiler()

=cut

sub phx {
    &opt_default(output=>'stdout', filename=>"phx.csv", usage=>"",
		 translate=>0, del_key=>'[^ACDEFGHIKLMNPQRSTVWYacgtU]');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $translate = opt_val("translate");
    my $del_key = opt_val("del_key");
    my $refset = opt_val("usage");
    
    my $cds;
    my $cnt;
    unless($refset){
	foreach $cds ($gb->cds()){
	    $gb->{$cds}->{RefSet} = ( ($gb->{$cds}->{product} =~ /ribosomal.*protein/i) ? 1 : 0 ); 
	    $cnt += $gb->{$cds}->{RefSet};
	}
    }
    if($cnt){
	$refset = &codon_compiler($gb, -output=>'n', -id=>'RefSet', -data=>'A1C2', -del_key=>$del_key);
    }
    else {
	&msg_error("\nphx: No reference proteins found.");
	$refset = &codon_compiler($gb, -output=>'n', -data=>'A1C2', -del_key=>$del_key);
    }
    
    if($output eq 'stdout'){
	msg_send("\n\n",
		 "Predicted highly expressed (PHX) genes\n",
		 " B(g|C): codon usage difference relative to collection of all genes\n",
		 " B(g|G): codon usage difference relative to a set of highly expressed genes\n",
		 " E(g): predicted expression level [B(g|C) / B(g|G)]\n",
		 " PHX = 1 if E(g) > 1.05\n",
		 "------------------------------------------------------------\n",
		 "length\tstart\tstop\tB(g|C)\tB(g|G)\tE(g)\tPHX\tgene\n",
		 "------------------------------------------------------------\n");
    }
    if($output eq 'f'){
	open(FILE,">>$filename");
	print FILE
	    "Predicted highly expressed (PHX) genes\n",
	    " B(g|C): codon usage difference relative to collection of all genes\n",
	    " B(g|G): codon usage difference relative to a set of highly expressed genes\n",
	    " E(g): predicted expression level [B(g|C) / B(g|G)]\n",
	    " PHX = 1 if E(g) > 1.05\n",
	    "length,start codon,stop codon,B(g|C),B(g|G),E(g),PHX,gene,product\n";
    }

    foreach $cds ($gb->cds()){
	my $aacu = &codon_compiler($gb, -output=>'n', -id=>$cds, -data=>'A1C2', -translate=>$translate, -del_key=>$del_key);

	# codon usage difference (cud) between gene classes
	my %boxC;
	my %boxG;
	my $cudC; # cud relative to collection of all genes
	my $cudG; # cud relative to a set of highly expressed genes
	foreach (keys %{$gb->{All}->{A1C2}}){
	    if(length($_) == 4){
		my $aa = substr($_, 0, 1);
		$boxC{$aa} += abs($aacu->{$_} - $gb->{All}->{A1C2}->{$_});
		$boxG{$aa} += abs($aacu->{$_} - $refset->{$_});
	    }
	}
	foreach (keys %boxC){
	    $cudC += $aacu->{$_} * $boxC{$_};
	    $cudG += $aacu->{$_} * $boxG{$_};
	}
	if($cudG){
	    $gb->{$cds}->{cudC} = sprintf "%.4f", $cudC;
	    $gb->{$cds}->{cudG} = sprintf "%.4f", $cudG;
	    $gb->{$cds}->{pel} = sprintf "%.4f", $cudC / $cudG;
	    $gb->{$cds}->{phx} = (($gb->{$cds}->{pel} > 1.05) ? 1 : 0);
	}

	if($output eq 'stdout'){
	    msg_send(length(substr($gb->{$cds}->{translation}, 1)), "\t",
		     $gb->startcodon($cds), "\t",
		     $gb->stopcodon($cds), "\t",
		     $gb->{$cds}->{cudC}, "\t",
		     $gb->{$cds}->{cudG}, "\t",
		     $gb->{$cds}->{pel}, "\t",
		     $gb->{$cds}->{phx}, "\t",
		     $gb->{$cds}->{gene}, "\n");
	}
	if($output eq 'f'){
	    print FILE
		length(substr($gb->{$cds}->{translation}, 1)), ",",
		$gb->startcodon($cds), ",",
		$gb->stopcodon($cds), ",",
		$gb->{$cds}->{cudC}, ",",
		$gb->{$cds}->{cudG}, ",",
		$gb->{$cds}->{pel}, ",",
		$gb->{$cds}->{phx}, ",",
		$gb->{$cds}->{gene}, ",",
		$gb->{$cds}->{product}, "\n";
	}
    }
    close(FILE);
}


=head2 cui

 cui ver.20031001
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &cui(pointer G instance);
 Options:
  -output    output option (default: 'stdout')
  -filename  output filename (default: 'cui.csv')
  -list      pointer ARRAY of list of codon usage indices.
    ['Hcu','Dcu','Dsyn','Hs','Hw','Ew','enc','cbi','icdi','cai','phx']
 Description:
  Calculates indices of codon usage, and inputs in the G instance.
 Requirements:
  sub entropy_cu; sub entropy_scu; sub enc; sub cbi; sub icdi;
  sub fop; sub w_value; sub cai; sub phx;

=cut

sub cui {
    &opt_default(output=>'stdout',filename=>'cui.csv',list=>['Ew','cai','gene']);
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $list = opt_val("list");
    my %func;
    foreach(@$list){
	if($_ =~ /Hcu|Dcu|Dsyn/){ $func{entropy_cu} ++; }
	elsif($_ =~ /Hs|Hw|Ew/){ $func{entropy_scu} ++; }
	elsif($_ =~ /enc|cbi|icdi|fop|cai|phx/){ $func{$_} ++; } 
    }
    
    foreach ($gb->cds()){
	&entropy_cu($gb, -output=>'n', -id=>$_) if($func{entropy_cu});
	&entropy_scu($gb, -output=>'n', -id=>$_) if($func{entropy_scu});
	$gb->{$_}->{enc} = &enc($gb, -id=>$_) if($func{enc});
	$gb->{$_}->{cbi} = &cbi($gb, -id=>$_) if($func{cbi});
	$gb->{$_}->{icdi} = &icdi($gb, -id=>$_) if($func{icdi});
    }
    &cai($gb, -output=>'n', -w_output=>'n') if($func{cai});
    &phx($gb, -output=>'n') if($func{phx});
    
    my $cds;
    if($output eq 'stdout'){
	msg_send("\n\nIndices of codon usage\n");
	foreach(@$list){ msg_send("$_\t"); }
	foreach $cds ($gb->cds()){
	    msg_send("\n");
	    foreach(@$list){ msg_send($gb->{$cds}->{$_}, "\t"); }
	}
    }
    if($output eq 'f'){
	open(FILE,">>$filename");
	print FILE "Indices of codon usage\n";
	foreach(@$list){ print FILE "$_,"; }
	foreach $cds ($gb->cds()){
	    print FILE "\n";
	    foreach(@$list){ print FILE "$gb->{$cds}->{$_},"; }
	}
    }
    close(FILE);
}







=head2 aaui

 aaui ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &aaui(pointer G instance);
 Options:
  -output    output option (default: 'stdout')
  -filename  output filename (default: 'aaui.csv')
  -id        ID of a group of genes or a single gene (default:'All')
 Description:
  Calculates indices of amino acid usage (excluding formylmethionine),
  and inputs in the G instance.
 Informations stored in the G instance:
  $gb->{$id}->{ndaa}: number of distinct amino acids
  $gb->{$id}->{Haau}: entropy of amino acid usage
  $gb->{$id}->{mmw}: mean molecular weight
  $gb->{$id}->{gravy}: mean hydropathic indices of each amino acid
  $gb->{$id}->{aroma}: relative frequency of aromatic amino acids
  $gb->{$id}->{acidic}: relative frequency of acidic amino acids
  $gb->{$id}->{basic}: relative frequency of basic amino acids
  $gb->{$id}->{neutral}: relative frequency of neutral amino acids
  $gb->{$id}->{polar}: relative frequency of polar amino acids
  $gb->{legend_aaui};
 Reference: Lobry and Gautier (1994) Nucleic Acids Res. 22:3174-3180.
 Requirements: none.

=cut


sub aaui {
    &opt_default(output=>'stdout', filename=>'aaui.csv', id=>'All');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $id = opt_val("id");
    
    my $aaseq;
    if($id =~ /FEATURE/){ # for a single gene
	$aaseq = substr($gb->{$id}->{translation}, 1); # excluding formylMet
    }
    else { # for a group of genes
	foreach my $cds ($gb->cds()){
	    next if( $id ne 'All' && !($gb->{$cds}->{$id}) );
	    $aaseq .= substr($gb->{$cds}->{translation}, 1);
	}
    }

    my $Laa = $aaseq =~ tr[ACDEFGHIKLMNPQRSTVWY][ACDEFGHIKLMNPQRSTVWY];
    if($aaseq){
	my %count;
	$count{A} = $aaseq =~ tr/A/A/;
	$count{C} = $aaseq =~ tr/C/C/;
	$count{D} = $aaseq =~ tr/D/D/;
	$count{E} = $aaseq =~ tr/E/E/;
	$count{F} = $aaseq =~ tr/F/F/;
	$count{G} = $aaseq =~ tr/G/G/;
	$count{H} = $aaseq =~ tr/H/H/;
	$count{I} = $aaseq =~ tr/I/I/;
	$count{K} = $aaseq =~ tr/K/K/;
	$count{L} = $aaseq =~ tr/L/L/;
	$count{M} = $aaseq =~ tr/M/M/;
	$count{N} = $aaseq =~ tr/N/N/;
	$count{P} = $aaseq =~ tr/P/P/;
	$count{Q} = $aaseq =~ tr/Q/Q/;
	$count{R} = $aaseq =~ tr/R/R/;
	$count{S} = $aaseq =~ tr/S/S/;
	$count{T} = $aaseq =~ tr/T/T/;
	$count{V} = $aaseq =~ tr/V/V/;
	$count{W} = $aaseq =~ tr/W/W/;
	$count{Y} = $aaseq =~ tr/Y/Y/;
	
	my ($ndaa, $Haau, $mmw, $hydro);
	foreach (qw(A C D E F G H I K L M N P Q R S T V W Y)){
	    next if($count{$_} == 0);
	    $ndaa ++;
	    $Haau += - $count{$_}/$Laa * log($count{$_}/$Laa) / log(2);
	    $mmw += $count{$_} * $Mwt{$_};
            $hydro += $count{$_} * $KyteDoolittle{$_};
            #if($index == 1){ $hydro += $count{$_} * $KyteDoolittle{$_}; }
            #elsif($index == 2){ $hydro += $count{$_} * $HoppWoods{$_}; }
            #elsif($index == 3){ $hydro += $count{$_} * $Parkeretal{$_}; }
        }
        $gb->{$id}->{Laa} = $Laa;
	$gb->{$id}->{ndaa} = $ndaa;
        $gb->{$id}->{Haau} = sprintf "%.4f", $Haau;
        $gb->{$id}->{mmw} = sprintf "%.2f", $mmw / $Laa;
	$gb->{$id}->{gravy} = sprintf "%+.4f", $hydro / $Laa;
	$gb->{$id}->{aroma} = sprintf "%.4f", ($count{F} + $count{Y} + $count{W}) / $Laa;
	$gb->{$id}->{CMHFYW} = sprintf "%.4f", ($count{C} + $count{M} + $count{H} + $count{F} + $count{Y} + $count{W}) / $Laa; # these six residues have in common the property of being the preferential targets of reactive oxygen species and can act as sinks for radical fluxes through electron tranfer between amino acids in the protein
	$gb->{$id}->{acidic} = sprintf "%.4f", ($count{D} + $count{E}) / $Laa;
	$gb->{$id}->{basic} = sprintf "%.4f", ($count{R} + $count{K} + $count{H}) / $Laa;
	$gb->{$id}->{neutral} = sprintf "%.4f", ($count{N} + $count{Q} + $count{S} + $count{T} + $count{Y}) / $Laa;
	$gb->{$id}->{polar} = $gb->{$id}->{acidic} + $gb->{$id}->{basic} + $gb->{$id}->{neutral};
	#$gb->{$id}->{polar} = sprintf "%.4f", ($count{D} + $count{E} + $count{R} + $count{K} + $count{H} + $count{N} + $count{Q} + $count{S} + $count{T} + $count{Y}) / $Laa;
	#$gb->{$id}->{nonpolar} = sprintf "%.4f", ($count{A} + $count{G} + $count{V} + $count{L} + $count{I} + $count{P} + $count{F} + $count{M} + $count{W} + $count{C}) / $Laa;
    }

    my $gene = ($id =~ /FEATURE/) ? $gb->{$id}->{gene} : "{$id}";
    if($output eq 'stdout'){
	unless($gb->{legend_aaui}){
	    msg_send("\n\n",
		     "Indices of amino acid usage (excluding formylmethionine)\n",
		     " Laa: length in amino acids\n",
		     " ndaa: number of distinct amino acids\n",
		     " Haau: entropy of amino acid usage\n",
		     " mmw: mean molecular weight\n",
		     " gravy: mean hydropathic indices of each amino acid\n",
		     "----------------------------------------------------------------------------\n",
		     "Laa\tndaa\tHaau\tmmw\tgravy\taroma\tacidic\tbasic\tneutral\tgene\n",
		     "----------------------------------------------------------------------------\n");
	    $gb->{legend_aaui} = 1;
	}
	msg_send(
		 $gb->{$id}->{Laa}, "\t",
		 $gb->{$id}->{ndaa}, "\t",
		 $gb->{$id}->{Haau}, "\t",
		 $gb->{$id}->{mmw}, "\t",
		 $gb->{$id}->{gravy}, "\t",
		 $gb->{$id}->{aroma}, "\t",
		 $gb->{$id}->{acidic}, "\t",
		 $gb->{$id}->{basic}, "\t",
		 $gb->{$id}->{neutral}, "\t",
		 $gene, "\n");
    }
    if($output eq 'f'){
	open(FILE,">>$filename");
	unless($gb->{legend_aaui}){
	    print FILE
		"Indices of amino acid usage (excluding formylmethionine)\n",
		" Laa: length in amino acids\n",
		" ndaa: number of distinct amino acids\n",
		" Haau: entropy in amino acid usage\n",
		" mmw: mean molecular weight\n",
		" gravy: mean hydropathic indices of each amino acid\n",
		"Laa,ndaa,Haau,mmw,gravy,aroma,acidic,basic,neutral,polar,gene\n";
	    $gb->{legend_aaui} = 1;
	}
	print FILE
	    $gb->{$id}->{Laa}, ",",
	    $gb->{$id}->{ndaa}, ",",
	    $gb->{$id}->{Haau}, ",",
	    $gb->{$id}->{mmw}, ",",
	    $gb->{$id}->{gravy}, ",",
	    $gb->{$id}->{aroma}, ",",
	    $gb->{$id}->{acidic}, ",",
	    $gb->{$id}->{basic}, ",",
	    $gb->{$id}->{neutral}, ",",
	    $gb->{$id}->{polar}, ",",
	    $gene, "\n";
    }
    close(FILE);
}


=head2 bui

 bui ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &bui(pointer G instance);
 Options:
  -output    output option (default: 'stdout')
  -filename  output filename (default: 'bui.csv')
  -id        ID of a group of genes or a single gene (default:'All')
  -translate 1 when translates using regular codon table (default: 0)
  -del_key   regular expression to delete amino acid and codon keys
             (default:'[^ACDEFGHIKLMNPQRSTVWYacgtU]')
  -position  codon position (default: '');
             '' when assessing overall base usage of the gene
             '1' when assessing base usage at 1st position of codons
             '2' when assessing base usage at 2nd position of codons
             '3' when assessing base usage at 3rd position of codons
 Description:
  Calculates indices of base usage (excluding start and stop codons),
  and inputs in the G instance. e.g. G+C content at 3rd position of
  synonymous codons will be accessible at $gb->{"FEATURE$i"}->{gcc3s}
 Informations stored in the G instance:
  $gb->{$id}->{"acgt$p"}: total number of bases (A+T+G+C)
  $gb->{$id}->{"a_c$p"}: A content [A/(A+T+G+C)]
  $gb->{$id}->{"c_c$p"}: C content [C/(A+T+G+C)]
  $gb->{$id}->{"g_c$p"}: G content [G/(A+T+G+C)]
  $gb->{$id}->{"t_c$p"}: T content [T/(A+T+G+C)]
  $gb->{$id}->{"ryr$p"}: purine/pyrimidine ratio [(A+G)/(T+C)]
  $gb->{$id}->{"gac$p"}: G+A content [(G+A)/(A+T+G+C)]
  $gb->{$id}->{"gcc$p"}: G+C content [(G+C)/(A+T+G+C)]
  $gb->{$id}->{"gtc$p"}: G+T content [(G+T)/(A+T+G+C)]
  $gb->{$id}->{"gcs$p"}: GC skew [(G-C)/(G+C)]
  $gb->{$id}->{"tas$p"}: TA skew [(T-A)/(T+A)]
  $gb->{$id}->{"Hbu$p"}: entropy of base usage
  $gb->{legend_bui}
 Requirements: none.

=cut

sub bui {
    &opt_default(output=>'stdout',filename=>'bui.csv',id=>'All',translate=>0,
		 del_key=>'[^ACDEFGHIKLMNPQRSTVWYacgtU]',position=>'');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $id = opt_val("id");
    my $translate = opt_val("translate");
    my $del_key = opt_val("del_key");
    my $p = opt_val("position");

    my $seq;
    my $aaseq;
    my $codon;
    my $i;
    
    if($id =~ /FEATURE/){ # for a single gene
	$seq = substr(lc($gb->get_geneseq($id)), $gb->{$id}->{codon_start} - 1 + 3, -3); # excluding start and stop codons	
	if($translate){
	    $i = 0;
	    while($i <= length($seq) - 3){
		my $codon = substr($seq, $i, 3);
		$aaseq .= ($codon =~ /[^acgt]/) ? '?' : $Table{$codon};
		$i += 3;
	    }
	}
	else { $aaseq = substr($gb->{$id}->{translation}, 1); }
	
	my $partseq;
	$i = 0;
	while($aaseq){
	    my $aa = substr($aaseq, 0, 1);
	    $partseq .= $p ? substr($seq, $i * 3 + $p - 1, 1) : substr($seq, $i * 3, 3) if($aa !~ /$del_key/);
	    substr($aaseq, 0, 1) = '';
	    $i ++;
	}
	$seq = $partseq;
    }
    
    else { # for a group of genes
	my $cumseq;
	foreach my $cds ($gb->cds()){
	    next if( $id ne 'All' && !($gb->{$cds}->{$id}) );
	    $seq = substr(lc($gb->get_geneseq($cds)), $gb->{$cds}->{codon_start} - 1 + 3, -3);	    
	    if($translate){
		$i = 0;
		while($i <= length($seq) - 3){
		    my $codon = substr($seq, $i, 3);
		    $aaseq .= ($codon =~ /[^acgt]/) ? '?' : $Table{$codon};
		    $i += 3;
		}
	    }
	    else { $aaseq = substr($gb->{$cds}->{translation}, 1); }
	    
	    my $partseq;
	    $i = 0;
	    while($aaseq){
		my $aa = substr($aaseq, 0, 1);
		$partseq .= $p ? substr($seq, $i * 3 + $p - 1, 1) : substr($seq, $i * 3, 3) if($aa !~ /$del_key/);
		substr($aaseq, 0, 1) = '';
		$i ++;
	    }
	    #print length($cumseq), " ";
	    $cumseq .= $partseq;
	}
	$seq = $cumseq;
    }

    my ($a, $c, $g, $t, $Hbu);
    if($seq){
	$a = $seq =~ tr/a/a/;
	$c = $seq =~ tr/c/c/;
	$g = $seq =~ tr/g/g/;
	$t = $seq =~ tr/t/t/;
	$gb->{$id}->{"acgt$p"} = $a+$c+$g+$t;
	$gb->{$id}->{"a_c$p"} = sprintf "%.4f", $a / ($a+$c+$g+$t);
	$gb->{$id}->{"c_c$p"} = sprintf "%.4f", $c / ($a+$c+$g+$t);
	$gb->{$id}->{"g_c$p"} = sprintf "%.4f", $g / ($a+$c+$g+$t);
	$gb->{$id}->{"t_c$p"} = sprintf "%.4f", $t / ($a+$c+$g+$t);
	
	if($t+$c){ $gb->{$id}->{"ryr$p"} = sprintf "%.4f", ($a+$g) / ($t+$c); }
	else { &msg_error("t + c = 0 in $id\n"); }
	
	if($g+$a){ $gb->{$id}->{"gas$p"} = sprintf "%+.4f", ($g-$a)/($g+$a); }
	else { &msg_error("g + a = 0 in $id\n"); }
	if($c+$t){ $gb->{$id}->{"cts$p"} = sprintf "%+.4f", ($c-$t)/($c+$t); }
	else { &msg_error("c + t = 0 in $id\n"); }
	
	if($g+$c){ $gb->{$id}->{"gcs$p"} = sprintf "%+.4f", ($g-$c)/($g+$c); }
	else { &msg_error("g + c = 0 in $id\n"); }
	if($t+$a){ $gb->{$id}->{"tas$p"} = sprintf "%+.4f", ($t-$a)/($t+$a); }
	else { &msg_error("t + a = 0 in $id\n"); }
	
	if($g+$t){ $gb->{$id}->{"gts$p"} = sprintf "%+.4f", ($g-$t)/($g+$t); }
	else { &msg_error("g + t = 0 in $id\n"); }
	if($a+$c){ $gb->{$id}->{"acs$p"} = sprintf "%+.4f", ($a-$c)/($a+$c); }
	else { &msg_error("a + c = 0 in $id\n"); }
	
	foreach($a,$c,$g,$t){ $Hbu -= $_/($a+$c+$g+$t) * log($_/($a+$c+$g+$t)) / log(2) if($_); }
	$gb->{$id}->{"Hbu$p"} = sprintf "%.4f", $Hbu;	
	
	my $gc = ($g+$c) / ($a+$c+$g+$t);
	my $gt = ($g+$t) / ($a+$c+$g+$t);
	$gb->{$id}->{"gac$p"} = sprintf "%.4f", ($g+$a) / ($a+$c+$g+$t);
	$gb->{$id}->{"gcc$p"} = sprintf "%.4f", $gc;
	$gb->{$id}->{"gtc$p"} = sprintf "%.4f", $gt;

	$gb->{$id}->{"Hgc$p"} = sprintf "%.4f", - ($gc * log($gc) + (1-$gc) * log(1-$gc)) / log(2) if(0 < $gc && $gc < 1);
	$gb->{$id}->{"Hgt$p"} = sprintf "%.4f", - ($gt * log($gt) + (1-$gt) * log(1-$gt)) / log(2) if(0 < $gt && $gt < 1);
    }
    
    my $gene = ($id =~ /FEATURE/) ? $gb->{$id}->{gene} : "{$id}";
    if($output eq 'stdout'){
	unless($gb->{legend_bui}){
	    msg_send("\n\n",
		     "Base usage indices (excluding start and stop codons)\n",
		     " ACGT: A$p + T$p + G$p + C$p\n",
		     " GAc: (G$p + A$p)/(A$p + T$p + G$p + C$p)\n",
		     " GCc: (G$p + C$p)/(A$p + T$p + G$p + C$p)\n",
		     " GTc: (G$p + T$p)/(A$p + T$p + G$p + C$p)\n",
		     " GCs: (G$p - C$p)/(G$p + C$p)\n",
		     " TAs: (T$p - A$p)/(T$p + A$p)\n",
		     " Hbu: entropy of base usage\n",
		     "-------------------------------------------------------------\n",
		     "ACGT\tGAc\tGCc\tGTc\tGCs\tTAs\tHbu\tgene\n",
		     "-------------------------------------------------------------\n");
	    $gb->{legend_bui} = 1;
	}
	msg_send($gb->{$id}->{"acgt$p"}, "\t",
		 $gb->{$id}->{"gac$p"}, "\t",
		 $gb->{$id}->{"gcc$p"}, "\t",
		 $gb->{$id}->{"gtc$p"}, "\t",
		 $gb->{$id}->{"gcs$p"}, "\t",
		 $gb->{$id}->{"tas$p"}, "\t",
		 $gb->{$id}->{"Hbu$p"}, "\t",
		 $gene, "\n");
    }
    if($output eq 'f'){
	open(FILE,">>$filename");
	unless($gb->{legend_bui}){
	    print FILE
		"Base usage indices (excluding start and stop codons)\n",
		" ACGT: A$p + T$p + G$p + C$p\n",
		" GAc: (G$p + A$p)/(A$p + T$p + G$p + C$p)\n",
		" GCc: (G$p + C$p)/(A$p + T$p + G$p + C$p)\n",
		" GTc: (G$p + T$p)/(A$p + T$p + G$p + C$p)\n",
		" GCs: (G$p - C$p)/(G$p + C$p)\n",
		" TAs: (T$p - A$p)/(T$p + A$p)\n",
		" Hbu: entropy of base usage\n",
		"ACGT,GAc,GCc,GTc,GCs,TAs,Hbu,gene\n";
	    $gb->{legend_bui} = 1;
	}
	print FILE
	    $gb->{$id}->{"acgt$p"}, ",",
	    $gb->{$id}->{"gac$p"}, ",",
	    $gb->{$id}->{"gcc$p"}, ",",
	    $gb->{$id}->{"gtc$p"}, ",",
	    $gb->{$id}->{"gcs$p"}, ",",
	    $gb->{$id}->{"tas$p"}, ",",
	    $gb->{$id}->{"Hbu$p"}, ",",
	    $gene, "\n";
	close(FILE);
    }
}



=head2 dinuc

 dinuc ver.20030401
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &dinuc(pointer G instance);
 Options:
  -output    output option (default: 'stdout')
  -filename  output filename (default: 'dinuc.csv')
  -id        ID of a group of genes or a single gene (default:'All')
  -position  codon position or reading frame (default: '');
             '' when assessing all codon positions
             '12' when assessing the reading frame 1-2
             '23' when assessing the reading frame 2-3
             '31' when assessing the reading frame 3-1
 Description:
  Calculates dinucleotide usage (excluding start and stop codons),
  and inputs in the G instance. e.g. CG dinucleotide usage at the
  reading frame 1-2 will be accessible at $gb->{"FEATURE$i"}->{cg12}
  Dinucleotide usage was computed as the ratio of observed (O) to
  expected (E) dinucleotide frequencies: O/E
 Informations stored in the G instance:
  $gb->{legend_dinuc}
 References:
  Yew et al. (2004) Virus Genes 28:1,41-53.
 Requirements: none.

=cut

sub dinuc {
    &opt_default(output=>'stdout',filename=>'dinuc.csv',id=>'All',position=>'');
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $id = opt_val("id");
    my $rf = opt_val("position");
    #if($rf==1){ $rf=12; } elsif($rf==2){ $rf=23; } elsif($rf==3){ $rf=31; }
    my $p = ($rf) ? substr($rf, 0, 1) : 1;
    my $q = ($rf) ? substr($rf, 1, 1) : 1;
    my $end = ($rf =~ /12|23/) ? 0 : 1;
    my %usg;
    my $seq;

    if($rf){
	if($id =~ /FEATURE/){ # for a single gene
	    $seq = substr(lc($gb->get_geneseq($id)), $gb->{$id}->{codon_start} - 1 + 3, -3); # excluding start and stop codons
	    for(my $i; $i * 3 <= length($seq) - 3 - $end; $i ++){		
		$usg{"mono$p"}->{substr($seq, $i * 3 + $p - 1, 1)} ++;
		$usg{"mono$q"}->{substr($seq, $i * 3 + $q - 1, 1)} ++;
		$usg{diobs}->{substr($seq, $i * 3 + $p - 1, 2).$rf} ++;
	    }
	}
	else { # for a group of genes
	    foreach my $cds ($gb->cds()){
		next if( $id ne 'All' && !($gb->{$cds}->{$id}) );
		$seq = substr(lc($gb->get_geneseq($cds)), $gb->{$cds}->{codon_start} - 1 + 3, -3);
		for(my $i; $i * 3 <= length($seq) - 3 - $end; $i ++){
		    $usg{"mono$p"}->{substr($seq, $i * 3 + $p - 1, 1)} ++;
		    $usg{"mono$q"}->{substr($seq, $i * 3 + $q - 1, 1)} ++;
		    $usg{diobs}->{substr($seq, $i * 3 + $p - 1, 2).$rf} ++;
		}
	    }
	}
    }
    else {
	if($id =~ /FEATURE/){ # for a single gene
	    $seq = substr(lc($gb->get_geneseq($id)), $gb->{$id}->{codon_start} - 1 + 3, -3); # excluding start and stop codons	    
	    for(my $i; $i <= length($seq) - 1 - $end; $i ++){		
		$usg{mono}->{substr($seq, $i + $p - 1, 1)} ++;
		$usg{diobs}->{substr($seq, $i + $p - 1, 2)} ++;
	    }
	}
	else { # for a group of genes
	    foreach my $cds ($gb->cds()){
		next if( $id ne 'All' && !($gb->{$cds}->{$id}) );
		$seq = substr(lc($gb->get_geneseq($cds)), $gb->{$cds}->{codon_start} - 1 + 3, -3);
		for(my $i; $i <= length($seq) - 1 - $end; $i ++){
		    $usg{mono}->{substr($seq, $i + $p - 1, 1)} ++;
		    $usg{diobs}->{substr($seq, $i + $p - 1, 2)} ++;
		}
	    }	
	}
    }
    
    my $total;
    foreach my $key (keys %usg){
	foreach (keys %{$usg{$key}}){ $total += $usg{$key}->{$_}; }
	foreach (keys %{$usg{$key}}){ $usg{$key}->{$_} =  $usg{$key}->{$_} / $total; }
	$total = 0;
    }
    
    my @keysAll = ("aa$rf","ac$rf","ag$rf","at$rf","ca$rf","cc$rf","cg$rf","ct$rf","ga$rf","gc$rf","gg$rf","gt$rf","ta$rf","tc$rf","tg$rf","tt$rf");   
    foreach (@keysAll){
	if($rf){ $usg{diexp}->{$_} = $usg{"mono$p"}->{substr($_,0,1)} * $usg{"mono$q"}->{substr($_,1,1)}; }
	else { $usg{diexp}->{$_} = $usg{mono}->{substr($_,0,1)} * $usg{mono}->{substr($_,1,1)}; }
    }
    
    my %dioe;
    foreach (@keysAll){
	if($usg{diexp}->{$_}){ 
	    $dioe{$_} = sprintf("%.3f", $usg{diobs}->{$_} / $usg{diexp}->{$_});
	    $gb->{$id}->{$_} = $dioe{$_}; # input in the G instance
	}
    }
    $gb->{$id}->{"dinuc$rf"} = \%dioe; # input in the G instance
    
    my $gene = ($id =~ /FEATURE/) ? $gb->{$id}->{gene} : "{$id}";
    
    if($output =~ /stdout/){
	unless($gb->{"legend_dinuc$p"}){
	    msg_send("\n\n");
	    foreach(@keysAll){ msg_send("$_\t"); }
	    msg_send("#\tgene");
	    $gb->{"legend_dinuc$p"} = 1;
	}
	msg_send("\n\n");
	foreach(@keysAll){ msg_send("$dioe{$_}\t"); }
	msg_send(scalar keys %dioe, "\t$gene");
    }    
    if($output eq 'f'){
	open(FILE,">>$filename");
	unless($gb->{"legend_dinuc$p"}){
	    print FILE "\nkeys,";
	    foreach(@keysAll){ print FILE "$_,"; }
	    print FILE "gene,";
	    $gb->{"legend_dinuc$p"} = 1;
	}
    	print FILE "\n$id,";
	foreach(@keysAll){
	    print FILE (exists $dioe{$_}) ? "$dioe{$_}," : "0,";
	}
	close(FILE);
    }
}


=head2 codon_pca

codon_pca ver.200503031
 Author: Haruo Suzuki (haruo@g-language.org)
 Usage: NULL = &codon_pca(pointer G instance);
 Options:
  -output    output option (default: 'show')
  -filename  output filename (default: 'pca.csv')
  -translate 1 when translates using regular codon table (default: 0)
  -del_key   regular expression to delete amino acid and codon keys
             (default:'[^ACDEFGHIKLMNPQRSTVWYacgtU]')
  -data      data matrix of amino acid and codon usage (default:'R1')
  -npc       number of principal components (default:'4')
  -parameter pointer ARRAY of list of gene parameters
    ['Laa','Haau','mmw','gravy','aroma','acidic','basic','neutral','polar',
     "Hbu$p","a_c$p","c_c$p","g_c$p","t_c$p","ryr$p",
     "gac$p","gcc$p","gtc$p","gcs$p","tas$p",
     "aa$_","ac$_","ag$_","at$_","ca$_","cc$_","cg$_","ct$_",
     "ga$_","gc$_","gg$_","gt$_","ta$_","tc$_","tg$_","tt$_",
     'Hcu','Dcu','Dsyn','Hs','Hw','Ew','enc','cbi','icdi',
     'fop','cai','pel']
 Description:
  Performs a principal component analysis (PCA) on codon usage and 
  analyzes correlations between the PCA axis scores and a number of
  gene parameters related to amino acid usage and base usage.

 Requirements:
  sub aaui; sub bui; sub dinuc;

=cut

sub codon_pca {
    &opt_default(output=>'show',filename=>'pca.csv',translate=>0,
		 del_key=>'[^ACDEFGHIKLMNPQRSTVWYacgtU]',data=>'R1',npc=>'4',
		 parameter=>['Laa','Hcu','Dcu','Dsyn','Hs','Hw','Ew','enc','cbi','icdi','fop','cai','pel','mmw','gravy','aroma','acidic','basic','neutral','polar','Haau']);

    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $translate = opt_val("translate");
    my $del_key = opt_val("del_key");
    my $data = opt_val("data");
    my $npc = opt_val("npc");
    my $parameter = opt_val("parameter");
    my %calc;
    foreach(@$parameter){
	if($_ =~ /mmw|gravy|aroma|acidic|basic|neutral|polar|Haau/){ $calc{aaui} ++; }
	elsif($_ =~ /Hcu|Dcu|Dsyn/){ $calc{entropy_cu} ++; }
	elsif($_ =~ /Hs|Hw|Ew/){ $calc{entropy_scu} ++; }
	elsif($_ =~ /pel/){ $calc{phx} ++; } 
	elsif($_ =~ /enc|cbi|icdi|fop|cai/){ $calc{$_} ++; }
    }

    if($data =~ /AA/){ $data = 'A1'; }
    elsif($data =~ /RF/){ $data = 'R2'; }
    elsif($data =~ /RSCU/){ $data = 'R3'; }

    ########## haruo
    #my @pp = ('','1','2','3');
    my @pp = ('1','2','3');
    
    foreach (@pp){ $parameter = [@$parameter, "gcc$_","gtc$_"]; }
    $parameter = [@$parameter, 'Hgc3','Hgt3'];

    #foreach (@pp){ $parameter = [@$parameter, "a_c$_","c_c$_","g_c$_","t_c$_","ryr$_","gac$_","gcc$_","gtc$_","gas$_","cts$_","gcs$_","tas$_","gts$_","acs$_"]; }
    #foreach my $rf ('','12','23','31'){ $parameter = [@$parameter, "aa$rf","ac$rf","ag$rf","at$rf","ca$rf","cc$rf","cg$rf","ct$rf","ga$rf","gc$rf","gg$rf","gt$rf","ta$rf","tc$rf","tg$rf","tt$rf"]; }
    ########## haruo
    
    &fop($gb, -output=>'n') if($calc{fop});
    &cai($gb, -output=>'n', -w_output=>'n') if($calc{cai});
    &phx($gb, -output=>'n') if($calc{phx});
    
    my $in = 'ribosomal.*protein|elongation.*factor';
    my $ex = 'modification|alanine|serine|transferase|synthase|acetylase|acetylating enzyme|methylase|precursor|regulat|initiates|RNA|DNA|transcription|selenocysteine-specific';
    
    my $inRp = 'ribosomal.*protein';
    my $exRp = 'modification|alanine|serine|transferase|synthase|acetylase|acetylating enzyme|methylase|precursor|regulat|initiates|RNA'; # 'ase|[cl]ation' # synth(et)?ase
    
    my $cds;
    foreach $cds ('All','High','leading','lagging',$gb->cds()){ ########## haruo
	$gb->{$cds}->{High} = (($gb->{$cds}->{product} =~ /$in/i &&
				$gb->{$cds}->{product} !~ /$ex/i ||
				$gb->{$cds}->{note} =~ /$in/i &&
				$gb->{$cds}->{note} !~ /$ex/i
				) ? 1 : 0 );
	
	$gb->{$cds}->{High} = (($gb->{$cds}->{product} =~ /$inRp/i &&
			      $gb->{$cds}->{product} !~ /$exRp/i ||
			      $gb->{$cds}->{note} =~ /$inRp/i &&
			      $gb->{$cds}->{note} !~ /$exRp/i
			      ) ? 1 : 0 ); ######################### thesisD
	
	unless(exists $gb->{$cds}->{$data}){
	    &codon_compiler($gb, -output=>'n', -translate=>$translate, -del_key=>$del_key, -data=>$data, -id=>$cds);
	}
	
	$gb->{$cds}->{Laa} = length($gb->{$cds}->{translation}) - 1;

	&entropy_cu($gb, -output=>'n', -translate=>$translate, -del_key=>$del_key, -id=>$cds) if($calc{entropy_cu});
	&entropy_scu($gb, -output=>'n', -translate=>$translate, -del_key=>$del_key, -id=>$cds) if($calc{entropy_scu});
	$gb->{$cds}->{enc} = &enc($gb, -translate=>$translate, -del_key=>$del_key, -id=>$cds) if($calc{enc});
	$gb->{$cds}->{cbi} = &cbi($gb, -translate=>$translate, -del_key=>$del_key, -id=>$cds) if($calc{cbi});
	$gb->{$cds}->{icdi} = &icdi($gb, -translate=>$translate, -del_key=>$del_key, -id=>$cds) if($calc{icdi});
	&aaui($gb, -output=>'n', -id=>$cds) if($calc{aaui});
	foreach (@pp){ &bui($gb, -output=>'n', -position=>$_, -del_key=>$del_key, -id=>$cds); }
	#foreach my $rf ('','12','23','31'){ &dinuc($gb, -output=>'n', -id=>$cds, -position=>$rf); } ########## haruo
	
	foreach(@$parameter){ $gb->{$cds}->{on} = 0 unless(exists $gb->{$cds}->{$_}); }
	
    }
    $parameter = [@$parameter, 'High'];
    #$parameter = [@$parameter, 'Rp','Ef','High','gene','product'];
        
    open(OUT, ">raw.csv");
    my @keysAll = sort keys %{$gb->{All}->{$data}};
    ########## haruo
    my @k_array;
    foreach(@keysAll){ push(@k_array, $gb->{degeneracy}->{substr($_, 0, 1)}); }
    my $k_join = join(',',@k_array);
    undef $k_join if($data =~ /A[01]/);
    ########## haruo
    my $nkey = scalar @keysAll;
    foreach(@keysAll){ print OUT "$_,"; }
    ########## haruo
    foreach(@$parameter){
	if($_ !~ /High|Ew|Hgc|Hgt/){
	    my $key = $_;
	    $key =~ tr[a-z][A-Z];
	    $key =~ s/LAA/Length/;
	    #print "\n$key";
	    $key =~ s/C1/1/;
	    $key =~ s/C2/2/;
	    $key =~ s/C3/3/;
	    #print "\t$key";
	    #if($key =~ /C3/){ print "\n$key"; $key =~ s/C3/3/g; print "\n$key"; }
	    print OUT "$key,";
	}
	else { print OUT "$_,"; }
    }
    #foreach(@$parameter){ print OUT "$_,"; }

    ########## haruo
    print OUT "\n";
    foreach $cds ('All','High','leading','lagging',$gb->cds()){ ########## haruo
	foreach(@keysAll){ print OUT (exists $gb->{$cds}->{$data}->{$_}) ? "$gb->{$cds}->{$data}->{$_}," : "0,"; }
	foreach(@$parameter){ print OUT "$gb->{$cds}->{$_},"; }
	#foreach(@$parameter){ print OUT (exists $gb->{$cds}->{$_}) ? "$gb->{$cds}->{$_}," : "0,"; }
	print OUT "\n";
    }
    close(OUT);
    
    my $rcmd= new Rcmd;
    my @result = $rcmd->exec(
			     "nkey <- $nkey",
			     "npc <- $npc",
			     "output <- \"$output\"",
			     'dat <- read.csv("raw.csv")',
			     'dat <- dat[,-ncol(dat)]',
			     ########## haruo
			     'cum <- dat[c(1,2,3,4),1:nkey]',
			     'dat <- dat[-c(1,2,3,4),]',
			     ########## haruo
			     'usg <- as.matrix(dat[,1:nkey])',
			     ########## haruo
			     'Ra <- as.dist(1 - cor(t(usg)))',
			     'mnRa <- mean(Ra); sdRa <- sd(Ra)',
			     'Ea <- dist(usg)',
			     'mnEa <- mean(Ea); sdEa <- sd(Ea)',
			     'usg.h <- usg[dat[,"High"] == 1,]',
			     'Rh <- as.dist(1 - cor(t(usg.h)))',
			     'mnRh <- mean(Rh); sdRh <- sd(Rh)',
			     'Eh <- dist(usg.h)',
			     'mnEh <- mean(Eh); sdEh <- sd(Eh)',
			     'usg.o <- usg[dat[,"High"] == 0,]',
			     'Ro <- as.dist(1 - cor(t(usg.o)))',
			     'Eo <- dist(usg.o)',
			     'mnRho <- (sum(Ra) - sum(Rh) - sum(Ro))/(length(Ra) - length(Rh) - length(Ro))',
			     'mnEho <- (sum(Ea) - sum(Eh) - sum(Eo))/(length(Ea) - length(Eh) - length(Eo))',
			     'Ew <- dat[, order(dimnames(dat)[[2]] != "Ew")[1]]',
			     'zzEw <- scale(Ew)',
			     'zEw <- mean(scale(Ew)[dat[,"High"]==1])',

			     'x <- "Hgc3"; y <- "Ew"',
			     #'EwHgc3 <- cor(dat[,x], dat[,y])^2',
			     'EwHgc3 <- cor(rank(dat[,x]), rank(dat[,y]))^2',
			     
			     'x <- "Hgt3"; y <- "Ew"',
			     #'EwHgt3 <- cor(dat[,x], dat[,y])^2',
			     'EwHgt3 <- cor(rank(dat[,x]), rank(dat[,y]))^2',
			     
			     'dat <- dat[, -order(dimnames(dat)[[2]] != "Ew")[1]]',
			     'dat <- dat[, -order(dimnames(dat)[[2]] != "Hgc3")[1]]',
			     'dat <- dat[, -order(dimnames(dat)[[2]] != "Hgt3")[1]]',

			     ########## haruo
			     'num <- dat[,(nkey+1):(ncol(dat)-1)]',
			     "kk <- c($k_join)", ########## haruo
			     # omits columns where SD is zero
			     'omit <- c()',
			     'for(i in 1:ncol(usg)) if(sd(usg[,i]) == 0) omit <- c(omit, i)',
			     'if(length(omit)){ usg <- usg[,-omit]; kk <- kk[-omit] }',
			     # function
			     # normalize = 0 covariance matrix
			     # normalize = 1 correlation matrix
			     'pca <- function(x, normalize = 0, npc = 3)',
			     '{',
			     'x <- as.matrix(x)',
			     'npc <- min(npc, ncol(x))',
			     'if(normalize) x <- scale(x)',
			     'eval <- (result <- eigen(var(x)))$values[1:npc]',
			     'evec <- result$vectors[, 1:npc]',
			     'contr <- eval / sum(result$values)',
			     'score <- x %*% evec',
			     'fl <- cor(x, score)',
			     'rownames(evec) <- rownames(fl) <- colnames(x)',
			     'names(contr) <- colnames(evec) <- colnames(fl) <- colnames(score) <- paste("PC", 1:npc)',
			     'list(contribution = contr, weight = evec, factor.loading = fl, score = score)',
			     '}',
			     # pca
			     'pc <- pca(usg, 0, npc)',
			     'gto <- apply(pc$s,2,var) > max(apply(usg,2,var))', # variance greater than original
			     'ngto <- sum(gto)',
			     'pcz <- apply(pc$s, 2, scale)',
			     'for(y in 1:ngto){ if(cor(Ew, pcz[,y]) < 0) pcz[,y] <- -pcz[,y] }',
			     'zz <- c(); zz <- round(abs(apply(pcz[dat[,"High"]==1,], 2, mean)), digits=4)',
			     #'rr <- cor(num, pcz[,1:npc])', # Pearson r
			     'rr <- cor(apply(num, 2, rank), apply(pcz[,1:npc], 2, rank))', # Spearman r
			     # major trends
			     'if(0) rr <- rbind(pc$f,rr)', ##### kumamushi
			     'key <- dimnames(rr)[[1]][ apply(abs(rr), 2, order)[nrow(rr),] ]',
			     # show plot
			     'if(output == "show"){',
			     'postscript("PCAplot.ps", horizontal=FALSE, width=10, height=10, pointsize=15)',
			     'par(mfrow=c(ceiling(sqrt(ngto)),ceiling(sqrt(ngto))))',
			     'for(y in 1:ngto){',
			     'x <- key[y]',
			     'r <- round(rr[x,y], digits=4)',
			     #'if(r < 0){ pcz[,y] <- -pcz[,y]; r <- -r }',
			     'if(sum(dat[,"High"]) > 1) main <- paste("z =", zz[y], "\nr =",r)',
			     'else main <- paste("r =",r)',
			     'plot(dat[,x],pcz[,y],xlab=x, ylab=paste("Z",y," (",round(pc$c[y]*100,digits=1),"%)"), type="n", main=main)',
			     'text(dat[dat[,"High"]==0,x],pcz[dat[,"High"]==0,y],label="+")',
			     'text(dat[dat[,"High"]==1,x],pcz[dat[,"High"]==1,y],label="o",col="red")',
			     '}',
			     '}',
			     # output file
			     'if(output == "f"){',
			     'out <- rbind(pc$c,pc$f)',
			     #'out <- round(rbind(pc$c,pc$f,rr), digits=4)',
			     'dimnames(out)[[1]][1] <- c("contribution")',
			     'write.table(out, file = "pca.csv", sep = ",", quote = F)',
			     '}',
			     #'write.table(cbind(pc$s,dat), file = "raw.csv", sep = ",", quote = F, row.names = F)',
			     ########## haruo
			     # cum
			     #'cum.z <- round(abs((is.vector(cum %*% pc$w) - apply(pc$s,2,mean)) / apply(pc$s,2,sd)), digits=4)',
			     # efk
			     'efk <- c(); for(k in c(2,4,6)) for(y in 1:npc) efk <- c(efk, cor(pc$s[,y], (usg[,kk == k] %*% pc$w[kk == k,])[,y]))',
			     'efk <- matrix(efk, 3, npc, byrow = T)',
			     # proportion of highly expressed genes at one extreme of the PCA axes
			     'pp <- c(); for(y in 1:npc) pp <- c(pp, max(sum(dat[order(-pc$s[,y]), "High"][1:50]), sum(dat[order(pc$s[,y]), "High"][1:50])))',
			     # output
			     'out <- round(rbind(pc$c,efk^2,rr^2), digits=4)',
			     'dimnames(out)[[1]][1:4] <- c("contr","k2","k4","k6")',
			     'val <- round(apply(rr^2, 2, max), digits=4)',
			     # result
			     'gpd <- key', # gene parameter detected
			     #'gpd[val < 0.65] = "nd"; zc <- 2.33', # dsyn.pl
			     'gpd[val < 0.5] = "nd"; zc <- 2.33', # pca.pl
			     'res <- gfd <- gpd',
			     'gfd[zz > zc & gfd != "nd"] = paste(gfd[zz > zc & gfd != "nd"], "(Expression)")',
			     'gfd[zz > zc & gfd == "nd"] = "Expression"',
			     'res[zz > zc & res != "nd"] = "two"',
			     'res[zz > zc & res == "nd"] = "Expression"',
			     'gpd[!gto] = "ns"; gfd[!gto] = "ns"; res[!gto] = "ns"',
			     #'if(sum(dat[,"High"])>1) out <- rbind(pp,zz,out)',
			     #
			     'usg2 <- usg[zzEw < -1.8, ]',
			     'pcz2 <- pcz[zzEw < -1.8, ]',
			     'usg2 <- usg2[apply(pcz2, 2, order)[1,], ]',
			     'cds <- dimnames(usg2)[[1]]',
			     #
			     'write.table(rbind(cds,gto,gpd,gfd,res,key,val,pp,zz,out), file = "haruo.csv", sep = ",", quote = F)',
			     ########## haruo
			     # return
			     #'as.vector(c(pc$c,pc$s,rr))',
			     'as.vector(c(EwHgc3,EwHgt3,mnRa,sdRa,mnEa,sdEa,mnRh,mnEh,mnRho,mnEho,zEw,
pc$c,pc$s,rr))',
			     );
    shift @result if($result[0] eq '');
    
    $gb->{EwHgc3} = sprintf "%.4f", shift @result;
    $gb->{EwHgt3} = sprintf "%.4f", shift @result;
    $gb->{mnRa} = sprintf "%.4f", shift @result;
    $gb->{sdRa} = sprintf "%.4f", shift @result;
    $gb->{mnEa} = sprintf "%.4f", shift @result;
    $gb->{sdEa} = sprintf "%.4f", shift @result;
    $gb->{mnRh} = sprintf "%.4f", shift @result;
    $gb->{mnEh} = sprintf "%.4f", shift @result;
    $gb->{mnRho} = sprintf "%.4f", shift @result;
    $gb->{mnEho} = sprintf "%.4f", shift @result;
    $gb->{zEw} = sprintf "%.4f", shift @result;

    for(1..$npc){
	$gb->{"PC$_"}->{contribution} = sprintf "%.3f", shift @result;
    }
    for(1..$npc){
	foreach $cds ($gb->cds()){
	    $gb->{$cds}->{"PC$_"} = sprintf "%+.3f", shift @result;
	}
    }
    for(1..$npc){
	foreach my $key (@$parameter){
	    next if($key =~ /High/);
	    $gb->{"PC$_"}->{$key} = sprintf "%+.3f", shift @result;
	}
    }
    
    msg_gimv("PCAplot.ps") if($output =~ /g|show/);
          
    if($output =~ /stdout|show/){
	msg_send("\n\nPrincipal Component Analysis (PCA) of data $data\n");
	msg_send("\nContribution of each principal component (PC)\n\t");
	for(1..$npc){ msg_send("PC$_\t"); }
	msg_send("\n\t");
	for(1..$npc){ msg_send($gb->{"PC$_"}->{contribution}, "\t"); }
	msg_send("\n\nCorrelations between the PC scores and other gene parameters\n\t");
	for(1..$npc){ msg_send("PC$_\t"); }
	foreach my $key (@$parameter){ 
	    next if($key =~ /High/);
	    msg_send("\n$key\t");
	    for(1..$npc){ msg_send($gb->{"PC$_"}->{$key}, "\t"); }
	}
    }
}

# Abbreviations
# A Ala Alanine
# C Cys Cysteine
# D Asp Aspartic acid
# E Glu Glutamic acid
# F Phe Phenylalanine
# G Gly Glycine
# H His Histidine
# I Ile Isoleucine
# K Lys Lysine
# L Leu Leucine 
# M Met Methionine
# N Asn Asparagine
# P Pro Proline
# Q Gln Glutamine
# R Arg Arginine
# S Ser Serine
# T Thr Threonine
#(U Sec Selenocysteine)
# V Val Valine
# W Trp Tryptophan
# Y Tyr Tyrosine
#(X     Others)



#codon_counter ver.20010721-01
#scripting by Koya Mori(mory@g-language.org)
#This program counts codons in CDS.
#options::
#-CDSid    CDS ID number(default:'')
#-filename output filename(default:'codon.csv')
#-option   "f" for file output, "stdout" for STDOUT output(default:'stdout')
#(pointer result)=&codon_counter(pointer Genome,  string key,  boolean debug);
sub codon_counter{
    &opt_default(CDSid=>"",filename=>'codon.csv',output=>'stdout');
    my @args=opt_get(@_);
    my $gb=opt_as_gb(shift @args);
    my $key=opt_val("CDSid");
    my $filename=opt_val("filename");
    my $output=opt_val("output");
    my %result;
    my $seq;
    my $i=1;
    my $q;

    if($key){
	if(defined(%{$gb->{"$key"}})){
	    $seq=$gb->get_geneseq("$key");
	    for($q=0;$q<length($seq);$q+=3){
		$result{substr($seq,$q,3)}++;
	    }
	}
    }
    else{
	foreach($gb->cds()){
	    $seq=$gb->get_geneseq("CDS$i");
	    for($q=0;$q<length($seq);$q+=3){
		$result{substr($seq,$q,3)}++;
	    }  
	    $i++;
	}
    }
    
    if($output eq "f"){
	_codon_amino_printer(\%result,-output=>"f",-filename=>"$filename");
    }
    if($output eq "stdout"){
	_codon_amino_printer(\%result,-output=>"stdout");
    }
    return \%result;
}


#amino_counter ver.20010721-01
#scripting by Koya Mori(mory@g-language.org)
#This program counts amino acid in CDS.
#options::
#-CDSid    CDS ID number(default:'')
#-filename output filename(default:'amino.csv')
#-output   "f" for file output, "stdout" for STDOUT output(default:'stdout')
#(pointer result)=&amino_counter(pointer Genome,  string key,  boolean debug);
sub amino_counter{
    &opt_default(CDSid=>"",filename=>'amino.csv',output=>'stdout');
    my @args=opt_get(@_);
    my $gb= shift @args;
    my $key=opt_val("CDSid");
    my $filename=opt_val("filename");
    my $output=opt_val("output");
    my %amino;
    my @tmp;
    my $num;

    if(length $key){
	if(defined(%{$gb->{"$key"}})){
	    $num=$gb->{"$key"}->{feature};
	    @tmp=split(//,$gb->{"FEATURE$num"}->{translation});
	    foreach(@tmp){
		$amino{$_}++;
	    }
	}
    }
    else{
	foreach my $cds ($gb->cds()){
	    @tmp=split(//,$gb->{$cds}->{translation});
	    foreach(@tmp){
		$amino{$_}++;
	    }
	}
    }
    
    if($output eq "f"){
	_codon_amino_printer(\%amino,-output=>"f",-filename=>"$filename");
    }
    if($output eq "stdout"){
	_codon_amino_printer(\%amino,-output=>"stdout");
    }

    return \%amino;
}


sub codon_amino_counter{
    my $gb=shift;
    my $key=shift;
    my %result;
    my $seq;
    my $amino;
    my $codon;
    my $num;
    my $q;
    my @tmp;
    
    if(defined(%{$gb->{"$key"}})){
	$seq=lc($gb->get_geneseq("$key"));
	$num=$gb->{"$key"}->{feature};
    }
    
    @tmp=split(//,$gb->{"FEATURE$num"}->{translation});
    for($q=0;$q<length($seq);$q+=3){
	$amino=shift @tmp;
	$amino="/" if($amino eq "");
	$codon=substr($seq,$q,3);
	$result{$amino}{$codon}++;
    }
    
    return %result;
}

    
#codon_usage ver.20010721-01
#scripting by Koya Mori(mory@g-language.org)
#This program calculates codon usage in one CDS.
#options::
#-CDSid    CDS ID number(default:'')
#-filename output filename(default:'amino.csv')
#-output   "f" for file output, "stdout" for STDOUT output(default:'stdout')
#(pointer result)=&codon_usage(pointer Genome,  string key,  boolean debug);
sub codon_usage{
    &opt_default(CDSid=>"",filename=>'codon_usage.csv',output=>'show',version=>1);
    my @args=opt_get(@_);

    my $gb=shift @args;
    my $key=opt_val("CDSid");
    my $filename=opt_val("filename");
    my $output=opt_val("output");
    my $version = opt_val("version");
    my $codon;
    my $amino;
    my %result;
    my %usage;
    
    if($key){
	%usage=codon_amino_counter($gb,$key);
    }
    else{
	foreach($gb->cds()){
	    %result=();
	    %result=codon_amino_counter($gb,$_);
	    foreach $amino (keys(%result)){
		foreach $codon (keys(%{$result{$amino}})){
		    $usage{$amino}{$codon}+=$result{$amino}{$codon};
		}
	    }
	}
    }
    
    if($output eq "f"){
	_codon_usage_printer(\%usage,-output=>"f",-filename=>"$filename");
    }
    if($output!~/[fn]/){
	_codon_usage_printer(\%usage,-output=>"stdout");
    }

    if ($version == 2){
	_codon_table2(\%usage,-output=>"$output") if($output=~/(g|show)/ && $key eq "");
    }else{
	_codon_table(\%usage,-output=>"$output") if($output=~/(g|show)/ && $key eq "");
    }

    return \%usage;
}


sub _codon_table{
    &opt_default(output=>"show",filename=>"codon_table.png");
    my @args=opt_get(@_);
    
    my $result=shift @args;
    my $filename=opt_val("filename");
    my $output=opt_val("output");
    my $x;
    my $y;
    my %amino;
    my %data;
    my %per;
    my $amino_total;
    my $codon;
    my $amino;
    my $v;
    my $h;
    my %exception;
    my $CoDoN;
    my %color;

    my $im = new GD::Image(500,550);
    my $white = $im->colorAllocate(255,255,255);
    my $black = $im->colorAllocate(0,0,0);
    my $red = $im->colorAllocate(255,0,0);
    my $yellow = $im->colorAllocate(200,200,0);
    my $green = $im->colorAllocate(0,150,0);
    my $blue = $im->colorAllocate(0,0,255);

    $color{D}=$yellow;
    $color{E}=$yellow;
    $color{R}=$red;
    $color{K}=$red;
    $color{H}=$red;
    $color{N}=$blue;
    $color{Q}=$blue;
    $color{S}=$blue;
    $color{T}=$blue;
    $color{Y}=$blue;
    $color{A}=$green;
    $color{G}=$green;
    $color{V}=$green;
    $color{L}=$green;
    $color{I}=$green;
    $color{P}=$green;
    $color{F}=$green;
    $color{M}=$green;
    $color{W}=$green;
    $color{C}=$green;
    $color{'/'}=$black;

    foreach((10,50,450,490)){
	$x=$_;
	for($y=10;$y<450;$y++){
	    $im->setPixel($x,$y,$black);
	} 
    }
    foreach((150,250,350)){
	$x=$_;
	for($y=30;$y<450;$y++){
	    $im->setPixel($x,$y,$black);
	} 
    }
    $y=30;
    for($x=50;$x<450;$x++){
	$im->setPixel($x,$y,$black);
    }
    foreach((10,50,150,250,350,450)){
	$y=$_;
	for($x=10;$x<490;$x++){
	    $im->setPixel($x,$y,$black);
	} 
    }

    $im->string(gdSmallFont,15,25,"first",$red);
    $im->string(gdSmallFont,233,15,"second",$green);
    $im->string(gdSmallFont,455,25,"third",$blue);
    $im->string(gdSmallFont,30,95,"T",$red);
    $im->string(gdSmallFont,30,195,"C",$red);
    $im->string(gdSmallFont,30,295,"A",$red);
    $im->string(gdSmallFont,30,395,"G",$red);
    $im->string(gdSmallFont,100,35,"T",$green);
    $im->string(gdSmallFont,200,35,"C",$green);
    $im->string(gdSmallFont,300,35,"A",$green);
    $im->string(gdSmallFont,400,35,"G",$green);
    $im->string(gdSmallFont,470,65,"T",$blue);
    $im->string(gdSmallFont,470,85,"C",$blue);
    $im->string(gdSmallFont,470,105,"A",$blue);
    $im->string(gdSmallFont,470,125,"G",$blue);
    $im->string(gdSmallFont,470,165,"T",$blue);
    $im->string(gdSmallFont,470,185,"C",$blue);
    $im->string(gdSmallFont,470,205,"A",$blue);
    $im->string(gdSmallFont,470,225,"G",$blue);
    $im->string(gdSmallFont,470,265,"T",$blue);
    $im->string(gdSmallFont,470,285,"C",$blue);
    $im->string(gdSmallFont,470,305,"A",$blue);
    $im->string(gdSmallFont,470,325,"G",$blue);
    $im->string(gdSmallFont,470,365,"T",$blue);
    $im->string(gdSmallFont,470,385,"C",$blue);
    $im->string(gdSmallFont,470,405,"A",$blue);
    $im->string(gdSmallFont,470,425,"G",$blue);

    foreach $amino (keys(%{$result})){
	$amino_total=0;
	foreach $codon (keys(%{$$result{$amino}})){
	    $amino_total+=$$result{$amino}{$codon};
	}
	foreach $codon (keys(%{$$result{$amino}})){
	    if($$result{$amino}{$codon} > $data{$codon}){
		if($data{$codon}!=""){
		    $exception{$codon}{amino}=$amino{$codon};
		    $exception{$codon}{per}=$per{$codon};
		}
		$data{$codon}=$$result{$amino}{$codon};
		$amino{$codon}=$amino;
		$per{$codon}=sprintf("%.3f",$$result{$amino}{$codon}/$amino_total);
	    }
	    else{
		$exception{$codon}{amino}=$amino;
		$exception{$codon}{per}=sprintf("%.3f",$$result{$amino}{$codon}/$amino_total);
	    }
	}
    }

    $im->string(gdSmallFont,60,65,"TTT  $amino{ttt}  $per{ttt}",$color{$amino{ttt}});
    $im->string(gdSmallFont,60,85,"TTC  $amino{ttc}  $per{ttc}",$color{$amino{ttc}});
    $im->string(gdSmallFont,60,105,"TTA  $amino{tta}  $per{tta}",$color{$amino{tta}});
    $im->string(gdSmallFont,60,125,"TTG  $amino{ttg}  $per{ttg}",$color{$amino{ttg}});
    $im->string(gdSmallFont,60,165,"CTT  $amino{ctt}  $per{ctt}",$color{$amino{ctt}});
    $im->string(gdSmallFont,60,185,"CTC  $amino{ctc}  $per{ctc}",$color{$amino{ctc}});
    $im->string(gdSmallFont,60,205,"CTA  $amino{cta}  $per{cta}",$color{$amino{cta}});
    $im->string(gdSmallFont,60,225,"CTG  $amino{ctg}  $per{ctg}",$color{$amino{ctg}});
    $im->string(gdSmallFont,60,265,"ATT  $amino{att}  $per{att}",$color{$amino{att}});
    $im->string(gdSmallFont,60,285,"ATC  $amino{atc}  $per{atc}",$color{$amino{atc}});
    $im->string(gdSmallFont,60,305,"ATA  $amino{ata}  $per{ata}",$color{$amino{ata}});
    $im->string(gdSmallFont,60,325,"ATG  $amino{atg}  $per{atg}",$color{$amino{atg}});
    $im->string(gdSmallFont,60,365,"GTT  $amino{gtt}  $per{gtt}",$color{$amino{gtt}});
    $im->string(gdSmallFont,60,385,"GTC  $amino{gtc}  $per{gtc}",$color{$amino{gtc}});
    $im->string(gdSmallFont,60,405,"GTA  $amino{gta}  $per{gta}",$color{$amino{gta}});
    $im->string(gdSmallFont,60,425,"GTG  $amino{gtg}  $per{gtg}",$color{$amino{gtg}});

    $im->string(gdSmallFont,160,65,"TCT  $amino{tct}  $per{tct}",$color{$amino{tct}});
    $im->string(gdSmallFont,160,85,"TCC  $amino{tcc}  $per{tcc}",$color{$amino{tcc}});
    $im->string(gdSmallFont,160,105,"TCA  $amino{tca}  $per{tca}",$color{$amino{tca}});
    $im->string(gdSmallFont,160,125,"TCG  $amino{tcg}  $per{tcg}",$color{$amino{tcg}});
    $im->string(gdSmallFont,160,165,"CCT  $amino{cct}  $per{cct}",$color{$amino{cct}});
    $im->string(gdSmallFont,160,185,"CCC  $amino{ccc}  $per{ccc}",$color{$amino{ccc}});
    $im->string(gdSmallFont,160,205,"CCA  $amino{cca}  $per{cca}",$color{$amino{cca}});
    $im->string(gdSmallFont,160,225,"CCG  $amino{ccg}  $per{ccg}",$color{$amino{ccg}});
    $im->string(gdSmallFont,160,265,"ACT  $amino{act}  $per{act}",$color{$amino{act}});
    $im->string(gdSmallFont,160,285,"ACC  $amino{acc}  $per{acc}",$color{$amino{acc}});
    $im->string(gdSmallFont,160,305,"ACA  $amino{aca}  $per{aca}",$color{$amino{aca}});
    $im->string(gdSmallFont,160,325,"ACG  $amino{acg}  $per{acg}",$color{$amino{acg}});
    $im->string(gdSmallFont,160,365,"GCT  $amino{gct}  $per{gct}",$color{$amino{gct}});
    $im->string(gdSmallFont,160,385,"GCC  $amino{gcc}  $per{gcc}",$color{$amino{gcc}});
    $im->string(gdSmallFont,160,405,"GCA  $amino{gca}  $per{gca}",$color{$amino{gca}});
    $im->string(gdSmallFont,160,425,"GCG  $amino{gcg}  $per{gcg}",$color{$amino{gcg}});

    $im->string(gdSmallFont,260,65,"TAT  $amino{tat}  $per{tat}",$color{$amino{tat}});
    $im->string(gdSmallFont,260,85,"TAC  $amino{tac}  $per{tac}",$color{$amino{tac}});
    $im->string(gdSmallFont,260,105,"TAA  $amino{taa}  $per{taa}",$color{$amino{taa}});
    $im->string(gdSmallFont,260,125,"TAG  $amino{tag}  $per{tag}",$color{$amino{tag}});
    $im->string(gdSmallFont,260,165,"CAT  $amino{cat}  $per{cat}",$color{$amino{cat}});
    $im->string(gdSmallFont,260,185,"CAC  $amino{cac}  $per{cac}",$color{$amino{cac}});
    $im->string(gdSmallFont,260,205,"CAA  $amino{caa}  $per{caa}",$color{$amino{caa}});
    $im->string(gdSmallFont,260,225,"CAG  $amino{cag}  $per{cag}",$color{$amino{cag}});
    $im->string(gdSmallFont,260,265,"AAT  $amino{aat}  $per{aat}",$color{$amino{aat}});
    $im->string(gdSmallFont,260,285,"AAC  $amino{aac}  $per{aac}",$color{$amino{aac}});
    $im->string(gdSmallFont,260,305,"AAA  $amino{aaa}  $per{aaa}",$color{$amino{aaa}});
    $im->string(gdSmallFont,260,325,"AAG  $amino{aag}  $per{aag}",$color{$amino{aag}});
    $im->string(gdSmallFont,260,365,"GAT  $amino{gat}  $per{gat}",$color{$amino{gat}});
    $im->string(gdSmallFont,260,385,"GAC  $amino{gac}  $per{gac}",$color{$amino{gac}});
    $im->string(gdSmallFont,260,405,"GAA  $amino{gaa}  $per{gaa}",$color{$amino{gaa}});
    $im->string(gdSmallFont,260,425,"GAG  $amino{gag}  $per{gag}",$color{$amino{gag}});

    $im->string(gdSmallFont,360,65,"TGT  $amino{tgt}  $per{tgt}",$color{$amino{tgt}});
    $im->string(gdSmallFont,360,85,"TGC  $amino{tgc}  $per{tgc}",$color{$amino{tgc}});
    $im->string(gdSmallFont,360,105,"TGA  $amino{tga}  $per{tga}",$color{$amino{tga}});
    $im->string(gdSmallFont,360,125,"TGG  $amino{tgg}  $per{tgg}",$color{$amino{tgg}});
    $im->string(gdSmallFont,360,165,"CGT  $amino{cgt}  $per{cgt}",$color{$amino{cgt}});
    $im->string(gdSmallFont,360,185,"CGC  $amino{cgc}  $per{cgc}",$color{$amino{cgc}});
    $im->string(gdSmallFont,360,205,"CGA  $amino{cga}  $per{cga}",$color{$amino{cga}});
    $im->string(gdSmallFont,360,225,"CGG  $amino{cgg}  $per{cgg}",$color{$amino{cgg}});
    $im->string(gdSmallFont,360,265,"AGT  $amino{agt}  $per{agt}",$color{$amino{agt}});
    $im->string(gdSmallFont,360,285,"AGC  $amino{agc}  $per{agc}",$color{$amino{agc}});
    $im->string(gdSmallFont,360,305,"AGA  $amino{aga}  $per{aga}",$color{$amino{aga}});
    $im->string(gdSmallFont,360,325,"AGG  $amino{agg}  $per{agg}",$color{$amino{agg}});
    $im->string(gdSmallFont,360,365,"GGT  $amino{ggt}  $per{ggt}",$color{$amino{ggt}});
    $im->string(gdSmallFont,360,385,"GGC  $amino{ggc}  $per{ggc}",$color{$amino{ggc}});
    $im->string(gdSmallFont,360,405,"GGA  $amino{gga}  $per{gga}",$color{$amino{gga}});
    $im->string(gdSmallFont,360,425,"GGG  $amino{ggg}  $per{ggg}",$color{$amino{ggg}});

    $im->string(gdSmallFont,15,465,"yellow  minus charge",$yellow);
    $im->string(gdSmallFont,165,465,"red  plus charge",$red);
    $im->string(gdSmallFont,285,465,"blue  noncharge",$blue);
    $im->string(gdSmallFont,400,465,"green  nonpolar",$green);

    $im->string(gdSmallFont,20,485,"exception",$black);
    $v=485;
    $h=100;
    foreach(sort keys(%exception)){
	$color{$exception{$_}{amino}}=$black if($color{$exception{$_}{amino}}=="");	
	$CoDoN=uc $_;
	$im->string(gdSmallFont,$h,$v,"$CoDoN  $exception{$_}{amino}  $exception{$_}{per}",$color{$exception{$_}{amino}});
	$v+=20;
	if($v == 545){
	    $v=485;
	    $h+=100;
	}
    }

    mkdir ("graph",0777);
    open(OUT,'>graph/'."$filename");
    binmode OUT;
    print OUT $im->png;
    close(OUT);
    
    msg_gimv("graph/$filename") if($output eq "show");
}


#_codon_table2 ver.20020902-01
#scripting by Kazuharu Gaou Arakawa(gaou@g-language.org)
#Inspire interface for the codon_usage()

sub _codon_table2{
    &opt_default(output=>"show",filename=>"codon_table.html");
    my @args = opt_get(@_);
    my $result = shift @args;
    my $filename = opt_val("filename");
    my $output = opt_val("output");

    my (%amino, %data, %per, $amino_total, $amino);
    my (%exception, %color);

    foreach my $tmp (qw(D E)){	$color{$tmp} = 'gold' }
    foreach my $tmp (qw(R K H)){  $color{$tmp} = 'red' }
    foreach my $tmp (qw(N Q S T Y)){  $color{$tmp} = 'blue'  }
    foreach my $tmp (qw(A G V L I P F M W C)){  $color{$tmp} = 'green'  }
    $color{'/'} = 'black';

    foreach $amino (keys(%{$result})){
	$amino_total = 0;
	foreach my $codon (keys(%{$$result{$amino}})){
	    $amino_total += $$result{$amino}{$codon};
	}
	foreach my $codon (keys(%{$$result{$amino}})){
	    if ($$result{$amino}{$codon} > $data{$codon}){
		if ($data{$codon} != ''){
		    $exception{$codon}{amino} = $amino{$codon};
		    $exception{$codon}{per} = $per{$codon};
		}
		$data{$codon} = $$result{$amino}{$codon};
		$amino{$codon} = $amino;
		$per{$codon} = sprintf("%.3f", $$result{$amino}{$codon} / $amino_total);
	    }
	    else{
		$exception{$codon}{amino} = $amino;
		$exception{$codon}{per} = sprintf("%.3f",$$result{$amino}{$codon} / $amino_total);
	    }
	}
    }

    mkdir ('graph', 0777);
    open(FILE, '>graph/' . $filename);
    print FILE qq(
		  <HTML>
		  <HEAD><TITLE>Codon Table v.2</TITLE>
		  <style type="text/css">
		  <!-- 
		      FONT{font-size:8pt} 
		  -->
		  </style>
		  </HEAD>
		  <BODY bgcolor="white">
		  <table border=0 cellpadding=0 cellspacing=0><tr><td align=center valign=top>
		  <table border rules=all cellspacing=0 cellpadding=6>
		  <tr><td align=center rowspan=2><font color="red">first</font></td>
		      <td colspan=4 align=center><font color="green">second</font></td>
		      <td align=center rowspan=2><font color="blue">third</font></td>
		  </tr><tr>
		      <td align=center><font color="green">T</font></td>
		      <td align=center><font color="green">C</font></td>
		      <td align=center><font color="green">A</font></td>
		      <td align=center><font color="green">G</font></td>
		  </tr>
		  );

    foreach my $first (qw(T C A G)){
	print FILE qq(<tr><td align=center><font color="red">$first</font></td>);
	foreach my $second (qw(T C A G)){
	    print FILE qq(<td>);
	    foreach my $third (qw(T C A G)){
		my $triplet = $first . $second . $third;
		print FILE qq(<table border=0 cellpadding=4 width="100\%"><tr>);
		print FILE '<td align=center><font color="', $color{$amino{lc($triplet)}};
		print FILE '">', $triplet, '</td>', "\n";
		print FILE '<td align=center><font color="', $color{$amino{lc($triplet)}};
		print FILE '">', $amino{lc($triplet)}, '</td>', "\n";
		print FILE '<td align=center><font color="', $color{$amino{lc($triplet)}};
		print FILE '">', $per{lc($triplet)}, '</td>', "\n";
	    }
	    print FILE qq(</tr></table></td>);
	}
	print FILE qq(
		      <td align=center>
		      <table border=0 cellpadding=4>
		      <tr><td><font color="blue">T</font></td></tr>
		      <tr><td><font color="blue">C</font></td></tr>
		      <tr><td><font color="blue">A</font></td></tr>
		      <tr><td><font color="blue">G</font></td></tr>
		      </table>
		      </td>
		      );
    }
    print FILE qq(
		  </table><BR>
		  <font color="gold" style="font-size:9pt">yellow minus charge</font>
		  \&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;
		  <font color="red" style="font-size:9pt">red plus charge</font>
                  \&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;
		  <font color="blue" style="font-size:9pt">blue noncharge</font>
		  \&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;
		  <font color="green" style="font-size:9pt">green nonpolar</font>
		  \&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;
		  <a href="http://prowl.rockefeller.edu/aainfo/contents.htm"
		  style="font-size:9pt" target="_blank">(learn more here)</a>
		  <BR><BR>
		  <div align=left>exceptions:</div>   
		  <table border=0 cellpadding=10 cellspacing=0><tr>
		  );

    my $i = 0;
    foreach my $triplet (sort keys(%exception)){
	$color{$exception{$triplet}{amino}} = 'black' 
	    if($color{$exception{$triplet}{amino}} eq '');	
	print FILE '<td><font color="', $color{$exception{$triplet}{amino}};
	print FILE '">', uc $triplet, '&nbsp;&nbsp;&nbsp;', 
	$exception{$triplet}{amino}, '&nbsp;&nbsp;&nbsp;';
	print FILE $exception{$triplet}{per}, '</font></td>';
	if ($i == 4){
	    print FILE '</tr><tr>';
	    $i = -1;
	}
	$i ++;
    }
    print FILE qq(
		  </tr></table>
		  <BR>
		  <form method="GET" action="http://www.kazusa.or.jp/codon/cgi-bin/spsearch.cgi">
		  Search <a href="http://www.kazusa.or.jp/codon/">Kazusa Codon Usage Database</a>: 
		  <input size="20" name="species">
		  <input type=hidden name="c" value="i">
		  <input type=submit value="Submit">
		  </form>
		  );

    print FILE "</td></tr></table></body></html>";
    close(FILE);
    
    msg_gimv("graph/$filename") if($output eq "show");
}


#codon_amino_printer ver.20010722-01
#scripting by Koya Mori(mory@g-language.org)
#This program prints result of codon or amino counter.
#&codon_amino_printer(pointer codon_counter,  boolean debug);
sub _codon_amino_printer{
    &opt_default(output=>"stdout");
    my @args=opt_get(@_);
    my $result=shift @args;
    my $printer=opt_val("output");
    my $filename=opt_val("filename");
    my $total;

    
    if($printer eq "f"){
	open(FILE,">>$filename");
	foreach(sort keys(%$result)){
	    print FILE $_,",",$result->{$_},"\n";
	    $total+=$result->{$_};
	}
	print FILE "total,",$total,"\n";
	print FILE "\n\n";
	close(FILE);
    }
    else{
	foreach(sort keys(%$result)){
	    &msg_send($_," -> ",$result->{$_},"\n");
	    $total+=$result->{$_};
	}
	&msg_send("total -> ",$total,"\n");    
    }
}


#codon_usage_printer ver.20010722-01
#scripting by Koya Mori(mory@g-language.org)
#This program prints result of codon usage.
#&codon_usage_printer(pointer codon_usage,  boolean debug);
sub _codon_usage_printer{
    &opt_default(output=>"stdout");
    my @args=opt_get(@_);
    my $result=shift @args;
    my $printer=opt_val("output");
    my $filename=opt_val("filename");
    my $amino_total;
    my $key;
    my $key2;
    my $total;


    if($printer eq "f"){
	open(FILE,">>$filename");
	foreach $key (sort keys(%$result)){
	    $amino_total=0;
	    foreach $key2 (sort keys(%{$$result{$key}})){
		$amino_total+=$$result{$key}{$key2};
	    }
	    foreach $key2 (sort keys(%{$$result{$key}})){
		print FILE $key,",",$key2,",",$$result{$key}{$key2},",",sprintf("%.3f",$$result{$key}{$key2}/$amino_total),"\n";
		$total+=$$result{$key}{$key2};
	    }
	}
	print FILE "total,$total\n";
	print FILE "\n\n";
	close(FILE);
    }
    else{
	foreach $key (sort keys(%$result)){
	    $amino_total=0;
	    foreach $key2 (sort keys(%{$$result{$key}})){
		$amino_total+=$$result{$key}{$key2};
	    }
	    foreach $key2 (sort keys(%{$$result{$key}})){
		&msg_send($key," -> ",$key2," -> ",$$result{$key}{$key2},"   ",sprintf("%.3f",$$result{$key}{$key2}/$amino_total),"\n");
		$total+=$$result{$key}{$key2};
	    }
	}
	&msg_send("total -> $total\n");
    }
}


sub DESTROY {
    my $self = shift;
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Seq::Codon - Perl extension for blah blah blah

=head1 SYNOPSIS

  use G::Seq::Codon;
  blah blah blah

=head1 DESCRIPTION

Stub documentation for G::Seq::Codon was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.

=head1 AUTHOR

A. U. Thor, a.u.thor@a.galaxy.far.far.away

=head1 SEE ALSO

perl(1).

=cut
