#!/usr/bin/env perl

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2007 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Annotation.pm,v 1.1 2002/07/30 17:44:27 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#
# written by Kazuharu Arakawa <gaou@sfc.keio.ac.jp> at
# G-language Project, Institute for Advanced Biosciences, Keio University.
#


package G::IO::Annotation;

use strict;
use G::Messenger;
use G::Seq::Primitive;

require Exporter;

# Parent classes of this package.
# NOTE(review): AutoLoader is listed here, but nothing visible in this file
# loads it (there is no "require AutoLoader") -- confirm whether it is
# loaded elsewhere or is a leftover.
our @ISA = qw(Exporter AutoLoader);
# Subroutines exported into the caller's namespace by default.  Each of
# them reads the object to work on as its first argument.
our @EXPORT = qw(
	     run_glimmer
	     annotate_with_glimmer
	     annotate_with_LORF
	     );


#::::::::::::::::::::::::::::::
#          Perldoc
#::::::::::::::::::::::::::::::


=head1 NAME

G::IO::Annotation

=head1 SYNOPSIS

 use G::IO::Annotation;
 G::IO::Annotation::annotate_with_glimmer($genome, $fasta_file);
 G::IO::Annotation::annotate_with_LORF($genome, $source_genome);
   
=head1 DESCRIPTION

 Annotates the genome sequence. 
 Provides experimental, incomplete functions only.
 Intended for internal use only.

=head1 AUTHOR

Kazuharu Arakawa, gaou@sfc.keio.ac.jp

=cut



#::::::::::::::::::::::::::::::
#   Let the code begin...
#::::::::::::::::::::::::::::::

# run_glimmer($this, $file)
#
#   Runs the external program './run-glimmer2' (looked up relative to the
#   current working directory) with $file as its only argument.
#
#   $this - first argument; not used by this subroutine.
#   $file - path handed to run-glimmer2.  When it is undefined or empty the
#           program is started without arguments.
#
#   Returns the raw status from system(): 0 on success, -1 when the program
#   could not be started, otherwise the wait status (exit code in the high
#   byte).  A warning is printed on failure; the status is still returned so
#   that callers which ignore it keep working.
sub run_glimmer {
    my $this = shift;
    my $file = shift;

    my @command = ('./run-glimmer2');
    push(@command, $file) if (defined $file && length $file);

    # The "system { PROGRAM } LIST" form never goes through a shell, so
    # whitespace or shell metacharacters in $file reach the program as one
    # literal argument instead of being split or interpreted.
    my $status = system { $command[0] } @command;

    if ($status == -1){
        warn "run_glimmer: could not execute $command[0]: $!\n";
    }elsif ($status != 0){
        warn sprintf("run_glimmer: %s failed (exit code %d, signal %d)\n",
                     $command[0], $status >> 8, $status & 127);
    }

    return $status;
}

# annotate_with_glimmer($this, $file)
#
#   Loads a FASTA file into $this and annotates it with the gene
#   coordinates found in the file 'g2.coord' in the current working
#   directory.
#
#   $this - object to fill.  It must provide a get_geneseq() method, which
#           is called with the name of each new CDS feature.
#   $file - path of the FASTA file.
#
#   Effects on $this:
#     {LOCUS}{id}  first word of the FASTA header line
#     {SEQ}        the letters of all sequence lines, lower-cased, appended
#                  to whatever {SEQ} already holds
#     {COMMENT}    fixed comment line
#     {CDS0}, {FEATURE0}       get a 'dummy' flag
#     {FEATURE<2k-1>}          'gene' feature of the k-th coordinate row
#     {FEATURE<2k>}            'CDS' feature of the k-th coordinate row,
#                              including its translation
#     {CDS<k>}                 CDS record pointing at FEATURE<2k-1>
#
#   Each used row of g2.coord is split on whitespace; the second and third
#   columns are taken as the two ends of the gene.  When the second column
#   is larger than the third the gene is stored as 'complement' with the
#   ends swapped.  Rows that do not carry two integer coordinates (blank
#   lines, headers) are skipped instead of producing empty features.
#
#   Dies when the FASTA file or g2.coord cannot be opened.
sub annotate_with_glimmer {
    my $this = shift;
    my $file = shift;

    die "annotate_with_glimmer: no FASTA file name given\n"
        unless (defined $file && length $file);

    open(my $fasta, '<', $file)
        or die "annotate_with_glimmer: cannot open FASTA file '$file': $!\n";
    while (my $row = <$fasta>){
        if ($row =~ /^>/){
            # Header line: the first word after '>' becomes the locus id.
            (my $header = $row) =~ s/^>//;
            my @words = split(' ', $header);
            $this->{LOCUS}->{id} = $words[0];
        }else{
            # Sequence line: keep letters only, stored in lower case.
            $row =~ s/[^a-zA-Z]//g;
            $this->{SEQ} .= lc($row);
        }
    }
    close($fasta);

    $this->{COMMENT} = "COMMENT     Annotated with G-language GAE. Arakawa et al. (2003) Bioinformatics\n";
    $this->{"CDS0"}->{dummy} = 1;
    $this->{"FEATURE0"}->{dummy} = 1;

    my $feature_no = 0;   # number of the last FEATURE<n> entry written
    my $cds_no = 0;       # number of the last CDS<n> entry written

    open(my $coord, '<', 'g2.coord')
        or die "annotate_with_glimmer: cannot open 'g2.coord': $!\n";
    while (my $row = <$coord>){
        my @col = split(' ', $row);

        # Only rows with two integer coordinates describe a gene.
        next unless (@col >= 3
                     && $col[1] =~ /^\d+$/
                     && $col[2] =~ /^\d+$/);

        # Every gene takes two consecutive feature numbers: first the
        # 'gene' feature, then its 'CDS' feature.
        $cds_no ++;
        my $gene_no = ++$feature_no;
        my $cdsf_no = ++$feature_no;
        my $name = sprintf("g%04d", $cds_no);

        my ($start, $end, $direction) = ($col[1] > $col[2])
            ? ($col[2], $col[1], "complement")
            : ($col[1], $col[2], "direct");

        my $gene = ($this->{"FEATURE$gene_no"} ||= {});
        $gene->{type} = 'gene';
        $gene->{gene} = $name;
        $gene->{note} = "locus_tag: $name";
        $gene->{on} = 1;

        my $cds = ($this->{"CDS$cds_no"} ||= {});
        $cds->{feature} = $gene_no;
        $cds->{on} = 1;

        my $cds_feature = ($this->{"FEATURE$cdsf_no"} ||= {});
        $cds_feature->{cds} = $cds_no;
        $cds_feature->{type} = 'CDS';
        $cds_feature->{gene} = $name;
        $cds_feature->{function} = "orf; Unknown";
        $cds_feature->{note} = "predicted by Glimmer 2.2";
        $cds_feature->{codon_start} = 1;
        $cds_feature->{transl_table} = 11;
        $cds_feature->{product} = "orf, hypothetical protein";
        $cds_feature->{protein_id} = "N/A";
        $cds_feature->{db_xref} = "N/A";
        $cds_feature->{on} = 1;

        for my $record ($cds, $gene, $cds_feature){
            $record->{start} = $start;
            $record->{end} = $end;
            $record->{direction} = $direction;
        }

        # The position must be in place before the sequence is fetched.
        $cds_feature->{translation} =
            translate($this->get_geneseq("FEATURE$cdsf_no"));
    }
    close($coord);
}



# annotate_with_LORF($this, $gb)
#
#   Fills $this with open reading frames found by a plain ATG-to-stop scan
#   of the sequence held in $gb, on both strands.
#
#   $this - object (hash reference) that receives the annotation.
#   $gb   - source object; its LOCUS, HEADER, COMMENT, FEATURE0 and SEQ
#           entries are copied into $this (references are shared, not
#           cloned).  SEQ is scanned in lower case.
#
#   Scan rules, applied to each strand:
#     * every 'atg', including one at the very first base, is a candidate
#       start;
#     * the ORF runs codon by codon to the nearest in-frame stop codon
#       (tag, taa or tga); stop codons in other frames are ignored;
#     * a start that lies in frame inside a stretch already scanned from an
#       earlier start is skipped, so only the longest ORF per stop codon is
#       reported;
#     * an ORF is kept when its stop codon begins more than 49 bases after
#       the start of its ATG;
#     * candidates without an in-frame stop before the end of the sequence
#       are dropped.
#
#   Each kept ORF gets the next number n and is stored as one hash shared by
#   $this->{"FEATUREn"} and $this->{"CDSn"} with the keys start, end,
#   direction, type, gene, feature, cds and on.  start/end are 1-based,
#   inclusive positions on the sequence as stored in $gb->{SEQ} and include
#   the stop codon.
#
#   The second strand is obtained from complement(); the coordinate
#   conversion below relies on it returning the reverse complement
#   (NOTE(review): confirm against G::Seq::Primitive).
sub annotate_with_LORF {
    my $this = shift;
    my $gb = shift;
    my $count = 0;

    $this->{"CDS0"}->{dummy} = 1;
    for my $key (qw/LOCUS HEADER COMMENT FEATURE0 SEQ/){
        $this->{$key} = $gb->{$key};
    }

    $this->{FEATURE0}->{on} = 1 unless(defined $this->{FEATURE0});

    my $forward = lc(defined $gb->{SEQ} ? $gb->{SEQ} : '');
    my $genome_len = length($forward);
    my %is_stop = map { $_ => 1 } qw/tag taa tga/;

    for my $direction (qw/direct complement/){
        my $seq = ($direction eq 'direct') ? $forward : complement($forward);
        my $seq_len = length($seq);

        # Per reading frame: offset up to which codons were already walked
        # from an earlier start.  An in-frame ATG before that offset can
        # only give a shorter ORF ending at the same stop codon.
        my @walked_to = (-1, -1, -1);

        # Start at -1 so that the first index() call looks at offset 0.
        my $atg = -1;
        while (0 <= ($atg = index($seq, 'atg', $atg + 1))){
            my $frame = $atg % 3;
            next if ($atg < $walked_to[$frame]);

            # Walk whole codons; only these offsets are in frame.
            my $stop = -1;
            for (my $pos = $atg + 3; $pos + 3 <= $seq_len; $pos += 3){
                if ($is_stop{ substr($seq, $pos, 3) }){
                    $stop = $pos;
                    last;
                }
            }

            if ($stop < 0){
                # No stop in this frame up to the end of the sequence, so
                # no later start in this frame can have one either.
                $walked_to[$frame] = $seq_len;
                next;
            }
            $walked_to[$frame] = $stop;

            next unless ($stop - $atg > 49);

            $count ++;
            my $feature = ($this->{"FEATURE$count"} ||= {});

            if ($direction eq 'complement'){
                # Offset p (0-based) on the reversed strand is position
                # $genome_len - p (1-based) on the stored strand.
                $feature->{start} = $genome_len - ($stop + 2);
                $feature->{end} = $genome_len - $atg;
            }else{
                $feature->{start} = $atg + 1;
                $feature->{end} = $stop + 3;
            }
            $feature->{direction} = $direction;

            $feature->{type} = "CDS";
            $feature->{gene} = "FEATURE$count";
            $feature->{feature} = $count;
            $feature->{cds} = $count;
            $feature->{on} = 1;
            $this->{"CDS$count"} = $feature;
        }
    }
}


1;


