#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2011 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#
# written by Kazuharu Arakawa <gaou@sfc.keio.ac.jp> at
# G-language Project, Institute for Advanced Biosciences, Keio University.
#

package G::Shell::EUtils;

use strict;
use base qw(Exporter);
use XML::Simple;

use G::Shell::Help;

our @EXPORT = qw(
		 pubmed
		 entrez
		 );

#:::::::::::::::::::::::::::::::::
#       Perldoc
#:::::::::::::::::::::::::::::::::

=head1 NAME

  G::Shell::EUtils - G-language Shell helper module for searching NCBI.

=head1 DESCRIPTION

  This class is a part of G-language Genome Analysis Environment, 
  providing functions for searching NCBI with EUtilities.

=cut

#::::::::::::::::::::::::::::::
#          Variables
#::::::::::::::::::::::::::::::

# Base URLs of the NCBI E-utilities endpoints used by entrez(). Each ends
# with '?' so that an encoded query string can be appended directly.
# HTTPS is used because NCBI serves E-utilities over HTTPS.
my $esearch  = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?';
my $esummary = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?';

# Lookup table of the Entrez database names accepted by entrez():
# database name => 1.
my %einfo;

$einfo{$_} = 1 for qw(
			pubmed protein nucleotide nuccore nucgss nucest 
			structure genome books cancerchromosomes cdd gap domains gene genomeprj 
			gensat geo gds homologene journals mesh ncbisearch nlmcatalog omia omim 
			pmc popset probe proteinclusters pcassay pccompound pcsubstance snp 
			taxonomy toolkit unigene unists
			);

#::::::::::::::::::::::::::::::
#    Let the code begin...
#::::::::::::::::::::::::::::::

=head2 pubmed

  Name: pubmed   -   search PubMed in G-language Shell

  Description:
    Search PubMed by keyword through NCBI EUtilities. This is
    intended for quick lookup from the G-language Shell, so
    only the top ten hits are reported.

  Usage:
    pubmed <keyword>

        eg. pubmed arakawa k[au] keio[ad] g-language

 Options:
   None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070601-01 initial posting

=cut

# Shell command: search PubMed for the given keyword.
#
# Takes the keyword as its first argument. When the keyword is false
# (undef, empty string or '0') the 'pubmed' help entry is shown and
# nothing is returned; otherwise the search is delegated to entrez()
# with the database fixed to 'pubmed'.
sub pubmed {
    my ($keyword) = @_;

    if (!$keyword) {
        help('pubmed');
        return;
    }

    return entrez('pubmed', $keyword);
}



=head2 entrez

  Name: entrez   -   search NCBI Entrez in G-language Shell

  Description:
    Search NCBI Entrez by keyword through NCBI EUtilities.
    This is intended for quick lookup from the G-language Shell,
    so only the top ten hits are reported.

  Usage:
    entrez <database> <keyword>

    <database> is the name of NCBI database to search with the
    <keyword>. Examples are: pubmed, protein, nucleotide, genome

  Example:
    G >entrez genome carsonella rudii PV
       2 entries found in GENOME: (Showing up to 10 hits)

       1. Accession Number:   NC_008512
          Candidatus Carsonella ruddii PV, complete genome

       2. Accession Number:   NC_002952
          Staphylococcus aureus subsp. aureus MRSA252, complete genome

    G >$gb = new G("NC_008512");
    Retrieving sequence from REFSEQ...


    Accession Number: NC_008512

      Length of Sequence :    159662
               A Content :     66734 (41.80%)
               T Content :     66481 (41.64%)
               G Content :     12946 (8.11%)
               C Content :     13501 (8.46%)
                  Others :         0 (0.00%)
              AT Content :    83.44%
              GC Content :    16.56%


 Options:
   None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070601-01 initial posting

=cut

# Shell command: search an NCBI Entrez database and print the top hits.
#
# Arguments:
#   $db      - Entrez database name; must be one of the keys of %einfo.
#   $keyword - Entrez query term.
#
# Behaviour:
#   * Shows the 'entrez' help entry when either argument is missing.
#   * Prints a message to STDERR when $db is not a known database.
#   * Runs ESearch (at most 10 ids), prints the hit count, then runs
#     ESummary on the returned ids and prints one summary per hit.
#     PubMed hits are shown as PMID / first author / title / source;
#     hits from any other database as Caption ("Accession Number") / title.
#   * When a request fails, the HTML error page produced by
#     HTTP::Response is printed to STDOUT.
#
# The return value is not meaningful.
sub entrez {
    require LWP::UserAgent;

    my ($db, $keyword) = @_;

    # Both the database and the keyword are required.
    my $has_db      = defined($db)      && length($db);
    my $has_keyword = defined($keyword) && length($keyword);
    unless ($has_db && $has_keyword) {
        help('entrez');
        return;
    }

    unless ($einfo{$db}) {
        print STDERR "  Database \"$db\" is not available.\n";
        return;
    }

    my $ua = LWP::UserAgent->new;

    # Parameters shared by the ESearch and ESummary requests.
    my %common = (
        email   => 'gaou@sfc.keio.ac.jp',
        tool    => 'g-language.org',
        db      => $db,
        retmax  => 10,
        retmode => 'xml',
    );

    # --- ESearch: keyword -> list of record ids ---------------------------
    my $search = $ua->get($esearch . _entrez_query(%common, term => $keyword));
    unless ($search->is_success) {
        print $search->error_as_HTML;
        return;
    }

    my $found = XMLin($search->content);
    my $count = $found->{Count} || 0;

    printf "    %d %s found in %s: (Showing up to 10 hits)\n\n",
        $count, ($count == 1 ? 'entry' : 'entries'), uc($db);

    # <IdList> yields a plain scalar for one <Id> and an array ref for
    # several; with no hits there is no Id at all.
    my $id_list = ref($found->{IdList}) eq 'HASH' ? $found->{IdList}->{Id} : undef;
    my @ids     = _entrez_as_list($id_list);

    # Nothing to summarise: skip the ESummary round trip entirely.
    return unless @ids;

    # --- ESummary: ids -> document summaries ------------------------------
    my $summary = $ua->get($esummary . _entrez_query(%common, id => join(',', @ids)));
    unless ($summary->is_success) {
        print $summary->error_as_HTML;
        return;
    }

    my $parsed = XMLin($summary->content);

    # <DocSum> is a hash ref for a single hit and an array ref for several;
    # both cases are handled by the same loop.
    my $rank = 0;
    foreach my $entry (_entrez_as_list($parsed->{DocSum})) {
        next unless ref($entry) eq 'HASH';
        $rank++;

        my %info = _entrez_docsum_info($entry);

        if ($db eq 'pubmed') {
            printf "    %2d. PMID:   %s\n", $rank, $entry->{Id};
            printf "        %s et al. \"%s\"\n        %s  %s\n\n",
                $info{FirstAuthor}, $info{Title}, $info{Source}, $info{SO};
        } else {
            printf "    %2d. Accession Number:   %s\n", $rank, $info{Caption};
            printf "        %s\n\n", $info{Title};
        }
    }

    return;
}

# Normalise a parsed XML value to a list: undef gives the empty list, an
# array ref gives its elements, anything else gives a one-element list.
sub _entrez_as_list {
    my ($value) = @_;

    return ()      unless defined $value;
    return @$value if ref($value) eq 'ARRAY';
    return ($value);
}

# Flatten the <Item> children of one parsed DocSum into a Name => content
# hash. The 'AuthorList' item is reduced to the content of its first
# author, stored under the key 'FirstAuthor'.
sub _entrez_docsum_info {
    my ($entry) = @_;

    my %info;
    foreach my $item (_entrez_as_list($entry->{Item})) {
        next unless ref($item) eq 'HASH' && defined $item->{Name};

        if ($item->{Name} eq 'AuthorList') {
            my ($first) = _entrez_as_list($item->{Item});
            $info{FirstAuthor} = ref($first) eq 'HASH' ? $first->{content} : undef;
        } else {
            $info{ $item->{Name} } = $item->{content};
        }
    }

    return %info;
}

# Build a URL query string ("k1=v1&k2=v2...") from name/value pairs.
# Names and values are percent-encoded; keys are sorted so the resulting
# URL is deterministic.
sub _entrez_query {
    my (%params) = @_;

    return join '&',
        map { _entrez_escape($_) . '=' . _entrez_escape($params{$_}) }
        sort keys %params;
}

# Percent-encode a string for use in a URL query component. Every byte
# outside the unreserved set (A-Z a-z 0-9 - _ . ~) is escaped; character
# strings are encoded to UTF-8 first. undef is treated as the empty string.
sub _entrez_escape {
    my ($text) = @_;

    $text = '' unless defined $text;
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord($1))/ge;

    return $text;
}

1;
