#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2008 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: ReL8.pm,v 1.2 2002/07/30 17:40:56 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#
# written by Tatekimi Matsuzaki <tero@g-language.org> at
# G-language Project, Institute for Advanced Biosciences, Keio University.
#

package G::System::ReL8;

use SubOpt;
use G::Tools::Fasta;
use G::Tools::SIM4;
use G::Tools::Cap3;
use G::Tools::Blast;
use G::Messenger;

use strict("vars");
use base qw(Exporter);
use SelfLoader;

our @EXPORT = qw(
		 rmpolya
		 redundancy
		 redundancy_fasta
		 redundancy_sim4
		 redundancy_cap3
		 cluster
		 cap3_parse
		 blast_parse
		 sim4_parse
		 fasta_parse
		 file_maker  
		 file_maker_fasta
		 Totals
		 output_maker
);

__DATA__

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::

sub file_maker_fasta{
    &opt_default(directory_name=>"ReL8/",sdirectory_name=>'query_fasta_files',split=>'on');

    local $/ = ">";

    my $this = shift @_;
    my $file = shift @_;

    my @args=&opt_get(@_);
    my $directory_name=&opt_val("directory_name");
    my $sdirectory_name=&opt_val("sdirectory_name");
    my $split=&opt_val("split");
    my @filelist=();
    my @idlist;
    my $id_number=0;
    my $id_name='';
    my $switch=0;
    my $i = 0;
    my %seq;
    my $p;


    mkdir($directory_name,0777)if(opendir(DIR,$directory_name) != 1);
    mkdir($directory_name.$sdirectory_name,0777);
    chdir($directory_name.$sdirectory_name);

    open(FILE1,$file);
    open(FILE2,'>master.seq');
    open(FILE3,'>ID_list.txt');

    while(<FILE1>){

	$i++;

	next if($i == 1);

	$id_number++;
	split(/[\n]/,$_,2);

	$id_name = $_[0];
	$id_name =~ s/^\s+//g;

        $this->{$id_number}->{ID} = $id_name;
        $this->{$id_name}->{NUM} = $id_number;

        push(@filelist,$id_name);

	$this->{$id_number}->{SEQ} = $_[1];
	$this->{$id_name}->{SEQ} = $_[1];

        $this->{$id_number}->{SEQ} =~ s/[^a-zA-Z]//g;
        $this->{$id_name}->{SEQ} =~ s/[^a-zA-Z]//g;



	open(FILE,">".$this->{$id_number}->{ID}.'.seq') if($split ne 'off');
	print FILE ">$this->{$id_number}->{ID}\n".uc($this->{$id_number}->{SEQ})."\n";
	print FILE3 $id_number."       ".$this->{$id_number}->{ID}."        ".length($this->{$id_number}->{SEQ})."\n";
	print FILE2 ">$this->{$id_number}->{ID}\n".uc($this->{$id_number}->{SEQ})."\n";

	close(FILE);
	msg_send(".");
    }


    close(FILE1);
    close(FILE2);
    close(FILE3);

    chdir('../');

    return(@filelist);

}



sub file_maker{
    
    &opt_default(directory_name=>"/tmp/ReL8/",sdirectory_name=>time());
    
    my @args=&opt_get(@_);
    my $gb = shift @args;
    my $directory_name=&opt_val("directory_name");
    my $sdirectory_name=&opt_val("sdirectory_name");
    my @filename=();
    my $id_number=0;
    
    mkdir($directory_name,0777)if(opendir(DIR,$directory_name) != 1);
    mkdir($directory_name.$sdirectory_name,0777);
    chdir($directory_name.$sdirectory_name);

    open(FILE2,'>master.seq');
    open(FILE3,'>ID_list.txt');

    do{
	$filename[$id_number]=$gb->{LOCUS}->{id};
	print FILE3 $id_number."       ".$gb->{LOCUS}->{id}."\n";
	open(FILE,'>'.$id_number.'.seq');
	print FILE ">$id_number"._.length($gb->{SEQ})."\n".uc($gb->{SEQ})."\n";
	print FILE2 ">$id_number"._.length($gb->{SEQ})."\n".uc($gb->{SEQ})."\n";
	close(FILE);
	$id_number++;
    }while($gb->next_locus("no msg"));
    $gb->rewind_genome();
    $gb->next_locus("no msg");
    close(FILE2);
    close(FILE3);
    return (@filename);
}

sub Totals{
    &opt_default(style=>"Remove",output=>"STDOUT");
    my @args = &opt_get(@_);
    my $tmp = shift @args;
    my $style = &opt_val("style");
    my %Re_list =%$tmp;
    my @list=();
    my %tmp2=();
    my $best;
    my $best_name;
    my $moin;

    my $hohoho;
    
    $best = 1 if($style eq "Remove");

   
    foreach $moin (keys %Re_list){
	my @tmp2 = @{$Re_list{$moin}};
	$best = 1 if($style eq "Remove");
	$best_name=();
	foreach (@tmp2){
	    if(/(\w+)\_(\d+)/){
		if($style eq "Remove"){
		   if($best < $2){
		       $tmp2{$best_name}++ if($best_name);
		       $best = $2;
		       $best_name = $1;
		   }else{
		       $tmp2{$1}++ if($best_name);
		   }
	       }elsif($style eq "Include"){
		   $tmp2{$1}++ if($best_name);
	       }
	    }
	}
    }
    @list = keys %tmp2;
    foreach $hohoho(@list){
	msg_send($hohoho,"\n");
    }
    return (@list);
}

sub output_maker{
    &opt_default(output=>"STDOUT",file_name=>"default.gbk");
    my @args = &opt_get(@_);
    my $gb = shift @args;
    my $Re_name = shift @args;  
    my $output =  &opt_val("output"); 
    my $file_name = &opt_val("file_name");
    my $flag=0;

    do{
	$flag=0;
	foreach(@$Re_name){ 
	    $flag=1 if($gb->{LOCUS}->{id} eq $_);    
	} 
	$gb->make_gb($file_name, "attach")if($flag==0);
    }while($gb->next_locus("no msg"));
}

sub cluster{
   &opt_default(output=>"STDOUT");
   my @args = &opt_get(@_);
   my $tmp = shift @args;
   my %Re_list =%$tmp;

   my @list = keys %Re_list;
   my $list_name1;
   my $list_name2;
   my $list_name3;
   my $list_name4;

   my @sub_class=();
   my %class=();

   my $m=0;

   foreach $list_name1 (@list){
       open(FILE,'>'.$list_name1.'-class.seq');
       foreach $list_name2 (@{$Re_list{$list_name1}}){
	   push(@sub_class,$list_name2);
	   open(FILE2,$list_name2.'.seq');
	   
	   while(<FILE2>){
	       print FILE $_;
	   }
	   
	   close(FILE2);
	   
	   $m=0;
	   
	   foreach $tmp (@list){
	       $m++;
	       splice(@list,$m,1)if($tmp eq $list_name2);
	   }
       }
       $class{$list_name1}=[@sub_class];
       close(FILE);
   }

   foreach $list_name3 (keys %class){
       foreach $list_name4 (@{$class{$list_name3}}){
	   _sim4($list_name4.".seq",$list_name3.'-class.seq',-output=>$list_name4."-".$list_name3."-class.sim");
       }
   }  
}

sub blast_parse{
    &opt_default(output=>"STDOUT",Identities=>"NULL",db_file=>"master.seq");
    my @args=opt_get(@_);
    my $file_number = shift;
    my $Identities = &opt_val("Identities");
    my $db_file = &opt_val("db_file");
    my $i=0;
    my %Re_list;
    my $flag=0;
    my $tmp = 0;
    my $m=0;
    my %score=();
    my @R=();
    
    while( $i < $file_number){
	$score{bunsi}=0;
	$score{bunbo}=0;
	$flag=0;
	open(FILE,$i.".bla");
	while(<FILE>){
	    if(/^Sequences producing significant alignments\:/){
		$flag=1;
	    }elsif(/^(\d+)\_(\d+) +\d+ +(\S+)/ && $flag == 1){
		push(@R,$1."_".$2);
		msg_send($_);
	    }elsif(/^\>(\S+)/){
		if($score{bunbo}!=0){
		    if($Identities ne "NULL"){
			$m=0;
			if($score{bunsi}/$score{bunbo}*100 < $Identities){
			    foreach $tmp (@R){
				$m++;
				splice(@R,$m,1)if($tmp eq $1);
			    }
			}
		    }
		}
		$score{bunsi}=0;
		$score{bunbo}=0;   
		$flag++;
		$tmp=$1;
	    }elsif(/^ Identities \= (\d+)\/(\d+) +\((\d+)\%\)/){
		$score{bunsi}=$score{bunsi}+$1;
		$score{bunbo}=$score{bunbo}+$2;        
	    }elsif(/^  Database\: +$db_file/ && $flag >1){
		if($score{bunbo}!=0){
		    if($Identities ne "NULL"){
			$m=0;
			if($score{bunsi}/$score{bunbo}*100 < $Identities){
			    foreach $tmp (@R){
				$m++;
				splice(@R,$m,1)if($tmp eq $1);
			    }
			}
		    }
		}
		msg_send($i,"  ",@R,"\n");
		$Re_list{$i}= [@R];
		@R=();
		last;
	    }
	}
	$i++;
    }
    return (%Re_list);   
}

sub fasta_parse{
    &opt_default(output=>"STDOUT",Identities=>"NULL");
    my @args=opt_get(@_);
    my $file_number = shift;
    my $Identities = &opt_val("Identities");
    my $i=0;
    my %Re_list;
    my $flag=0;
    my @R=();
    
    while($i< $file_number){
	$flag=0;
	open(FILE,$i.".fst");
	while(<FILE>){
	    if(/The best scores are\:/){
		$flag=1;
	    }elsif(/(\w+)\_(\d+) +\(\d+\) +\[f\] +\d+ +\d+ +(\S+)/ && $flag == 1){
		push(@R,$1."_".$2);
	    }elsif(/^\>\>\w+/ & $flag == 1){
		$Re_list{$i}= [@R];
		@R=();
		last;
	    }
	}
	$i++;
    }
    return (%Re_list);
}

sub cap3_parse{

}

sub sim4_parse{ 
    &opt_default(value=>"0.99",output=>"STDOUT");
    my $file_number = shift;
    my @args=opt_get(@_);
    my $value = &opt_val("value");
    my $flag;   
    my $seq1_name;
    my $seq2_name;
    my $seq1_leng=1;
    my $seq2_leng;
    my $score=0;
    my @R=();
    my $name=();
    my $i=0;
    my %Re_list=();   
    
    while($i < $file_number){
       	open(FILE,$i.".sim");
	$flag = 1;
	while(<FILE>){
	    if(/^seq1 +\= +(\w+)\.seq\, +(\d+) +bp/){
		if($score > $value && $flag > 1){
		    push(@R,$seq2_name."_".$seq2_leng);
		}
		elsif($score > $value && $flag == 1){
		    push(@R,$seq1_name."_".$seq1_leng);
		}
		$flag=1;
		$seq1_name=$1;
		$seq1_leng=$2;
		$score = 0;
	    }elsif(/^seq2 +\= +\w+\.seq\ +\((\w+)\_\d+\)\, +(\d+) +bp/){    
		$seq2_name=$1;
		$seq2_leng=$2;
	    }elsif(/\(complement\)/){
		$flag=0;
	    }elsif(/^(\d+)\-(\d+) +\(\d+\-\d+\) +(\d+)\%/ && $flag >0){
		if($seq1_leng < $seq2_leng){
		    $score=$score+(($2-$1+1)/100*$3/$seq1_leng);
		}else{
		    $score=$score+(($2-$1+1)/100*$3/$seq2_leng);
		}
		$flag++;
	    } 
	}
	if($score > $value && $flag > 1){
	    push(@R,$seq2_name."_".$seq2_leng);
	}	
	$Re_list{$i} = [@R];
	@R=();
	$i++;
    }
    return(%Re_list);
}

sub redundancy{
    &opt_default(qsub=>"off",analysis_method=>"fasta",directory_name=>"/tmp/ReL8/",sdirectory_name=>time(),value=>"1e-50",output=>"default.gbk",clustering=>"ON",identities=>"NULL");
   
    my @args=&opt_get(@_);
    my $gb = shift @args;  
    my $qsub=opt_val("qsub");
    my $analysis_method = &opt_val("analysis_method");
    my $directory_name = &opt_val("directory_name");
    my $sdirectory_name = &opt_val("sdirectory_name");
    my $output=&opt_val("output");
    my $value=&opt_val("value");
    my $clustering=&opt_val("clustering");
    my $identities=&opt_val("identities");
    my @filename=();
    my $i=0;
    my $change_name;
    my @file_list=();
    my %tmp=();
    my @tmp2=();
    
    
    @filename = file_maker($gb,-sdirectory_name=>$sdirectory_name,-directory_name=>$directory_name);
    
    
    _formatdb("master.seq",-p=>"F",-o=>"T")if( $analysis_method eq "blast");
    while($i < $#filename + 1){
	msg_send(".");
	_blast("master.seq","$i".".seq",-p=>"blastn",-o=>$i.".bla",-e=>$value)if( $analysis_method eq "blast");
	_fasta('-Q','-n',-O=>$i.".fst",-E=>$value,$i.".seq","master.seq")if( $analysis_method eq "fasta");
	_sim4($i.".seq",'master.seq',-output=>$i.".sim")if( $analysis_method eq "sim4");
#	_cap3($i.".seq",'master.seq',-output=>$i.".cap",-qsub=$qsub)if( $analysis_method eq "cap3");
	$i++;
    }
    
    %tmp = fasta_parse($#filename+1)if( $analysis_method eq "fasta");
    
    %tmp = sim4_parse($#filename+1,-value=>$value)if( $analysis_method eq "sim4");
    
    %tmp = blast_parse($#filename+1,-value=>$value,-identities=>"90")if( $analysis_method eq "blast");

    %tmp = cap3_parse($#filename+1,-value=>$value,-qsub=>$qsub)if( $analysis_method eq "cap3");
    @tmp2 = Totals(\%tmp);
    foreach $change_name (@tmp2){ 
	push(@file_list,$filename[$change_name]);
    }
    
    output_maker($gb,\@file_list,-file_name=>$output);
    msg_send("Finish !! output file is ",$output,"\n");
    
}





sub redundancy_fasta{
    &opt_default(directory_name=>"/tmp/ReL8/",sdirectory_name=>time(),value=>"1e-50",output=>"~/asimo/default.gbk",filename=>"redundancy_fasta.lst");
   
    my @args=&opt_get(@_);
    my $gb = shift @args;  
    my $directory_name=&opt_val("directory_name");
    my $sdirectory_name=&opt_val("sdirectory_name");
    my $output=opt_val("output");
    my $filename=opt_val("filename");
    my $value=opt_val("value");
    my @filename=();
    my %tmp=();
    my @tmp2=();

    @filename = file_maker($gb,-sdirectory_name=>$sdirectory_name,-directory_name=>$directory_name);
   
    foreach(@filename){
       _fasta('-Q','-n',-O=>$_.".fst",-E=>$value,$_.".seq","master.seq");
	
    }
    %tmp = fasta_parse(\@filename);
    @tmp2 = Totals(\%tmp);
    output_maker($gb,\@tmp2,-file_name=>$output);
    msg_send("Finish !! output file is ",$output,"\n");
}

sub redundancy_sim4{
    &opt_default(directory_name=>"/tmp/ReL8/",sdirectory_name=>time(),value=>0.98,output=>"~/asimo/default.gbk",filename=>"redundancy_sim4.lst" );
    
    my @args=&opt_get(@_);
    my $gb = shift @args;
    my $directory_name=&opt_val("directory_name");
    my $sdirectory_name=&opt_val("sdirectory_name");
    my $output=opt_val("output");
    my $filename=opt_val("filename");
    my $value=opt_val("value");
    my @filename=();
    my %tmp=();
    my @tmp2=();  
 
    chdir($directory_name.$sdirectory_name);

    @filename = &file_maker($gb,-sdirectory_name=>$sdirectory_name,-directory_name=>$directory_name);

    foreach(@filename){
	msg_send(".");
	_sim4($_.".seq",'master.seq',-output=>$_.".sim");
    }
    %tmp = sim4_parse(\@filename,-value=>$value,);
    @tmp2 = Totals(\%tmp);
    

    &output_maker($gb,\@tmp2,-file_name=>$output);
    msg_send("Finish !! output file is ",$output,"\n");
}

sub redundancy_cap3{
    &opt_default(qsub=>"off",directory_name=>"/tmp/ReL8/",sdirectory_name=>time(),value=>0.98,output=>"~/asimo/default.gbk",filename=>"redundancy_cap3.lst" );
    
    my @args=&opt_get(@_);
    my $gb = shift @args;
    my $qsub=opt_val("qsub");
    my $directory_name=&opt_val("directory_name");
    my $sdirectory_name=&opt_val("sdirectory_name");
    my $output=opt_val("output");
    my $filename=opt_val("filename");
    my $value=opt_val("value");
    my @filename=();
    my %tmp=();
    my @tmp2=();  
 
    chdir($directory_name.$sdirectory_name);

    @filename = &file_maker($gb,-sdirectory_name=>$sdirectory_name,-directory_name=>$directory_name);

    foreach(@filename){
	msg_send(".");
	_cap3($_.".seq",'master.seq',-output=>$_.".cap",-qsub=>$qsub);
    }
    %tmp = cap3_parse(\@filename,-value=>$value,);
    @tmp2 = Totals(\%tmp);
    

    &output_maker($gb,\@tmp2,-file_name=>$output);
    msg_send("Finish !! output file is ",$output,"\n");
}



#remove poly A site ver.1.00.0
#scripting by Keitaro Senda(t99537ks@sfc.keio.ac.jp)
#This program remove poly A site in the end of sepuence.
#options::
#-filename output file name (default:"polyA_removed_files" directory or "INPUTFILE.rmp" file)
#-log_file log file name(default:rmpolya.log)
#-analysis_type  analysis type "-d" all file in directory , "-f" file (default:Auto select)
#-over_write additional write mode "off" additional , "on" overwrite(default:'on')
#-capital "uc" Capitalize sequence , "lc" Not Capitalize(default:'uc')
#-min_length  sleshold of minimam length of poly A detected(default:'5')
#-check_window  basepair width of each check of poly A (default:'4') 
#-check_threshold  threshold of maximum abundance of base A finishing poly A cut(default:'3')
#-poly_nucleotide  poly nucleotide which is removed(default:'A')
#&rmPolya(read dir[or file] , write dir[or file]);

sub rmpolya{

####
#remove poly A tail program 
    
#### variables ####
    my(%opt);
    my(@arg);
    my(@read_seq);
    my($rdir),my(@rfile),my($wdir),my($wfile);
    my($header_flg),my($tmp),my($ch);
    my($i),my($j),my($t),my($end),my($paend),my($point),my($pacheck);
    my($num_entry);
#### error action ####

#### input action ####

    &opt_default(filename => "polyA_removed_files",
		 log_file => "rmpolya.log",
		 #analysis_type => "-d" ,
		 over_write => "on" ,
		 capital => "uc" ,
		 min_length => "5",
		 check_window => "4",
		 check_threshold => "3",
		 poly_nucleotide => "A");
    
    @arg = opt_get(@_);
    @read_seq = ();
    %opt = ();
    
## AUTO SELECT ##
    if(-d $arg[0]){
	$opt{analysis_type} = "-d";
    $opt{filename} = opt_val("filename");
    }elsif(-f $arg[0] || -l $arg[0]){
	$opt{analysis_type} = "-f";
	$opt{filename} = substr($arg[0],rindex($arg[0],"/")+1).".rmp";
    }
## AUTO SELECT END ##
    
    
    $opt{log_file} = opt_val("log_file");
#$opt{analysis_type} = opt_val("analysis_type");
    $opt{over_write} = opt_val("over_write");
    $opt{capital} = opt_val("capital");
    
    $opt{min_length} = opt_val("min_length");
    $opt{check_window} = opt_val("check_window");
    $opt{check_threshold} = opt_val("check_threshold");
    $opt{poly_nucleotide} = opt_val("poly_nucleotide");
    
## OPTION ERROR ##
    if($opt{check_threshold} > $opt{check_window}){
	$opt{check_threshold} = $opt{check_window};
	print "Now changed \"check_threshold\" to $opt{check_window} ,\n";
	print "because the value is too large(larger than check_window).\n\n";
	print "Will you continue ? (input \"yes\" or \"no\"):";
	exit(1) if(<STDIN> =~ /no/i);
	print "OK. This program will continue" if(<STDIN> =~ /yes/i);
	print "Will you continue ? (input \"yes\" or \"no\"):" if(<STDIN> !~ /yes/i);
    }
    if(length($opt{poly_nucleotide}) != 1){
	print "Please input one nucleotide (Option \"poly_nucleotide\")\n";
	exit(1);
    }
    if($opt{poly_nucleotide} !~ /[a-zA-Z]/ ){
	print "Please input alphabet (Option \"poly_nucleotide\")\n";
	exit(1);
    }
## OPTION ERROR END ##
    
## DIRECTORY or FILE PROCESS ##
    if($opt{analysis_type} eq "-d"){ #derectroy anlalysis#
	$rdir=$arg[0];
	$wdir=$opt{filename};
	$rdir .= "/" if($rdir !~ /\/$/);
	$wdir .= "/" if($wdir !~ /\/$/);
	print "$rdir , $wdir\n";
	opendir(R_DIR ,"$rdir") || die($!);
	if(-d $wdir){
	    opendir(W_DIR ,"$wdir") || die($!);
	}
	else{
	    mkdir($wdir , 0755) || die($!);
	    opendir(W_DIR ,"$wdir") || die($!);
	}
	@rfile=readdir(R_DIR);
	
	## remove ./ ../ process ##
	$i=0;
	foreach(@rfile){
	    unless($_ =~ /^\.{1,2}/){
		$rfile[$i] = $_;
		$i++;
	    }
	}
	$j = $#rfile;
	while($i <= $j){
	    pop(@rfile);
	    $i++;
	}
	## remove ./ ../ process END ##
    }
    elsif($opt{analysis_type} eq "-f"){ #file analysis#
	$rfile[0]=$arg[0];
	$wfile=$opt{filename};
    }
    else{
	print "Don't exist READ FILE or READ DIRECTORY\n";
	exit(1);
    }
    
    open(LOGFILE , "> $opt{log_file}") || die($!);
    
## DIRECTORY or FILE PROCESS END ##
    
    foreach(%opt){
	print $_."\n";
	print LOGFILE $_."\n";
    }
    
## ANALYSIAS START ##
    foreach(@rfile){
	print "......";
	if($opt{analysis_type} eq "-d"){
	    $wfile = "$wdir"."$_";
	$_ = "$rdir"."$_";
	}
	print(LOGFILE "------------$_---------------\n");
	print(LOGFILE "------------$wfile---------------\n");
	open(R_FILE ,"$_") || die($!);

	## PROCESS OF OPTION : over write ##
	if($opt{over_write} =~ /on/i){
	    open(W_FILE, "> $wfile") || die($!);
	}
    elsif($opt{over_write} =~ /off/i){
	if(-f $wfile || -l $wfile){
	    rename("$wfile","$wfile~") || die($!);
	}
	open(W_FILE, "> $wfile") || die($!);
    }
	else{
	    print "Please input \"on\" or \"off\" on the option -over_write";
	    exit(1);
	}
    ## PROCESS OF OPTION END ##
	
	$header_flg = 0;
	while(<R_FILE>){
	    chomp;

	    ### Select Process ###
	    if($_ =~ /^>/){
		if($header_flg == 0){
		    $header_flg = 1;
	    }
		else{
		    $header_flg = 3;
		}
		$num_entry++;
	    }
	    elsif($header_flg > 0){
		$header_flg = 2;
	    }
	    ### Select Process END ###
	    
	    #print $header_flg;

	    ### Each Process ###
	    if($header_flg == 0){
		next;
	    }
	    elsif($header_flg == 2){
		#### Function 2 ####
		
		$tmp = $_;
		chomp($tmp);
	    $tmp =~ s/[^a-zA-Z]+//g;
		$tmp =~ tr/a-z/A-Z/ if($opt{capital} eq "uc"); 
		
		## get one nuc ##
		for($t=0; $t < length($tmp);$t++){
		$ch = substr($tmp ,$t ,1);
		
		if($opt{capital} eq "uc"){
		    if($ch =~ /[A-Z]/){
			push(@read_seq,$ch);
		    }
		}elsif($opt{capital} eq "lc"){
		    if($ch =~ /[a-zA-Z]/){
			push(@read_seq,$ch);
		    }
		}else{
		    print "Please input \"uc\" or \"lc\"";
		    exit(1);
		}
	    }
		
	    }
	elsif($header_flg == 3){
	    
	    #### Function 3 ####
	    
	    for($i=$#read_seq ; $i>0 ; $i--){
		$end=0;
		if($read_seq[$i] ne $opt{poly_nucleotide}){
		    $paend=$i;
		    $point=$i;
		    while($end == 0){
			$pacheck=0;
			for($j=1; $j<=$opt{check_window}; $j++){
			    if($read_seq[$point-$j] eq $opt{poly_nucleotide}){
				$pacheck++;
			    }
			}
			if($pacheck < $opt{check_threshold}){$end=2;}
			if($pacheck == $opt{check_threshold}){$point=$point-$opt{check_window}}
			if($pacheck > $opt{check_threshold}){$end=1;}
		    }
		}
		last if($end==2);
	    }
	    
	    ## OUT PUT ACTION ##
	    
	    ## STOP CODON TAA ##
	    #if($read_seq[$paend] eq T){
	    #if($read_seq[$paend+1] eq A){
	    #if($read_seq[$paend+2] eq A){
	    #$paend = $paend+2;
	    #}
	    #}
	    #}
	    ## STOP CODON TAA END ##
	    
	    if($#read_seq - $paend >= $opt{min_length}){
		print(W_FILE "$read_seq[0](rmp$opt{poly_nucleotide})\n");
		for($i=1; $i<=$paend; $i++){
		    print(W_FILE "$read_seq[$i]");
		}
		print(W_FILE "\n");
		
		## LOG OUTPUT ##
		print(LOGFILE "$read_seq[0]\n");
		print(LOGFILE "From ");
		print(LOGFILE $paend+1);
		print(LOGFILE " bp to the end,");
		print(LOGFILE $#read_seq - $paend);
		print(LOGFILE " bp of poly$opt{poly_nucleotide} was removed in total $#read_seq bp.\n");
		## LOG OUTPUT END ##
		
	    }
	    else{
		print(W_FILE "$read_seq[0]\n");
		for($i=1 ; $i <= $#read_seq ; $i++){
		    print(W_FILE "$read_seq[$i]");
		}
		print(W_FILE "\n");
	    }
	    @read_seq = ();	    
	    
	    ## OUT PUT ACTION END ##
	    
	    
	    $header_flg = 1;
	}
	    ### Each Process END ###

	    
	    if($header_flg == 1){    
		## get header ##
	    $read_seq[0] = $_;
	}
	    
    }
	
	#### Function 3 on EOF####
    
	for($i=$#read_seq ; $i>0 ; $i--){
	$end=0;
	if($read_seq[$i] ne $opt{poly_nucleotide}){
	    $paend=$i;
	    $point=$i;
	    while($end == 0){
		$pacheck=0;
		for($j=1; $j<=$opt{check_window}; $j++){
		    if($read_seq[$point-$j] eq $opt{poly_nucleotide}){
			$pacheck++;
		    }
		}
		if($pacheck < $opt{check_threshold}){$end=2;}
		if($pacheck == $opt{check_threshold}){$point=$point-$opt{check_window}}
		if($pacheck > $opt{check_threshold}){$end=1;}
	    }
	}
	last if($end==2);
    }

	## OUT PUT ACTION ##
	
	## STOP CODON TAA ##
	#if($read_seq[$paend] eq T){
	#if($read_seq[$paend+1] eq A){
	#if($read_seq[$paend+2] eq A){
	#$paend = $paend+2;
	#}
	#}
	#}
	## STOP CODON TAA END ##
	
	if($#read_seq - $paend >= $opt{min_length}){
	    print(W_FILE "$read_seq[0](rmp$opt{poly_nucleotide})\n");
	    for($i=1; $i<=$paend; $i++){
		print(W_FILE "$read_seq[$i]");
	    }
	    print(W_FILE "\n");
	    
	    ## LOG OUTPUT ##
	    print(LOGFILE "$read_seq[0]\n");
	    print(LOGFILE "From ");
	    print(LOGFILE $paend+1);
	    print(LOGFILE " bp to the end,");
	    print(LOGFILE $#read_seq - $paend);
	    print(LOGFILE " bp of poly$opt{poly_nucleotide} was removed in total $#read_seq bp.\n");
	    ## LOG OUTPUT END ##
	    
	}
	else{
	    print(W_FILE "$read_seq[0]\n");
	    for($i=1 ; $i <= $#read_seq ; $i++){
		print(W_FILE "$read_seq[$i]");
	    }
	    print(W_FILE "\n");
	}
	@read_seq = ();	    
	
    ## OUT PUT ACTION END ##
	
	#### finish action ####
	print(LOGFILE "\nTotal $num_entry cDNA entry in this file\n\n");
	
	close R_FILE;
	close W_FILE;
    }
    
    closedir R_DIR;
    closedir W_DIR;
    
}

1;

















