#!/usr/bin/env perl

#
# rss2imap - an IMAP-based RSS aggreagor
#
# Copyright (C) 2004 Taku Kudo <taku@chasen.org>
#               2005 Yoshinari Takaoka <mumumu@mumumu.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of the
# GPL2, GNU General Public License version 2.
#
# $Id: Rss2imap.pm,v 1.3 2005/10/02 11:55:42 mumumu-org Exp $
#

require 5.008_000;
use strict;
use Mail::IMAPClient;
use XML::RSS;
use HTTP::Date;
use RSS2IMAPLIB::Unicode;
use RSS2IMAPLIB::Config;
use RSS2IMAPLIB::Common;
use Data::Dumper;

package RSS2IMAPLIB::Rss2imap;
{

    our $VERSION = undef;

    #    rss2imap config init.
    our $config_obj = undef;
    our $GLOABL_CONFIG = undef;
    our $SITE_CONFIG   = undef;


    sub new {

        my $this = shift;
        $config_obj = shift;

        $GLOABL_CONFIG = $config_obj->get_global_configall();
        $SITE_CONFIG   = $config_obj->get_site_configall();
        $VERSION       = $config_obj->get_version();

        bless $GLOABL_CONFIG, $this;
    }


    sub connect {

        my $this = shift;
        my $ssl_sock = undef;

        if ( $this->{'use-ssl'} ) {

            eval 'use IO::Socket::SSL';
            if( $this->is_error() ) {
                print "you specify use SSL but dont install IO::Socket::SSL.\n";
                print "please install it via cpan.\n";
                exit();
            }

            $ssl_sock = IO::Socket::SSL->new("$this->{host}:$this->{port}")
            or die "could not connect to the imap server over ssl.";
        }

        my $imap = Mail::IMAPClient->new(
                        Socket   => ( $ssl_sock ? $ssl_sock : undef ),
                        Server   => $this->{host},
                        User     => $this->{user},
                        Port     => $this->{port},
                        Password => $this->{password},
                        Peek      => 1,
                        Authmechanism => ($this->{'cram-md5'} ? "CRAM-MD5" : undef));

        die "imap client initialize failed. maybe you dont specify proper option...\n" if(!$imap);

        if ($this->{debug}) {
            $imap->Debug(1);
            $imap->Debug_fh();
        }

        if ( $this->{'use-ssl'} ) {
            $imap->State(1);    #connected
            $imap->login();     #if ssl enabled, login required because it is bypassed.
        }

        #   authentication failure. sorry.
        if( !$imap->IsAuthenticated() ) {
            print "Authentication failure, sorry.\n";
            print "connected to : $this->{host}:$this->{port}\n";
            exit();
        }

        $this->{imap} = $imap;
        die "$@ $this->{user}\@$this->{host}\n" unless ($imap);

    }


    sub connect_test {
        my $this = shift;
        $this->connect ();
        $this->{imap}->close ();
    }


    sub run {

        my $this = shift;
        my $site_config_list = $config_obj->parse_url_list( @{$this->{list}} );
        $this->connect ();

        for my $site_config (@{$site_config_list}) {

            $this->{site_config} = $site_config;

            for my $url (@{$site_config->{url}}) {
                my $rss = $this->get_rss ($url);
                next unless ($rss);
                $this->send   ($rss);
                $this->expire ($rss);
            }
        }

        $this->{imap}->close ();
    }


    sub get_rss {

        my $this    = shift;
        my $link    = shift;
        my $imap    = $this->{imap};
        my $folder  = $this->get_real_folder_name ($this->{'last-modified-folder'});
        my $headers = {};

        #    start site processing....
        print "processing $link ....\n";

        $this->select ($folder);
        my $message_id = sprintf ('%s@%s', $link, $this->{host});
        my @search = $imap->search ("UNDELETED HEADER message-id \"$message_id "
                                  . "\" HEADER x-rss-aggregator \"rss2imap-checker\"");

        if ($this->is_error()) {
            warn "WARNNING: $@\n";
        }

        if (my $latest = $this->get_latest_date (\@search))  {
            $headers = { 'If-Modified-Since' => HTTP::Date::time2str ($latest) };
        }

        my $common = RSS2IMAPLIB::Common->new();
        my @rss_and_response = $common->getrss_and_response( $link, $headers );
        return unless (defined @rss_and_response);

        $imap->delete_message (@search); # delete other messages;

        my $content = $rss_and_response[0];
        my $response = $rss_and_response[1];

        my $rss = new XML::RSS;
        eval { $rss->parse($content); };
        if ($this->is_error()) {
            warn "\nWARNNING: $@ folder of this contents will not be created\n"
               . "$link\n\n";
            return undef;
        }

        print "modified $link\n";

        # copy session information
        $rss->{'rss2imap:last-modified'} = HTTP::Date::time2str ($response->last_modified);
        $rss->{'rss2imap:message-id'}    = $message_id;
        $rss->{'rss2imap:rss-link'}      = $link;

        return $rss;
    }


    sub send {

        my $this = shift;
        my $rss  = shift;
        my $imap = $this->{imap};

        my @items;
        my $type = $this->{site_config}->{type};
        if ($type eq "channel") {
            @items = ($rss->{channel}); # assume that item == rss->channel
        } elsif ($type eq "items") {
            @items = @{$rss->{items}};
        } else {
            warn "WARNNING: unknown type [$type]\n";
            return;
        }

        my ($folder) = $this->apply_template ($rss, undef, 1, $this->{site_config}->{folder});
        $folder = $this->get_real_folder_name ($folder);
        $this->select ($folder);

        my @append_items;
        my @delete_mail;

        for my $item (@items) {

            my $message_id  = $this->gen_message_id ($rss, $item);

            if ($this->{site_config}->{expire} > 0) {
                my $rss_date = $this->get_date ($rss, $item);
                next if (time() - HTTP::Date::str2time ($rss_date) > $this->{site_config}->{expire} * 60 * 60 * 24);
            }

            my @search = $imap->search ("NOT DELETED HEADER message-id \"$message_id\" HEADER x-rss-aggregator \"rss2imap\"");

            if ($this->is_error()) {
                warn "WARNNING: $@\n";
                next;
            }

            if (@search == 0) {
                print " appending $message_id\n";
                push @append_items, $item;
            } else {
                my $rss_date = $this->get_rss_date ($rss, $item);
                next unless ($rss_date); # date filed is not found, we ignore it.
                my $latest = $this->get_latest_date (\@search);
                if (HTTP::Date::str2time ($rss_date) > $latest) {
                    print " updateing $message_id\n";
                    push @delete_mail, @search;
                    push @append_items, $item;
                } else {
                    print " skip $message_id\n";
                }
            }
        }

        # delete items
        if ($this->{site_config}->{'sync'}) {
            my %found = ();
            for my $item (@items) {
                $found{$item->{link}} = 1;
            }

            my $link = $rss->{'rss2imap:rss-link'};
            my @search = $imap->search ("HEADER x-rss-link \"$link\" HEADER x-rss-aggregator \"rss2imap\"");

            for my $msg (@search) {
                my $link2 = $imap->get_header ($msg, "x-rss-item-link");
                $link2 =~ s/^\s*//g; $link2 =~ s/\s*$//g; # must trim spaces, bug of IMAP server?
                unless ($found{$link2}) {
                    print "  deleting $link2\n";
                    push @delete_mail, $msg;
                }
            }
        }

        # update all message
        $imap->delete_message (@delete_mail);
        for my $item (@append_items) {
            $this->send_item ($folder, $rss, $item);
        }

        $this->send_last_update ($rss);

        return;
    }


    sub expire {

        my $this   = shift;
        my $rss    = shift;
        my $expire = $this->{site_config}->{expire} || -1;
        my $imap = $this->{imap};

        return if ($expire <= 0);

        my ($folder, $expire_folder) = $this->apply_template ($rss, undef, 1,
                                       $this->{site_config}->{folder},
                                       $this->{site_config}->{'expire-folder'});
        $folder        = $this->get_real_folder_name ($folder);
        $expire_folder = $this->get_real_folder_name ($expire_folder); 
        my $key  =  Mail::IMAPClient->Rfc2060_date (time() - $expire * 60 * 60 * 24);

        my $query = (defined $this->{site_config}->{'expire-unseen'}) ? "SENTBEFORE $key" : "SEEN SENTBEFORE $key";
        $query .= " HEADER x-rss-aggregator \"rss2imap\"";

        $this->select ($folder);
        my @search = $imap->search ($query);

        if ($this->is_error()) {
            warn "WARNNING: $@\n";
            return;
        }

        return if (@search == 0);

        if ($expire_folder) {
            $this->create_folder ($expire_folder);
            for my $msg (@search) {
            print "  moving: $msg -> $expire_folder\n";
            $imap->move ($expire_folder, $msg);
            }
        } else {
            print "  deleting: [@search]\n";
            $imap->delete_message (@search);
        }
    }


    sub get_latest_date {

        my $this = shift;
        my $list = shift;
        my $header = shift || 'date';
        my $imap = $this->{imap};

        my $latest = -1;
        for my $msg (@{$list}) {
            my $date = $imap->get_header ($msg, $header);
            next unless ($date);
            $date = HTTP::Date::str2time ($date);
            $latest = $date if ($date > $latest);
        }

        return ($latest == -1) ? undef : $latest;
    }


    sub send_last_update {

        my $this = shift;
        my $rss = shift;

        my $message_id = $rss->{'rss2imap:message-id'};
        my $date       = $rss->{'rss2imap:last-modified'};
        my $link       = $rss->{'rss2imap:rss-link'};
        my $a_date     = scalar (localtime ());

        my $body =<<"BODY"
From: $SITE_CONFIG->{'from'}
Subject: $link
MIME-Version: 1.0
Content-Type: text/plain;
Content-Transfer-Encoding: 7bit
Content-Base: $link
Message-Id: $message_id
Date: $date
User-Agent: rss2imap version $VERSION
X-RSS-Link: $link
X-RSS-Aggregator: rss2imap-checker
X-RSS-Aggregate-Date: $a_date;
X-RSS-Last-Modified: $date

Link: $link
Last-Modified: $date
Aggregate-Date: $a_date
BODY
;
        my $folder = $this->get_real_folder_name ($this->{'last-modified-folder'});
        $this->{imap}->append_string ($folder, $body);
    }


    sub send_item {

        my $this   = shift;
        my $folder = shift;
        my $rss    = shift;
        my $item   = shift;

        my $headers   = $this->get_headers( $rss, $item );

        my $body = ($this->{'delivery-mode'} eq 'text') ?
                    $this->get_text_body( $rss, $item ) :
                    $this->get_html_body( $rss, $item );

        my $message = ($headers . $body);
        utf8::encode ( $message );
        $this->{imap}->append_string ($folder, $message);
    }


    sub get_headers {

        my $this   = shift;
        my $rss    = shift;
        my $item   = shift;

        my $date       = $this->get_date ($rss, $item);
        my $rss_date   = $this->get_rss_date ($rss, $item) || "undef";

        my $subject    = $this->{site_config}->{subject};
        my $from       = $this->{site_config}->{from};
        my $to         = $this->{site_config}->{to};
        my $message_id = $this->gen_message_id ($rss, $item);
        ($subject, $from) = $this->apply_template ($rss, $item, undef, $subject, $from);

        my $m_from    = RSS2IMAPLIB::Unicode::to_mime ($from);
        my $m_subject = RSS2IMAPLIB::Unicode::to_mime ($subject);
        my $m_to      = RSS2IMAPLIB::Unicode::to_mime ($to);
        my $a_date    = scalar(localtime ());
        my $l_date    = $rss->{'rss2imap:last-modified'} || $a_date;
        my $link      = $rss->{'rss2imap:rss-link'} || "undef";

        my $mime_type = ($this->{'delivery-mode'} eq 'text') ?
                        'text/plain' : 'text/html';

        my $return_headers =<<"BODY"
From: $m_from
Subject: $m_subject
To: $m_to
MIME-Version: 1.0
Content-Type: $mime_type; charset=UTF-8
Content-Transfer-Encoding: 8bit
Content-Base: $item->{link}
Message-Id: $message_id
Date: $date
User-Agent: rss2imap version $VERSION
X-RSS-Link: $link
X-RSS-Channel-Link: $rss->{channel}->{link}
X-RSS-Item-Link: $item->{link}
X-RSS-Aggregator: rss2imap
X-RSS-Aggregate-Date: $a_date
X-RSS-Last-Modified: $l_date;

BODY
;

        return $return_headers;
    }


    sub get_text_body {

        my $this = shift;
        my $rss  = shift;
        my $item = shift;

        my $subject    = $this->{site_config}->{subject};
        my $from       = $this->{site_config}->{from};
        my $desc       = $item->{description} || "";
        ($subject, $from) = $this->apply_template ($rss, $item, undef, $subject, $from);

        #  convert html tag to appropriate text.
        $subject = $this->rss_txt_convert( $subject );
        $desc    = $this->rss_txt_convert( $desc );
        $desc = "[ Description ] : $desc" if ($desc ne "");
        $subject =~ s/\n//g;
        $item->{link} =~ s/\n//g;

        my $return_text_body = <<"TEXT_BODY"
[ Title ] : $subject

$desc

[ Complete Story ] : $item->{link}
TEXT_BODY
;
        return $return_text_body;

    }


    sub get_html_body {

        my $this = shift;
        my $rss  = shift;
        my $item = shift;

        my $subject    = $this->{site_config}->{subject};
        my $from       = $this->{site_config}->{from};
        my $desc       = $item->{description} || "";
        ($subject, $from) = $this->apply_template ($rss, $item, undef, $subject, $from);

        my $return_body =<<"BODY"
<html>
<head>
<title>$subject</title>
<style type="text/css">
body {
      margin: 0;
      border: none;
      padding: 0;
}
iframe {
  position: fixed;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  border: none;
}
</style>
</head>
<body>
<iframe  width="100%" height="100%" src="$item->{link}">
$desc
</iframe>
</body>
</html>
BODY
;
        return $return_body;
    }


    sub rss_txt_convert {

        my $this   = shift;
        my $string = shift;

        return "" if ( !$string );

        $string =~ s/<br\s*.>/\n/ig;
        $string =~ s/<a .*?>([^<>]*)<\/a>/$1/ig;
        $string =~ s/<hr\s*[\/]*>/\n--------------------------\n/ig;
        $string =~ s/<[\/]*b\s*[^<>]*?>//ig;
        $string =~ s/<[\/]*font\s*[^<>]*?>/ /ig;
        $string =~ s/<[\/]*em\s*[^<>]*?>//ig;
        $string =~ s/<[\/]*i\s*[^<>]*?>//ig;
        $string =~ s/<p\s*[^<>]*?>//ig;
        $string =~ s/<\/\s*p\s*?>/\n/ig;
        $string =~ s/<p>([^<>]*?)<\/p>/$1\n/ig;
        $string =~ s/<[\/]*dl\s*[^<>]*?>//ig;
        $string =~ s/<[\/]*h\d*?\s*[^<>]*?>//ig;
        $string =~ s/<[\/]*code\s*[^<>]*?>//ig;
        $string =~ s/<dt>([^<>]*?)<\/dt>/$1\n/ig;
        $string =~ s/<dd>([^<>]*?)<\/dd>/$1\n/ig;
        $string =~ s/<[\/]*div\s*?[^<>]*?>//ig;
        $string =~ s/<[\/]*ul\s*[^<>]*?>//ig;
        $string =~ s/<[\/]*ol\s*[^<>]*?>//ig;
        $string =~ s/<li>(.*?)\n/* $1\n/ig;
        $string =~ s/<[\/]*pre\s*[^<>]*?>//ig;
        $string =~ s/<[\/]*blockquote\s*[^<>]*?>//ig;
        $string =~ s/<[\/]*strong\s*[^<>]*?>//ig;
        $string =~ s/<\/li>//ig;
        $string =~ s/&lt;/</g;
        $string =~ s/&gt;/>/g;
        $string =~ s/&amp;/&/g;
        $string =~ s/&#38;/&/g;
        $string =~ s/&mdash;/-/g;
        $string =~ s/&nbsp;/ /g;
        $string =~ s/&raquo;/>>/g;
        $string =~ s/&quot;/"/g;
        $string =~ s/&#160;/  /g;
        $string =~ s/&#39;/'/g;
        $string =~ s/#8221;/"/g;
        $string =~ s/&#8217;/'/g;
        $string =~ s/&#146;/'/g;
        $string =~ s/<\!--.*?-->//g;

        return $string;
    }


    # wrappers
    sub select {
        my $this   = shift;
        my $folder = shift;
        $this->create_folder ($folder);
        $this->{imap}->select ($folder) || warn "@!\n";
    }


    sub create_folder {
        my $this   = shift;
        my $folder = shift;
        my $imap = $this->{imap};
        unless ($imap->exists($folder)) {
            $imap->create ($folder) || warn "WARNNING: $@\n";
        }
    }


    # misc functions
    sub gen_message_id {
        my $this = shift;
        my $rss  = shift;
        my $item = shift;
        return sprintf ('%s@%s', $item->{link}, $this->{host});
    }


    sub is_error {

        #    if you use windows, FCNTL error will be ignored.
        if( !$@ || ( $^O =~ /Win32/ && $@ =~ /fcntl.*?f_getfl/ ) ) {
            return 0;
        }
        return 1;
    }


    sub get_rss_date {
        my $this = shift;
        my $rss  = shift;
        my $item = shift;
        return $item->{dc}->{date} || $rss->{channel}->{dc}->{date} || $rss->{'rss2imap:last-modified'};
    }


    sub get_date {
        my $this = shift;
        my $rss  = shift;
        my $item = shift;
        my $date = $this->get_rss_date ($rss, $item) || "";
        return HTTP::Date::time2str(HTTP::Date::str2time ($date));
    }


    sub get_real_folder_name {
        my $this = shift;
        my $str  = shift;
        if ($this->{prefix}) {
            $str = sprintf ("%s.%s", RSS2IMAPLIB::Unicode::to_utf8 ($this->{prefix}), $str);
        }
        return RSS2IMAPLIB::Unicode::to_utf7 ($str);
    }


    sub apply_template {

        my $this = shift;
        my $rss  = shift;
        my $item = shift;
        my $folder_flg = shift;
        my @from = @_;

        my %cnf;
        if ($rss) {
            $cnf{'channel:title'}       = $rss->{channel}->{title};
            $cnf{'channel:link'}        = $rss->{channel}->{link};
            $cnf{'channel:description'} = $rss->{channel}->{description};
            $cnf{'channel:dc:date'}     = $rss->{channel}->{dc}->{date} || "";
        }

        if ($item) {
            $cnf{'item:description'}  = $item->{description} || $rss->{channel}->{description};
            $cnf{'item:link'}         = $item->{link} || $rss->{channel}->{link};
            $cnf{'item:title'}        = $item->{title} || $rss->{channel}->{title};
            $cnf{'item:dc:date'}      = $item->{dc}->{date} || $item->{dc}->{date} || "";
            $cnf{'item:dc:subject'}   = $item->{dc}->{subject} || "";
            $cnf{'item:dc:creator'}   = $item->{dc}->{creator} || "";
        }

        $cnf{host}            = $this->{host};
        $cnf{user}            = $this->{user};
        $cnf{'last-modified'} = $rss->{'rss2imap:last-modified'};
        $cnf{'rss-link'}      = $rss->{'rss2imap:rss-link'};

        my @result;
        for my $from (@from) {
            if ($from) {
            for my $key (keys %cnf) {
                next unless ($cnf{$key});
                $cnf{$key} =~ s/\./:/g if ($folder_flg);
                my $key2 = "%{" . $key . "}";
                $from =~ s/$key2/$cnf{$key}/eg;
            }
            }
            push @result, $from;
        }

        return @result;
    }
}

1;