# RSS解析プログラム
# このプログラムは以下のサイトにあるものを利用させていただきました。 
# http://digit.que.ne.jp/work/index.cgi

use strict;
use Jcode;
use Time::Local;

# Extract the <item> elements of an RSS/RDF document.
#
# Arguments:
#   $rss - the feed document as a single string
#   $num - optional maximum number of items to return; any value that is
#          not a non-negative integer (including undef or 0) means
#          "no limit"
#
# Returns a list of hash references, one per item, in document order.
# Each hash holds the sanitized text of whichever of the 'title', 'link',
# 'description' and 'dc:date' elements the item contains, plus 'time',
# the value date_to_time() returns for the item's 'dc:date'.
# Returns nothing (empty list / undef) when $rss is false.
sub parse_rss {
	my ($rss, $num) = @_;
	my @items = ();
	return unless ($rss);
	$num = 0 unless (defined $num and $num =~ /^\d+$/);
	foreach my $item ($rss =~ /<item\b.*?>.*?<\/item>/gis) {
		my $parsed = {};
		# The word list needs its own parentheses: using a bare qw() as
		# the foreach list is a syntax error on Perl 5.18 and later.
		foreach my $tag (qw(title link description dc:date)) {
			if ($item =~ /<$tag\b.*?>(.*?)<\/$tag>/is) {
				$parsed->{$tag} = sanitize($1);
			}
		}
		$parsed->{'time'} = date_to_time($parsed->{'dc:date'});
		push(@items, $parsed);
		# Stop early once the requested number of items is collected.
		last if ($num and @items >= $num);
	}
	return @items;
}

# Turn a fragment of feed text into plain text that is safe to embed in
# HTML.
#
# Markup tags and SGML comments are stripped first. The entities &lt;,
# &gt;, &quot;, &apos;, &amp; and &copy; are then decoded (&copy; becomes
# "(c)"), and finally every <, >, ", ' and & left in the text is escaped
# again. The net effect is that already-escaped and unescaped input end
# up in the same escaped form.
#
# Argument: the string to clean. Returns the cleaned string; an undefined
# argument is returned unchanged.
sub sanitize {
	my $str = shift;
	return $str unless defined $str;
	# remove tags and comments
	my $re_tag_    = q{[^"'<>]*(?:"[^"]*"[^"'<>]*|'[^']*'[^"'<>]*)*(?:>|(?=<)|$(?!\n))}; #'};
	my $re_comment = '<!(?:--[^-]*-(?:[^-]+-)*?-(?:[^>-]*(?:-[^>-]+)*?)??)*(?:>|$(?!\n)|--.*$)';
	my $re_tag     = qq{$re_comment|<$re_tag_};
	$str =~ s/$re_tag//g;
	# unescape, then escape again
	my %unescaped = ('&lt;' => '<', '&gt;' => '>', '&quot;' => '"', '&apos;' => "'", '&copy;' => '(c)', '&amp;' => '&');
	# The apostrophe is written as a numeric reference because &apos; is
	# not defined in HTML 4.
	my %escaped = ('<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&#39;', '&' => '&amp;');
	# Capture the whole entity and lower-case it so that it matches the
	# keys of %unescaped; the match itself is case-insensitive.
	$str =~ s/(&(?:lt|gt|quot|apos|copy|amp);)/$unescaped{lc $1}/gie;
	$str =~ s/([<>"'&])/$escaped{$1}/g;
	return $str;
}

# Convert a W3C-DTF (ISO 8601 profile) date string to epoch seconds.
#
# Accepted forms: YYYY, YYYY-MM, YYYY-MM-DD, and YYYY-MM-DDThh:mm with
# optional ":ss", optional fractional seconds (ignored) and an optional
# time zone designator ("Z" or "+hh:mm" / "-hh:mm"). Missing month and
# day default to 1, missing time fields to 0. Whitespace around the
# string is ignored.
#
# When a time zone designator is present the result is computed in UTC
# and corrected by the offset; otherwise the fields are interpreted in
# the local time zone.
#
# Returns the epoch time, or undef when the argument is undefined, does
# not match the format, or holds out-of-range fields (e.g. month 13).
sub date_to_time {
	my $date = shift;
	# "return undef" (not a bare return) is kept so that callers using
	# this sub in list context still receive exactly one value.
	return undef unless defined $date;
	$date =~ s/\A\s+//;
	$date =~ s/\s+\z//;
	my @field = $date =~ m{
		\A (\d{4})                                  # 1: year
		(?: - (\d{2})                               # 2: month
			(?: - (\d{2})                           # 3: day
				(?: T (\d{2}) : (\d{2})             # 4: hour, 5: minute
					(?: : (\d{2}) (?: \. \d+ )? )?  # 6: second (fraction dropped)
					( Z | ([+-]) (\d{2}) : (\d{2}) )?   # 7: zone, 8: sign, 9/10: hh, mm
				)?
			)?
		)?
		\z
	}x;
	return undef unless @field;
	my ($year, $month, $day, $hour, $min, $sec, $zone, $sign, $zone_hour, $zone_min) = @field;
	$month = 1 unless defined $month;
	$day   = 1 unless defined $day;
	foreach my $part ($hour, $min, $sec) {
		$part = 0 unless defined $part;
	}
	# Offset of the given zone from UTC in seconds (0 for "Z").
	my $offset = 0;
	if (defined $zone and $zone ne 'Z') {
		$offset = ($zone_hour * 60 + $zone_min) * 60;
		# Test the sign character itself so that "-00:30" stays negative.
		$offset = -$offset if ($sign eq '-');
	}
	my @broken_down = ($sec, $min, $hour, $day, $month - 1, $year);
	# Time::Local croaks on out-of-range fields; report those as undef,
	# the same way as a string that does not match at all.
	local $@;
	my $time = eval {
		defined $zone ? Time::Local::timegm(@broken_down) - $offset
		              : Time::Local::timelocal(@broken_down);
	};
	return $time;
}

1;
