# antenna.rb $Revision: 1.9 $
# 
#
# Copyright (C) 2004  Michitaka Ohno <elpeo@mars.dti.ne.jp>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
# USA.

require 'net/http'
require 'timeout'
require 'time'
require 'cgi'
require 'nkf'

# Version string of rAntenna. It is embedded in the generator text that
# Antenna#initialize builds and in the User-Agent header sent by
# Antenna#go_round.
RANTENNA_VERSION = '0.0.5'

# Checks a configured list of sites for their last modification time and
# writes the result as an RSS 1.0 (RDF) file.
#
# Each entry of @urls is indexed as follows by the code below:
# [0] title, [1] creator, [2] URL used as link and as bookkeeping key,
# [3] optional alternative URL that is fetched instead of [2].
# @urls, @title, @rdf_url, @antenna_url, @copyright, @link_format and
# @rdf_path are not assigned by this class (apart from defaults for @urls
# and @rdf_path); presumably the configuration file sets them.
class Antenna
	# Loads the configuration file and every plugin/*.rb next to this file.
	#
	# conf:: path of the configuration file, resolved relative to the
	#        directory of this file (default: 'antenna.conf').
	def initialize( conf = nil )
		@generator = "Powered by rAntenna #{RANTENNA_VERSION} and Ruby #{RUBY_VERSION}"

		@dir = File::dirname( __FILE__ )

		# NOTE(security): the configuration file is executed as Ruby code with
		# full access to this instance; it must come from a trusted location.
		eval( untaint_compat( File::open( File.expand_path( conf||'antenna.conf', @dir ) ){|f| f.read } ) )

		@urls ||= []
		@rdf_path ||= 'index.rdf'

		begin
			# With uconv available the RDF is written as UTF-8, converting
			# from/to EUC-JP; without it the text is passed through unchanged.
			require 'uconv'
			@rdf_encoding = 'UTF-8'
			@rdf_encoder = Proc::new {|s| Uconv.euctou8( s ) }
			@rdf_decoder = Proc::new {|s| Uconv.u8toeuc( s ) }
		rescue LoadError
			@rdf_encoding = 'EUC-JP'
			@rdf_encoder = Proc::new {|s| s }
			@rdf_decoder = Proc::new {|s| s }
		end

		@last_modified = Hash.new
		@last_detected = Hash.new
		@content_length = Hash.new
		@auth_url = Hash.new

		@input_procs = []
		@output_procs = []

		# NOTE(security): plugins are likewise executed as Ruby code in the
		# context of this instance.
		Dir::glob( File.join( @dir, 'plugin', '*.rb' ) ).sort.each do |file|
			instance_eval( untaint_compat( File::open( untaint_compat( file ) ){|f| f.read } ) )
		end
	end

	# Returns @antenna_url (nil when it has not been set).
	def antenna_url
		@antenna_url
	end

	# Registers a callback that go_round calls before checking any site.
	# Accepts either a Proc argument or a block; raises ArgumentError when
	# neither is given.
	def add_input_proc( block = nil, &handler )
		@input_procs << callback_from( block, handler )
	end

	# Registers a callback that output calls after the RDF file is written.
	# Accepts either a Proc argument or a block; raises ArgumentError when
	# neither is given.
	def add_output_proc( block = nil, &handler )
		@output_procs << callback_from( block, handler )
	end

	# Runs the input callbacks, then visits every http:// entry of @urls that
	# has no modification time yet and records what it finds. A site that
	# fails or exceeds the time limit is skipped so the others are still
	# checked.
	def go_round
		@input_procs.each do |proc|
			proc.call
		end

		limittime = 10
		request_header = {'User-Agent' => "rAntenna #{RANTENNA_VERSION}"}

		@urls.each do |item|
			next if @last_modified[item[2]]
			next unless %r[^http://([^/]+)(/.*)?$] =~ (item[3]||item[2])
			path = $2||'/'
			host, port = $1.split( /:/ )
			port = '80' unless /^[0-9]+$/ =~ port
			begin
				Timeout.timeout( limittime ) do
					probe_site( item[2], host, port.to_i, path, request_header )
				end
			rescue StandardError, Timeout::Error
				# Best effort: ignore this site. Interrupt/SystemExit are
				# deliberately not rescued so the run can still be aborted.
			end
		end
	end

	# Writes the RDF file and then runs the output callbacks.
	#
	# file:: output path, resolved relative to the directory of this file
	#        (default: @rdf_path).
	#
	# Before writing, dates found in an already existing output file are
	# taken over for URLs that have no modification time yet.
	def output( file = nil )
		output_file = File.expand_path( file||@rdf_path, @dir )

		restore_last_modified( output_file )

		sorted_urls = @urls.sort {|a, b| urls_compare( a, b ) }
		rdf = render_rdf( sorted_urls )

		File.open( output_file, "w" ) do |f|
			f.print @rdf_encoder.call( rdf )
		end

		@output_procs.each do |proc|
			proc.call
		end
	end

	# Comparator for @urls entries: newest modification time first, entries
	# without a known time last.
	def urls_compare( a, b )
		atime = @last_modified[a[2]]
		btime = @last_modified[b[2]]
		if atime && btime then
			btime <=> atime
		elsif atime then
			-1
		elsif btime then
			1
		else
			0
		end
	end

	# Builds the link written for +url+. Without @link_format, or without a
	# known modification time, the URL is returned unchanged. Otherwise
	# %url% and %antenna_url% are substituted into @link_format and the
	# result is passed through strftime of the modification time.
	def get_link( url )
		return url unless @link_format && @last_modified[url]
		format = @link_format.gsub( /%(antenna_url|url)%/ ) do
			# Double every '%' of the substituted text so strftime emits it literally.
			($1 == 'url' ? url : @antenna_url.to_s).gsub( /%/, '%%' )
		end
		@last_modified[url].strftime( format )
	end

	# Inverse of get_link: extracts the original URL from +link+.
	#
	# date:: when given, the strftime directives of @link_format are expanded
	#        with it; otherwise each directive matches any text.
	#
	# Returns +link+ unchanged when @link_format is unset, does not match, or
	# contains no %url% placeholder.
	def get_unlink( link, date = nil )
		return link unless @link_format
		format = Regexp.escape( @link_format ).gsub( /%(antenna_url|url)%/ ) do
			$1 == 'url' ? '(.+)' : Regexp.escape( @antenna_url.to_s ).gsub( /%/, '%%' )
		end
		if date then
			re = date.strftime( format )
		else
			re = format.gsub( /%(.)/ ) do
				$1 == '%' ? '%' : '.+'
			end
		end
		matched = /^#{re}$/.match( link )
		(matched && matched[1]) || link
	end

	# Tries to find a modification time inside an HTML document.
	#
	# Lines are examined in order: a <meta> last-modified / wwwc entry wins
	# immediately; otherwise the newest past date found near a keyword line
	# is used, and as a last resort the newest past "date ... time" pattern.
	# Returns a Time or nil.
	#
	# NOTE(review): the keyword pattern with the empty leading alternative
	# and the patterns containing 'ǯ' / 'ʬ' look like multibyte literals
	# (probably Japanese text in EUC-JP) that were damaged by a charset
	# conversion. They are kept byte-for-byte here; as written the keyword
	# pattern matches every line, so the final elsif is never reached.
	# TODO: restore them from an intact copy of this file.
	def get_last_modified( str )
		lm = nil
		alter = nil
		data = NKF::nkf( '-m0 -e', str ).gsub( /<(?!meta|!--)[^>]*>/im, '' ).split( /\s*[\r\n]+\s*/ )
		data.each_index do |i|
			if /http-equiv=\"?last-modified.+content=\"([^\"]+)\"/i =~ data[i] ||
					/name=\"?wwwc.+content=\"([^\"]+)\"/i =~ data[i] then
				begin
					date = Time.parse( $1, nil ).localtime
					lm = date if lm.nil? || date > lm
					break
				rescue StandardError
					# Unparsable meta value: keep scanning the remaining lines.
				end
			elsif /|update|modified/i =~ data[i] then
				# Look at the neighbouring lines too. The first line has no
				# predecessor (data[-1] would wrap around to the last line).
				previous_line = i > 0 ? data[i-1] : ''
				s = previous_line + data[i] + (data[i+1]||'')
				begin
					if /(?:([0-9]+)ǯ\s*)?([0-9]+)\s*([0-9]+)/ =~ s then
						d = Time.parse( $1?"#{$1}-#{$2}-#{$3}":"#{$2}/#{$3}" )
					elsif /([0-9]+[\/\.\-])?[0-9]+[\/\.\-][0-9]+/ =~ s then
						d = Time.parse( $&.gsub( /[\.\-\/]/, $1?'-':'/' ) )
					elsif /[0-9]+-[A-Z][a-z]+-[0-9]+/ =~ s then
						d = Time.parse( $& )
					elsif / [0-9]{1,2} [A-Z][a-z]{2} [0-9]{4} / =~ s then
						d = Time.parse( $& )
					else
						next
					end
					if /([0-9]+)\s*([0-9]+)ʬ(?:\s*([0-9]+))?/ =~ s then
						t = Time.parse( "#{$1}:#{$2}:#{$3}" )
					elsif /[0-9]+:[0-9]+(:[0-9]+)?/ =~ s then
						t = Time.parse( $& )
					elsif /([0-9]+)([0-9]+)/ =~ s then
						t = Time.parse( "#{$1}:#{$2}" )
					else
						next
					end
					if /GMT/ =~ s then
						date = Time.gm( d.year, d.month, d.day, t.hour, t.min , t.sec ).localtime
					elsif /UTC/ =~ s then
						date = Time.utc( d.year, d.month, d.day, t.hour, t.min , t.sec ).localtime
					else
						date = Time.local( d.year, d.month, d.day, t.hour, t.min , t.sec )
					end
					# Dates in the future cannot be a modification time.
					if date < Time.now then
						lm = date if lm.nil? || date > lm
					end
				rescue StandardError
					# Unparsable date text: ignore this line.
				end
			elsif /\d+[\/\.\-]\d+[\/\.\-]\d+.+\d+:\d+(:\d+)?/ =~ data[i] then
				begin
					date = Time.parse( $&.gsub( /[\.\/]/, '-' ), nil )
					if date < Time.now then
						alter = date if alter.nil? || date > alter
					end
				rescue StandardError
					# Unparsable date text: ignore this line.
				end
			end
		end
		lm || alter
	end

	private

	# Returns the Proc to register: the explicit argument if given, else the
	# captured block. Raises ArgumentError when both are missing.
	def callback_from( block, handler )
		block || handler || raise( ArgumentError, 'tried to create Proc object without a block' )
	end

	# Calls untaint where the running Ruby still provides it (the method was
	# removed in Ruby 3.2) and returns the object either way.
	def untaint_compat( obj )
		obj.respond_to?( :untaint ) ? obj.untaint : obj
	end

	# Asks host:port for +path+ and stores the findings under the key +url+.
	# A Last-Modified header of a successful HEAD is preferred; otherwise the
	# body of a successful GET is scanned with get_last_modified.
	def probe_site( url, host, port, path, request_header )
		# Keep the legacy net/http 1.1 mode where it still exists; current
		# Ruby versions no longer define this switch.
		Net::HTTP.version_1_1 if Net::HTTP.respond_to?( :version_1_1 )
		Net::HTTP.start( untaint_compat( host ), untaint_compat( port ) ) do |http|
			response = http.head( path, request_header )
			if Net::HTTPSuccess === response && response['Last-Modified'] then
				lm = Time.parse( response['Last-Modified'] ).localtime
			else
				# In legacy 1.1 mode get returns [response, body]; otherwise it
				# returns the response itself. The destructuring handles both.
				response, = http.get( path, request_header )
				# Error and redirect pages must not be scanned for dates.
				lm = Net::HTTPSuccess === response ? get_last_modified( response.body ) : nil
			end
			if lm then
				@last_modified[url] = lm
				@content_length[url] = response['Content-Length'].to_i
				@last_detected[url] = Time.now
				@auth_url[url] = @antenna_url
			end
		end
	end

	# Reads a previously written RDF file (if any) and takes over its
	# <dc:date> values for URLs that have no modification time yet.
	def restore_last_modified( path )
		return unless File.exist?( path )
		buf = File.open( path ) {|f| f.read }
		begin
			buf = @rdf_decoder.call( buf )
		rescue StandardError
			# Fall back to the undecoded text.
		end
		linkurl = nil
		buf.scan( /<([A-Za-z:]+)>([^<]*)<\/\1>/ ) do |name, text|
			if name == 'link' then
				# Links are written with CGI::escapeHTML; undo that so the URL
				# matches the keys of @last_modified again.
				linkurl = CGI::unescapeHTML( text )
			elsif linkurl && name == 'dc:date' then
				begin
					date = Time.parse( text ).localtime
				rescue ArgumentError
					# A damaged date in the old file must not abort the output.
					next
				end
				url = get_unlink( linkurl, date )
				@last_modified[url] = date unless @last_modified[url]
			end
		end
	end

	# Returns the RDF document for the given, already sorted entries.
	def render_rdf( sorted_urls )
		r = ""
		r << <<-RDF
<?xml version="1.0" encoding="#{@rdf_encoding}"?>
<?xml-stylesheet href="index.xsl" type="text/xsl" media="screen"?>

<rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/">
<channel rdf:about="#{CGI::escapeHTML( @rdf_url )}">
<title>#{CGI::escapeHTML( @title )}</title>
<link>#{CGI::escapeHTML( @antenna_url )}</link>
<description>#{CGI::escapeHTML( @title )}</description>
<dc:date>#{Time.now.xmlschema}</dc:date>
<dc:language>ja</dc:language>
<dc:rights>#{CGI::escapeHTML( @copyright )}</dc:rights>
<dc:publisher>#{CGI::escapeHTML( @generator )}</dc:publisher>
<items>
<rdf:Seq>
RDF

		sorted_urls.each do |item|
			next unless item
			linkurl = get_link( item[2] )
			r << <<-RDF
<rdf:li rdf:resource="#{CGI::escapeHTML( linkurl )}"/>
RDF
		end

		r << <<-RDF
</rdf:Seq>
</items>
</channel>
RDF

		sorted_urls.each do |item|
			next unless item
			linkurl = get_link( item[2] )
			r << <<-RDF
<item rdf:about="#{CGI::escapeHTML( linkurl )}">
<title>#{CGI::escapeHTML( item[0] )}</title>
<link>#{CGI::escapeHTML( linkurl )}</link>
<description/>
<dc:creator>#{CGI::escapeHTML( item[1] )}</dc:creator>
RDF
			if @last_modified[item[2]] then
				r << <<-RDF
<dc:date>#{@last_modified[item[2]].xmlschema}</dc:date>
RDF
			end
			r << <<-RDF
</item>
RDF
		end

		r << <<-RDF
</rdf:RDF>
RDF
		r
	end
end

