#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Retrieve video site URLs from redirection URLs or video collection sites.
#

import sys
import re
import StringIO
import gzip
import urllib2


# Expands redirector / video-collection page URLs into the URL of the
# video site they point at.
#
# Usage: create an instance, optionally call set_useragent(), then call
# expand(url).  After expand(), orig_url holds the input URL and dest_url
# holds the result (None when the URL could not be expanded).
class URLExpander(object):

  # First two bytes of every gzip stream (RFC 1952).
  _GZIP_MAGIC = b"\x1f\x8b"

  def __init__(self):
    self.useragent = None
    self.orig_url = None
    self.dest_url = None
    # Each entry is [pattern, handler].  The pattern's first group is the
    # part of the URL handed to the method named "_expand_" + handler.
    # Entries are tried in order; the first matching pattern wins.
    # NOTE(review): in the g-idol pattern "/?" is a regex "optional slash",
    # not a literal question mark -- confirm against real URLs.
    self.supported_sites = (
      [r"^http://ime\.nu/(.+)", "common"],
      [r"^http://pinktower\.com/(.+)", "common"],
      [r"^http://b\.hatena\.ne\.jp/entry/(?:s/)?(.+)", "common"],
      [r"^http://www\.g-idol\.com/url\.cgi/bbs/?(.+)", "common"],
      [r"^http://www\.nicovideo\.jp/watch/(.+)", "nicovideo"],
      [r"^http://www\.youtube\.com/watch_videos\?(.+)", "youtube2"],
      [r"^http://www\.youtube\.com/view_play_list\?(.+)", "youtube3"],
      [r"^http://www\.youtube\.com/v/(.+)", "youtube4"],
      [r"^http://www\.yourlifehost\.jp/(video-.+)", "ylhjp"],
      [r"^http://yourfilehost-navi\.com/play/(.+)/", "yfhnavi"],)

  # Expand `url` using the first entry of supported_sites that matches.
  #
  # Returns the expanded URL, or None when no entry matches or the matching
  # handler cannot find a destination.  The result is also stored in
  # dest_url; it is reset on every call so a failed expansion never leaves
  # the previous call's result behind.
  # The "ylhjp" and "yfhnavi" handlers download the page, so errors raised
  # by urllib2 while opening orig_url propagate to the caller.
  def expand(self, url):
    self.orig_url = url
    self.dest_url = None
    for pattern, handler in self.supported_sites:
      match = re.search(pattern, url)
      if match:
        # Handlers take their input from dest_url (the captured URL part).
        self.dest_url = match.group(1)
        self.dest_url = getattr(self, "_expand_" + handler)()
        return self.dest_url
    return None

  # Set User-Agent header for loading web pages.
  def set_useragent(self, useragent):
    self.useragent = useragent

  # Download `url` and return its body.
  #
  # Sends the configured User-Agent (if any), always closes the response,
  # and transparently decompresses a gzip-compressed body.
  def _fetch(self, url):
    opener = urllib2.build_opener()
    if self.useragent:
      opener.addheaders = [('User-agent', self.useragent)]
    response = opener.open(url)
    try:
      body = response.read()
    finally:
      response.close()
    return self._gunzip_if_needed(body)

  # Return `body` decompressed when it starts with the gzip magic bytes,
  # otherwise return it unchanged.
  @staticmethod
  def _gunzip_if_needed(body):
    if body[:2] != URLExpander._GZIP_MAGIC:
      return body
    import io  # local import: keeps this helper free of new file-level deps
    return gzip.GzipFile(fileobj=io.BytesIO(body)).read()

  # The captured part already is the destination; only add a scheme when
  # it has none.
  def _expand_common(self):
    if not re.match(r"https?://", self.dest_url):
      self.dest_url = 'http://' + self.dest_url
    return self.dest_url

  # http://www.nicovideo.jp/
  def _expand_nicovideo(self):
    # Use 'Nico nico pedia' instead of 'Nico nico douga'.
    self.dest_url = 'http://dic.nicovideo.jp/v/' + self.dest_url
    return self.dest_url

  # http://www.youtube.com/watch_videos
  #
  # Picks the video selected by the "index" parameter (default 0) out of
  # the "video_ids" list.  Returns None when "video_ids" is missing or the
  # index does not select a video.
  def _expand_youtube2(self):
    query = self.dest_url
    match = re.search(r"index=(\d+)", query)
    index = int(match.group(1)) if match else 0

    # "video_ids" may be the last parameter, so no trailing "&" is required.
    match = re.search(r"video_ids=([^&]+)", query)
    if not match:
      return None
    # Ids are comma separated; the comma may or may not be URL-encoded.
    video_ids = re.split(r"%2[Cc]|,", match.group(1))
    if index >= len(video_ids) or not video_ids[index]:
      return None
    self.dest_url = "http://www.youtube.com/watch?v=" + video_ids[index]
    return self.dest_url

  # http://www.youtube.com/view_play_list
  #
  # Uses the "v" parameter as the video id; returns None when it is absent.
  def _expand_youtube3(self):
    match = re.search(r"v=([^&]+)", self.dest_url)
    if not match:
      return None
    self.dest_url = "http://www.youtube.com/watch?v=" + match.group(1)
    return self.dest_url

  # http://www.youtube.com/v/VIDEO_ID
  #
  # The video id ends at the first "?" or "&", so trailing parameters such
  # as "?version=3" do not end up in the id.
  def _expand_youtube4(self):
    match = re.search(r"([^&?]+)", self.dest_url)
    if not match:
      return None
    self.dest_url = "http://www.youtube.com/watch?v=" + match.group(1)
    return self.dest_url

  # http://www.yourlifehost.jp/
  #
  # Downloads orig_url and returns the first yourfilehost.com media link
  # found in an href attribute, or None when the page has none.
  def _expand_ylhjp(self):
    page = self._fetch(self.orig_url)
    match = re.search(
      r'href="(http://www\.yourfilehost\.com/'
      r'media\.php\?cat=video&file=[^"]+)"', page)
    if not match:
      return None
    self.dest_url = match.group(1)
    return self.dest_url

  # http://yourfilehost-navi.com/
  #
  # Downloads orig_url and returns the first yourfilehost.com media link
  # that follows "url=", or None when the page has none.
  def _expand_yfhnavi(self):
    page = self._fetch(self.orig_url)
    match = re.search(
      r'url=(http://www\.yourfilehost\.com/media\.php\?cat=video&file=[^"]+)"',
      page)
    if not match:
      return None
    self.dest_url = match.group(1)
    return self.dest_url

# Manual test: expand every URL given on the command line and print the
# result (None when a URL cannot be expanded).
if __name__ == '__main__':
  for argument in sys.argv[1:]:
    url_expander = URLExpander()
    url_expander.set_useragent("Mozilla/5.0")
    print(url_expander.expand(argument))
