<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
/*
 * Copyright 2004-2007 Project Guarana Development Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @package ficus.net
 */
/**
 * @file URISyntax.php
 * @brief URISyntax for php
 * @author <a href="mailto:sumi@wakhok.ac.jp">SUMI Masafumi</a>
 * @version $Id: URISyntax.php 2 2007-07-11 10:37:48Z ishitoya $
 * 
 * URISyntax for php
 */
require_once("ficus/collection/UnicodeIterator.php");
require_once("ficus/exception/URISyntaxException.php");
require_once("ficus/lang/Object.php");
require_once("ficus/lang/Unicode.php");
require_once("ficus/net/URICharacters.php");
require_once("ficus/XML/XMLUtils.php");
/**
 * @class Ficus_URISyntax
 *
 * Character tables plus validate/encode helpers for the generic URI
 * components (scheme, authority, path, query, fragment) in the style of
 * RFC 3986.
 *
 * Character sets are arrays whose entries are either a single character
 * code or a two element array(low, high) describing an inclusive range.
 *
 * NOTE(review): several methods call sibling instance methods through
 * self::, which PHP resolves to this class rather than to an override in a
 * subclass. That dispatch is kept as it was; confirm it is intended before
 * relying on overrides of the get*Chars() methods.
 */
class Ficus_URISyntax extends Ficus_Object implements Ficus_URICharacters {

    /**
     * URI pattern.
     *
     * Every group is optional, so this pattern splits rather than validates.
     */
    const URI_PATTERN = '/^(?:([^:\/\?#]+)(:))?(?:(\/\/)([^\/\?#]*))?([^\?#]*)(?:(\?)([^#]*))?(?:(#)(.*))?/';

    /**
     * Index of matches URI pattern.
     */
    const MATCH_SCHEME = 1;
    const MATCH_AUTHORITY = 4;
    const MATCH_PATH = 5;
    const MATCH_QUERY = 7;
    const MATCH_FRAGMENT = 9;

    /**
     * URI scheme pattern (the empty string is accepted).
     *
     * The D modifier keeps "$" from matching before a trailing newline.
     */
    const SCHEME_PATTERN = '/^(?:[a-zA-Z][a-zA-Z0-9\+\-\.]*)$|^$/D';

    /**
     * URI port pattern: zero or more digits and nothing else.
     */
    const PORT_PATTERN = '/^[0-9]*$/D';

    /**
     * IPv4 dec_octet pattern.
     *
     * Unanchored on purpose: validateIPV4Address() strips the delimiters and
     * anchors the combined four octet pattern itself.
     */
    const DEC_OCTET_PATTERN = '/([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/';

    /**
     * Alphabet char ranges.
     */
    protected static $alpha = array(
        array(self::C_CAPITAL_A, self::C_CAPITAL_Z),
        array(self::C_SMALL_A,   self::C_SMALL_Z)
    );

    /**
     * Digit char ranges.
     */
    protected static $digit = array(
        array(self::C_ZERO, self::C_NINE)
    );

    /**
     * Gen delimiters chars.
     */
    protected static $gen_delims = array(
        self::C_COLON,
        self::C_SLASH,
        self::C_QUESTION,
        self::C_SHARP,
        self::C_LEFT_SQUARE_BRACKET,
        self::C_RIGHT_SQUARE_BRACKET,
        self::C_ATMARK,
    );

    /**
     * Sub delimiters chars.
     *
     * NOTE(review): RFC 3986 lists "(" and ")" among the sub-delims, not
     * "{" and "}". Check what the two *_CURLY_BRACKET constants hold in
     * Ficus_URICharacters and whether parentheses were intended here.
     */
    protected static $sub_delims = array(
        self::C_EXCLAMATION,
        self::C_DOLLAR,
        self::C_AMPERSAND,
        self::C_APOSTROPHE,
        self::C_LEFT_CURLY_BRACKET,
        self::C_RIGHT_CURLY_BRACKET,
        self::C_ASTERISK,
        self::C_PLUS,
        self::C_COMMA,
        self::C_SEMICOLON,
        self::C_EQUALS
    );

    /**
     * Shared instance returned by createInstance().
     */
    protected static $self;

    /**
     * Constructor.
     *
     * Protected: obtain the instance through createInstance().
     */
    protected function __construct() {
    }

    /**
     * static constructor.
     *
     * @return Ficus_URISyntax the shared instance, created on first call.
     */
    public static function createInstance() {
        if (!isset(self::$self)) {
            self::$self = new Ficus_URISyntax();
        }
        return self::$self;
    }

    /**
     * Get syntax type.
     *
     * @return string syntax name used in exception messages.
     */
    public function getSyntaxType() {
        return 'URI';
    }

    /**
     * Get unreserved chars.
     *
     * IRI can use UCS character in some component.
     *
     * @return array of unreserved chars.
     */
    public function getUnreservedChars() {
        return array_merge(self::$alpha,
                           self::$digit,
                           array(self::C_HYPHEN),
                           array(self::C_DOT),
                           array(self::C_UNDERBAR),
                           array(self::C_TILDE));
    }

    /**
     * Get pchars.
     *
     * @return array of pchars.
     */
    public function getPChars() {
        return array_merge(self::getUnreservedChars(),
                           self::$sub_delims,
                           array(self::C_ATMARK),
                           array(self::C_COLON));
    }

    /**
     * Get segment chars.
     *
     * @return array of segment chars (pchars plus the slash).
     */
    public function getSegmentChars() {
        return array_merge(self::getUnreservedChars(),
                           self::$sub_delims,
                           array(self::C_ATMARK),
                           array(self::C_COLON),
                           array(self::C_SLASH));
    }

    /**
     * Get segment no colon chars.
     *
     * @return array of segment no colon chars.
     */
    public function getSegmentNCChars() {
        return array_merge(self::getUnreservedChars(),
                           self::$sub_delims,
                           array(self::C_ATMARK),
                           array(self::C_SLASH));
    }

    /**
     * Get fragment chars.
     *
     * @return array of fragment chars (pchars plus "/" and "?").
     */
    public function getFragmentChars() {
        return array_merge(self::getPChars(),
                           array(self::C_SLASH), array(self::C_QUESTION));
    }

    /**
     * Get query chars.
     *
     * IRI can use private character in query.
     *
     * @return array of query chars.
     */
    public function getQueryChars() {
        return self::getFragmentChars();
    }

    /**
     * Get userInfo chars.
     *
     * @return array of userInfo chars.
     */
    public function getUserInfoChars() {
        return array_merge(self::getUnreservedChars(),
                           self::$sub_delims,
                           array(self::C_COLON));
    }

    /**
     * Get IPvFuture chars.
     *
     * @return array of IPvFuture chars.
     */
    public function getIpvfutureChars() {
        return array_merge(self::getUnreservedChars(),
                           self::$sub_delims,
                           array(self::C_COLON));
    }

    /**
     * Get reg-name chars.
     *
     * @return array of reg-name chars.
     */
    public function getRegNameChars() {
        return array_merge(self::getUnreservedChars(),
                           self::$sub_delims);
    }

    /**
     * Validate char in ranges.
     *
     * @param $code string unicode character code.
     * @param $validChars array of single codes and/or array(low, high) ranges.
     * @return boolean true if valid; false when no char set is given.
     */
    protected function validateChar($code, array $validChars = null) {
        if ($validChars === null) {
            // Nothing is allowed; also avoids iterating over null.
            return false;
        }
        foreach ($validChars as $charRange) {
            if (is_array($charRange)) {
                if ($charRange[0] <= $code && $code <= $charRange[1]) {
                    return true;
                }
            } else if ($charRange == $code) {
                return true;
            }
        }
        return false;
    }

    /**
     * Validate string in char ranges.
     *
     * @param $str string string.
     * @param $validChars array of single codes and/or array(low, high) ranges.
     * @return boolean true if every character of $str is in $validChars.
     */
    public function validateString($str, array $validChars = null) {
        foreach (new Ficus_UnicodeIterator($str) as $code) {
            if (!self::validateChar($code, $validChars)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encode invalid string.
     *
     * Characters found in $validChars are copied, all others are replaced
     * by their percent-encoded form.
     *
     * NOTE(review): Ficus_MBStringIterator is not required by name in this
     * file; presumably one of the required files provides it - confirm.
     *
     * @param $str string string.
     * @param $validChars array of single codes and/or array(low, high) ranges.
     * @return string encoded string, or null when $str is null.
     */
    public function encodeString($str, array $validChars = null) {
        if (is_null($str)) {
            return null;
        }
        $ret = '';
        foreach (new Ficus_MBStringIterator($str) as $char) {
            $code = Ficus_Unicode::getUnicode($char);
            if (self::validateChar($code, $validChars)) {
                $ret .= $char;
            } else {
                $ret .= $this->encodeChar($char);
            }
        }
        return $ret;
    }

    /**
     * Encode char.
     *
     * @param $char string UTF-8 character.
     * @return string "%XX" for every byte of $char, upper case hex.
     */
    protected function encodeChar($char) {
        // bin2hex() always yields two digits per byte, so no padding is needed.
        $hex = strtoupper(bin2hex($char));
        return '%' . join('%', str_split($hex, 2));
    }

    /**
     * Validate scheme.
     *
     * @param $scheme string scheme.
     * @return void.
     * @throw Ficus_URISyntaxException if the scheme is malformed.
     */
    public function validateScheme($scheme) {
        if (preg_match(self::SCHEME_PATTERN, (string) $scheme) == 0) {
            throw new Ficus_URISyntaxException("Illegal charcter in {$this->getSyntaxType()} scheme : '{$scheme}'.");
        }
    }

    /**
     * Validate authority.
     *
     * Splits "[userinfo@]host[:port]" and validates each part. The userInfo
     * is validated only when an "@" is present. A bracketed IP literal keeps
     * its inner colons; the port is whatever follows the closing bracket.
     *
     * @param $authority string authority.
     * @return void.
     * @throw Ficus_URISyntaxException if any part is malformed.
     */
    public function validateAuthority($authority) {
        $authority = (string) $authority;

        $userInfo = null;
        $hostPort = $authority;
        $at = strpos($authority, '@');
        if ($at !== false) {
            $userInfo = (string) substr($authority, 0, $at);
            $hostPort = (string) substr($authority, $at + 1);
        }

        $host = $hostPort;
        $port = '';
        if (strncmp($hostPort, '[', 1) === 0) {
            // IP literal: only a ":" directly after "]" starts the port.
            $close = strrpos($hostPort, ']');
            if ($close !== false) {
                $rest = (string) substr($hostPort, $close + 1);
                if ($rest === '') {
                    $host = $hostPort;
                } else if ($rest[0] === ':') {
                    $host = (string) substr($hostPort, 0, $close + 1);
                    $port = (string) substr($rest, 1);
                }
                // Any other trailer stays in $host and fails validateHost().
            }
        } else {
            $colon = strpos($hostPort, ':');
            if ($colon !== false) {
                $host = (string) substr($hostPort, 0, $colon);
                $port = (string) substr($hostPort, $colon + 1);
            }
        }

        if ($userInfo !== null) {
            self::validateUserInfo($userInfo);
        }
        self::validateHost($host);
        self::validatePort($port);
    }

    /**
     * Validate userInfo.
     *
     * @param $userInfo string userInfo.
     * @return void.
     * @throw Ficus_URISyntaxException if an illegal character is found.
     */
    public function validateUserInfo($userInfo) {
        // pre-replace pct-encoded
        $decoded = Ficus_XMLUtils::decodeURI($userInfo);
        if (!self::validateString($decoded, self::getUserInfoChars())) {
            throw new Ficus_URISyntaxException("Illegal charcter in {$this->getSyntaxType()} userInfo : {$userInfo}.");
        }
    }

    /**
     * Encode userInfo.
     *
     * @param $userInfo string userInfo.
     * @return string encoded userInfo.
     */
    public function encodeUserInfo($userInfo) {
        return $this->encodeString($userInfo, self::getUserInfoChars());
    }

    /**
     * Validate host.
     *
     * Accepts an IP literal, an IPv4 address or a reg-name, tried in that
     * order.
     *
     * @param $host string host.
     * @return void.
     * @throw Ficus_URISyntaxException if the host matches none of them.
     */
    public function validateHost($host) {
        if (self::validateIPLiteral($host)) {
            return;
        }
        if (self::validateIPV4Address($host)) {
            return;
        }
        if (!self::validateRegName($host)) {
            throw new Ficus_URISyntaxException("Illegal charcter in {$this->getSyntaxType()} host : {$host}.");
        }
    }

    /**
     * Encode host.
     *
     * IP literals and IPv4 addresses are returned untouched; anything else
     * is encoded as a reg-name.
     *
     * @param $host string host.
     * @return string encoded host.
     */
    public function encodeHost($host) {
        if (self::validateIPLiteral($host)) {
            return $host;
        }
        if (self::validateIPV4Address($host)) {
            return $host;
        }
        return $this->encodeString($host, self::getRegNameChars());
    }

    /**
     * Validate port.
     *
     * @param $port string port (may be empty).
     * @return void.
     * @throw Ficus_URISyntaxException if the port contains a non digit.
     */
    public function validatePort($port) {
        if (preg_match(self::PORT_PATTERN, (string) $port) == 0) {
            throw new Ficus_URISyntaxException("Illegal charcter in {$this->getSyntaxType()} port : {$port}.");
        }
    }

    /**
     * Validate path.
     *
     * Unlike the other validate methods this one reports the result as a
     * boolean instead of throwing.
     *
     * @param $path string path.
     * @param $noscheme boolean true if no scheme (first segment must not
     *                  contain a colon).
     * @return boolean true if valid.
     */
    public function validatePath($path, $noscheme = false) {
        $path = (string) $path;
        if ($path === '') {
            // path-empty = 0<pchar>
            return true;
        }
        if ($noscheme && $path[0] != '/') {
            // path-noscheme = segment-nz-nc *( "/" segment )
            $charset = self::getSegmentNCChars();
            foreach (preg_split('/\//', $path, 2) as $segment) {
                if (!$this->validateString($segment, $charset)) {
                    return false;
                }
                // Everything after the first segment may contain colons.
                $charset = self::getSegmentChars();
            }
            return true;
        }
        // path-abempty  = *( "/" segment )
        // path-absolute = "/" [ segment-nz *( "/" segment ) ]
        // path-rootless = segment-nz *( "/" segment )
        return $this->validateString($path, self::getSegmentChars());
    }

    /**
     * Encode path.
     *
     * @param $path string path.
     * @param $noscheme boolean true if no scheme.
     * @return string encoded path; an empty or null path is returned as is.
     */
    public function encodePath($path, $noscheme = false) {
        if ($path === null || (string) $path === '') {
            // path-abempty = *( "/" segment )
            // path-empty = 0<pchar>
            return $path;
        }
        $path = (string) $path;
        if ($noscheme && $path[0] != '/') {
            // path-noscheme = segment-nz-nc *( "/" segment )
            $segments = array();
            $charset = self::getSegmentNCChars();
            foreach (preg_split('/\//', $path, 2) as $segment) {
                $segments []= $this->encodeString($segment, $charset);
                // Everything after the first segment may contain colons.
                $charset = self::getSegmentChars();
            }
            return join('/', $segments);
        }
        // path-abempty  = *( "/" segment )
        // path-absolute = "/" [ segment-nz *( "/" segment ) ]
        // path-rootless = segment-nz *( "/" segment )
        return $this->encodeString($path, self::getSegmentChars());
    }

    /**
     * Validate query.
     *
     * @param $query string query.
     * @return void.
     * @throw Ficus_URISyntaxException if an illegal character is found.
     */
    public function validateQuery($query) {
        if (!self::validateString($query, self::getQueryChars())) {
            throw new Ficus_URISyntaxException("Illegal charcter in {$this->getSyntaxType()} query : {$query}.");
        }
    }

    /**
     * Encode query.
     *
     * @param $query string query.
     * @return string encoded query.
     */
    public function encodeQuery($query) {
        return $this->encodeString($query, self::getQueryChars());
    }

    /**
     * Validate fragment.
     *
     * @param $fragment string fragment.
     * @return void.
     * @throw Ficus_URISyntaxException if an illegal character is found.
     */
    public function validateFragment($fragment) {
        if (!self::validateString($fragment, self::getFragmentChars())) {
            throw new Ficus_URISyntaxException("Illegal charcter in {$this->getSyntaxType()} fragment : {$fragment}.");
        }
    }

    /**
     * Encode fragment.
     *
     * @param $fragment string fragment.
     * @return string encoded fragment.
     */
    public function encodeFragment($fragment) {
        return $this->encodeString($fragment, self::getFragmentChars());
    }

    /**
     * Validate IP-literal.
     *
     * @param $host string hostname including the surrounding "[" and "]".
     * @return boolean true if valid.
     */
    public function validateIPLiteral($host) {
        if (preg_match('/^\[(.*)\]$/D', (string) $host, $matches) == 0) {
            return false;
        }
        if (Ficus_Unicode::at($matches[1], 0) == "v") {
            return self::validateIPvfuture($matches[1]);
        } else {
            return self::validateIPv6address($matches[1]);
        }
    }

    /**
     * Validate IPv6 address.
     *
     * Accepts at most one "::". Without it exactly eight 16 bit groups are
     * required, with it at most seven. A trailing dotted IPv4 address takes
     * the place of the last two groups.
     *
     * @param $host string address without the surrounding brackets.
     * @return boolean true if valid.
     */
    public function validateIPv6Address($host) {
        $halves = explode('::', (string) $host);
        if (count($halves) > 2) {
            // "::" may appear only once.
            return false;
        }
        $compressed = (count($halves) == 2);
        $tail = $halves[count($halves) - 1];

        // An empty side of "::" contributes no groups at all.
        $left = ($compressed && $halves[0] !== '')
            ? explode(':', $halves[0]) : array();
        $right = ($tail !== '') ? explode(':', $tail) : array();

        $slots = 8;
        $lastIndex = count($right) - 1;
        if ($lastIndex >= 0 && strpos($right[$lastIndex], '.') !== false) {
            $v4 = array_pop($right);
            if (!self::validateIPV4address($v4)) {
                return false;
            }
            $slots = 6;
        }

        $v6 = array_merge($left, $right);
        if ($compressed) {
            // "::" stands for at least one group.
            if (count($v6) > $slots - 1) {
                return false;
            }
        } else if (count($v6) != $slots) {
            return false;
        }
        foreach ($v6 as $hex) {
            if (preg_match('/^[0-9a-fA-F]{1,4}$/D', $hex) == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Validate IPvfuture address.
     *
     * Expected form: "v" hex-digits "." followed by at least one character
     * from getIpvfutureChars().
     *
     * @param $host string address without the surrounding brackets.
     * @return boolean true if valid.
     */
    public function validateIPvfuture($host) {
        if (preg_match('/^v[0-9A-Fa-f]+\.(.+)$/Ds', (string) $host, $matches) == 0) {
            return false;
        }
        return self::validateString($matches[1], self::getIpvfutureChars());
    }

    /**
     * Validate IPv4 address.
     *
     * @param $host string hostname.
     * @return boolean true if $host is exactly four dec-octets joined by dots.
     */
    public function validateIPV4Address($host) {
        // Reuse the octet pattern without its "/" delimiters.
        $octet = substr(self::DEC_OCTET_PATTERN, 1, -1);
        $IPV4Pattern = '/^' . join('\\.', array_fill(0, 4, $octet)) . '$/D';
        return preg_match($IPV4Pattern, (string) $host) > 0;
    }

    /**
     * Validate reg-name.
     *
     * @param $host string reg-name.
     * @return boolean true if valid.
     */
    public function validateRegName($host) {
        // pre-replace pct-encoded
        $decoded = Ficus_XMLUtils::decodeURI($host);
        return self::validateString($decoded, self::getRegNameChars());
    }

    /**
     * Split URI component.
     *
     * @param $uri IRI or URI.
     * @return array of components in the order scheme, authority, path,
     *         query, fragment. A component is null when its delimiter is
     *         absent; the path is always a string.
     * @throw Ficus_URISyntaxException if the pattern does not match.
     */
    public function splitComponent($uri) {
        if (preg_match(self::URI_PATTERN, $uri, $matches) == 0) {
            throw new Ficus_URISyntaxException("Syntax error: {$uri}");
        }
        $components = array();
        $components []= $this->matchURIComponent($matches, self::MATCH_SCHEME);
        $components []= $this->matchURIComponent($matches, self::MATCH_AUTHORITY);
        $components []= $matches[self::MATCH_PATH];
        $components []= $this->matchURIComponent($matches, self::MATCH_QUERY);
        $components []= $this->matchURIComponent($matches, self::MATCH_FRAGMENT);
        return $components;
    }

    /**
     * Get match URI component.
     *
     * A component counts as present only when the group holding its
     * delimiter matched, which tells an empty component from a missing one.
     *
     * @param $matches array matches of URI pattern.
     * @param $componentType int Index of matches URI pattern.
     * @return string matches component, or null when it is absent.
     */
    private function matchURIComponent($matches, $componentType) {
        // component index => array(index of delimiter group, delimiter)
        static $delimiters = array(self::MATCH_SCHEME    => array(2, ':'),
                                   self::MATCH_AUTHORITY => array(3, '//'),
                                   self::MATCH_QUERY     => array(6, '?'),
                                   self::MATCH_FRAGMENT  => array(8, '#'));
        $delim = $delimiters[$componentType];
        if (isset($matches[$delim[0]]) && $matches[$delim[0]] == $delim[1]) {
            return $matches[$componentType];
        }
        return null;
    }

}
?>
