Commit a0014166 authored by Kseniya's avatar Kseniya
Browse files

rm IdnaConvert lib

parent fdd3a912
<?php
namespace Drupal\synhelper\Controller;
// {{{ license
// +----------------------------------------------------------------------+
// | This library is free software; you can redistribute it and/or modify |
// | it under the terms of the GNU Lesser General Public License as |
// | published by the Free Software Foundation; either version 2.1 of the |
// | License, or (at your option) any later version. |
// | |
// | This library is distributed in the hope that it will be useful, but |
// | WITHOUT ANY WARRANTY; without even the implied warranty of |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
// | Lesser General Public License for more details. |
// | |
// | You should have received a copy of the GNU Lesser General Public |
// | License along with this library; if not, write to the Free Software |
// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
// | USA. |
// +----------------------------------------------------------------------+
//
// }}}
/**
* Encode/decode Internationalized Domain Names.
*
* The class allows to convert internationalized domain names
* (see RFC 3490 for details) as they can be used with various registries worldwide
* to be translated between their original (localized) form and their encoded form
* as it will be used in the DNS (Domain Name System).
*
* The class provides two public methods, encode() and decode(), which do exactly
* what you would expect them to do. You are allowed to use complete domain names,
* simple strings and complete email addresses as well. That means, that you might
* use any of the following notations:
*
* - www.nörgler.com
* - xn--nrgler-wxa
* - xn--brse-5qa.xn--knrz-1ra.info
*
* Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4 array.
* Unicode output is available in the same formats.
* You can select your preferred format via {@link set_parameter()}.
*
* ACE input and output is always expected to be ASCII.
*
* @author Matthias Sommerfeld <mso@phlylabs.de>
* @copyright 2004-2011 phlyLabs Berlin, http://phlylabs.de
* @version 0.8.1 2011-12-19
*/
class IdnaConvert {
// NP See below.
// Internal settings, do not mess with them.
// NOTE(review): the constructor also assigns $this->slast, which is not
// declared in this list. Creating properties dynamically is deprecated as of
// PHP 8.2 - confirm whether a declaration exists further down in the class.
// Prefix that marks a Punycode-encoded (ACE) label.
protected $_punycode_prefix = 'xn--';
protected $_invalid_ucs = 0x80000000;
// Starting upper bound when _encode() searches for the next code point.
protected $_max_ucs = 0x10FFFF;
// Punycode parameters; the values match the ones listed in RFC 3492, section 5.
protected $_base = 36;
protected $_tmin = 1;
protected $_tmax = 26;
protected $_skew = 38;
protected $_damp = 700;
protected $_initial_bias = 72;
protected $_initial_n = 0x80;
// Presumably the Unicode Hangul (de)composition constants (syllable base,
// jamo bases and counts); the code that consumes them is not in view - verify.
protected $_sbase = 0xAC00;
protected $_lbase = 0x1100;
protected $_vbase = 0x1161;
protected $_tbase = 0x11A7;
protected $_lcount = 19;
protected $_vcount = 21;
protected $_tcount = 28;
protected $_ncount = 588; // _vcount * _tcount
protected $_scount = 11172; // _lcount * _tcount * _vcount
// Last message recorded by _error(); FALSE until an error has been recorded.
protected $_error = FALSE;
// Cached by the constructor: TRUE when mbstring is loaded and bit 0x02 of
// mbstring.func_overload is set. NULL means "not determined yet".
protected static $_mb_string_overload = NULL;
// See {@link set_parameter()} for details of how to change the following
// settings from within your script / application
protected $_api_encoding = 'utf8'; // Default input charset is UTF-8
protected $_allow_overlong = FALSE; // Overlong UTF-8 encodings are forbidden
protected $_strict_mode = FALSE; // Behave strict or not
protected $_idn_version = 2003; // Can be either 2003 (old, default) or 2008
/**
 * Sets up the converter.
 *
 * Stores the derived value slast, forwards an options array (if one is given)
 * to set_parameter(), and fills the static mbstring-overload cache the first
 * time an instance is created.
 *
 * @param array|bool $options Option => value pairs understood by
 *   set_parameter(). Any non-array value (such as the default FALSE) is ignored.
 * @since 0.5.2
 */
public function __construct($options = FALSE) {
  // _sbase plus the product of the three count settings.
  $combinations = $this->_lcount * $this->_vcount * $this->_tcount;
  $this->slast = $this->_sbase + $combinations;

  // Only an array of options is handed on; anything else is silently skipped.
  if (is_array($options)) {
    $this->set_parameter($options);
  }

  // Determine once whether mbstring overloads the string functions (bit 0x02).
  // The ini setting is only consulted when the extension is actually loaded.
  if (NULL === self::$_mb_string_overload) {
    $overloaded = FALSE;
    if (extension_loaded('mbstring')) {
      $overloaded = ((ini_get('mbstring.func_overload') & 0x02) === 0x02);
    }
    self::$_mb_string_overload = $overloaded;
  }
}
/**
 * Sets one or more options.
 *
 * Recognised options:
 *  - encoding: 'utf8', 'ucs4_string' or 'ucs4_array'. Stored as the API
 *    encoding, which encode() uses as its input format and decode() as its
 *    output format whenever no one-time encoding is passed to them.
 *  - overlong: truthy to allow overlong UTF-8 encodings, falsy to forbid them
 *    (default FALSE).
 *  - strict: truthy to enable strict mode, in which encode() and decode()
 *    refuse e-mail addresses and URLs (default FALSE).
 *  - idn_version: 2003 or 2008, as integer or numeric string (default 2003).
 *  - encode_german_sz: deprecated. A falsy value installs the replacement
 *    0xDF => (0x73, 0x73) in the static NP table, a truthy value removes it.
 *
 * Processing stops at the first unknown option or invalid value; options that
 * were handled before that point stay applied. The reason is available through
 * get_last_error().
 *
 * @param mixed $option Option name, or an array of option => value pairs
 * @param mixed $value  Value to use when $option is a single option name
 * @return boolean TRUE when every option was applied, FALSE otherwise
 */
public function set_parameter($option, $value = FALSE) {
  if (!is_array($option)) {
    $option = array($option => $value);
  }
  foreach ($option as $k => $v) {
    switch ($k) {
      case 'encoding':
        switch ($v) {
          case 'utf8':
          case 'ucs4_string':
          case 'ucs4_array':
            $this->_api_encoding = $v;
            break;
          default:
            $this->_error("Set Parameter: Unknown parameter $v for option $k");
            return FALSE;
        }
        break;
      case 'overlong':
        $this->_allow_overlong = ($v) ? TRUE : FALSE;
        break;
      case 'strict':
        $this->_strict_mode = ($v) ? TRUE : FALSE;
        break;
      case 'idn_version':
        // Loose comparison on purpose: both 2003 and '2003' are accepted.
        if (in_array($v, array('2003', '2008'))) {
          $this->_idn_version = $v;
        }
        else {
          // An invalid version is a failure like any other invalid value;
          // report it through the return value as well as the error message.
          $this->_error("Set Parameter: Unknown parameter $v for option $k");
          return FALSE;
        }
        break;
      // Deprecated.
      case 'encode_german_sz':
        if (!$v) {
          self::$NP['replacemaps'][0xDF] = array(0x73, 0x73);
        }
        else {
          unset(self::$NP['replacemaps'][0xDF]);
        }
        break;
      default:
        $this->_error("Set Parameter: Unknown option $k");
        return FALSE;
    }
  }
  return TRUE;
}
/**
 * Decodes an ACE (Punycode) string back to its Unicode form.
 *
 * The input is trimmed and then handled as one of three shapes:
 *  - it contains an '@' after the first character: treated as an e-mail
 *    address; labels on both sides that start with the Punycode prefix are
 *    decoded;
 *  - it contains ':', '.' or '/': treated as a domain name or URL; the host
 *    labels are decoded and the remaining URL parts are re-attached;
 *  - anything else: decoded as a single label.
 * Labels that cannot be decoded are kept as they are. In strict mode the
 * first two shapes are rejected.
 *
 * @param string $input Domain name, URL or e-mail address (ACE string)
 * @param string|bool $one_time_encoding Output format for this call only
 *   ('utf8', 'ucs4_string' or 'ucs4_array'); FALSE uses the configured one
 * @return mixed Decoded value in the requested format, or FALSE on error
 */
public function decode($input, $one_time_encoding = FALSE) {
  // A one-off output format, when given, must be one of the known names.
  if ($one_time_encoding) {
    $is_known = ($one_time_encoding == 'utf8')
      || ($one_time_encoding == 'ucs4_string')
      || ($one_time_encoding == 'ucs4_array');
    if (!$is_known) {
      $this->_error("Unknown encoding {$one_time_encoding}");
      return FALSE;
    }
  }

  // Drop surrounding whitespace, including stray newlines.
  $input = trim($input);

  if (strpos($input, '@')) {
    // Looks like an e-mail address; not acceptable in strict mode.
    if ($this->_strict_mode) {
      $this->_error('Only simple domain name parts can be handled in strict mode');
      return FALSE;
    }
    $prefix_regex = '!^'.preg_quote($this->_punycode_prefix, '!').'!';
    list($local_part, $domain_part) = explode('@', $input, 2);
    // The domain is processed before the local part.
    $sections = array($domain_part, $local_part);
    foreach ($sections as $which => $section) {
      $labels = explode('.', $section);
      foreach ($labels as $pos => $label) {
        if (preg_match($prefix_regex, $label)) {
          $plain = $this->_decode($label);
          if ($plain) {
            $labels[$pos] = $plain;
          }
        }
      }
      $sections[$which] = join('.', $labels);
    }
    $return = $sections[1] . '@' . $sections[0];
  }
  elseif (preg_match('![:\./]!', $input)) {
    // A full domain name, possibly with scheme, path or query.
    if ($this->_strict_mode) {
      $this->_error('Only simple domain name parts can be handled in strict mode');
      return FALSE;
    }
    $parsed = parse_url($input);
    if (isset($parsed['host'])) {
      $labels = explode('.', $parsed['host']);
      foreach ($labels as $pos => $label) {
        $plain = $this->_decode($label);
        if ($plain) {
          $labels[$pos] = $plain;
        }
      }
      $parsed['host'] = join('.', $labels);

      // Put the URL back together piece by piece.
      $return = '';
      if (!empty($parsed['scheme'])) {
        $return .= $parsed['scheme'];
        $return .= (strtolower($parsed['scheme']) == 'mailto') ? ':' : '://';
      }
      if (!empty($parsed['user'])) {
        $return .= $parsed['user'];
        if (!empty($parsed['pass'])) {
          $return .= ':'.$parsed['pass'];
        }
        $return .= '@';
      }
      $return .= $parsed['host'];
      if (!empty($parsed['port'])) {
        $return .= ':'.$parsed['port'];
      }
      if (!empty($parsed['path'])) {
        $return .= $parsed['path'];
      }
      if (!empty($parsed['query'])) {
        $return .= '?'.$parsed['query'];
      }
      if (!empty($parsed['fragment'])) {
        $return .= '#'.$parsed['fragment'];
      }
    }
    else {
      // parse_url() found no host; fall back to splitting on dots.
      $labels = array();
      foreach (explode('.', $input) as $label) {
        $plain = $this->_decode($label);
        $labels[] = $plain ? $plain : $label;
      }
      $return = join('.', $labels);
    }
  }
  else {
    // A single label; keep the input when it cannot be decoded.
    $return = $this->_decode($input);
    if (!$return) {
      $return = $input;
    }
  }

  // $return is UTF-8 at this point; convert if another format was asked for.
  $format = $one_time_encoding ? $one_time_encoding : $this->_api_encoding;
  if ($format == 'utf8') {
    return $return;
  }
  if ($format == 'ucs4_string') {
    return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
  }
  if ($format == 'ucs4_array') {
    return $this->_utf8_to_ucs4($return);
  }
  $this->_error('Unsupported output format');
  return FALSE;
}
/**
 * Encodes a Unicode domain name into its ACE (Punycode) form.
 *
 * The input is first converted to a UCS-4 array. It is then split at the
 * characters '.', '/', ':', '?' and '@' (the dot variants 0x3002, 0xFF0E and
 * 0xFF61 are turned into a plain dot first); every segment between two such
 * characters is encoded on its own. Segments that _encode() refuses are
 * emitted as UTF-8 instead. In strict mode any of these separator characters
 * makes the call fail.
 *
 * @param mixed $decoded Domain name as UTF-8 string, UCS-4 string or UCS-4 array
 * @param string|bool $one_time_encoding Input format for this call only
 *   ('utf8', 'ucs4_string' or 'ucs4_array'); FALSE uses the configured one
 * @return mixed Encoded string, '' for empty input, or FALSE on error
 */
public function encode($decoded, $one_time_encoding = FALSE) {
  // Bring the input into UCS-4 array form, whatever it arrived as.
  $format = $one_time_encoding ? $one_time_encoding : $this->_api_encoding;
  if ($format == 'utf8') {
    $decoded = $this->_utf8_to_ucs4($decoded);
  }
  elseif ($format == 'ucs4_string') {
    $decoded = $this->_ucs4_string_to_ucs4($decoded);
  }
  elseif ($format != 'ucs4_array') {
    $this->_error('Unsupported input format: '.$format);
    return FALSE;
  }

  // Nothing in, nothing out.
  if (empty($decoded)) {
    return '';
  }

  $dot_variants = array(0x3002, 0xFF0E, 0xFF61);
  $separators = array(0x2E, 0x2F, 0x3A, 0x3F, 0x40);
  // Index of the first code point of the segment currently being collected.
  $last_begin = 0;
  $output = '';

  foreach ($decoded as $k => $v) {
    if (in_array($v, $dot_variants)) {
      // Normalise to the plain dot, then treat it like any other separator.
      $decoded[$k] = 0x2E;
    }
    elseif (!in_array($v, $separators)) {
      continue;
    }

    // Separators only occur in e-mail addresses and URLs.
    if ($this->_strict_mode) {
      $this->_error('Neither email addresses nor URLs are allowed in strict mode.');
      return FALSE;
    }

    // A separator at the very first position has no segment in front of it.
    if ($k) {
      $segment = array_slice($decoded, $last_begin, $k - $last_begin);
      $ace = $this->_encode($segment);
      $output .= $ace ? $ace : $this->_ucs4_to_utf8($segment);
      $output .= chr($decoded[$k]);
    }
    $last_begin = $k + 1;
  }

  // No segment boundary was recorded: encode the input as a whole.
  if (!$last_begin) {
    $output = $this->_encode($decoded);
    return $output ? $output : $this->_ucs4_to_utf8($decoded);
  }

  // Otherwise append whatever follows the last separator.
  $tail = array_slice($decoded, $last_begin, count($decoded) - $last_begin);
  $ace = $this->_encode($tail);
  $output .= $ace ? $ace : $this->_ucs4_to_utf8($tail);
  return $output;
}
/**
 * Encodes only the host part of a URI.
 *
 * Works around encode() also touching path and query components: the URI is
 * split with parse_url(), each host label is passed through encode() as
 * UTF-8, and the URI is put back together with all other parts unchanged.
 *
 * @param string $uri The URI as a UTF-8 (or ASCII) string
 * @return mixed The URI with a Punycode host, or FALSE when no host was found
 * @since 0.6.4
 */
public function encode_uri($uri) {
  $parsed = parse_url($uri);
  if (!isset($parsed['host'])) {
    $this->_error('The given string does not look like a URI');
    return FALSE;
  }

  // Encode label by label; labels encode() returns nothing for stay as is.
  $labels = explode('.', $parsed['host']);
  foreach ($labels as $pos => $label) {
    $ace = $this->encode($label, 'utf8');
    if ($ace) {
      $labels[$pos] = $ace;
    }
  }
  $parsed['host'] = join('.', $labels);

  // Reassemble the URI piece by piece.
  $result = '';
  if (!empty($parsed['scheme'])) {
    $result .= $parsed['scheme'];
    $result .= (strtolower($parsed['scheme']) == 'mailto') ? ':' : '://';
  }
  if (!empty($parsed['user'])) {
    $result .= $parsed['user'];
    if (!empty($parsed['pass'])) {
      $result .= ':'.$parsed['pass'];
    }
    $result .= '@';
  }
  $result .= $parsed['host'];
  if (!empty($parsed['port'])) {
    $result .= ':'.$parsed['port'];
  }
  if (!empty($parsed['path'])) {
    $result .= $parsed['path'];
  }
  if (!empty($parsed['query'])) {
    $result .= '?'.$parsed['query'];
  }
  if (!empty($parsed['fragment'])) {
    $result .= '#'.$parsed['fragment'];
  }
  return $result;
}
/**
 * Returns the most recently recorded error.
 *
 * @return mixed The message last stored through _error(), or FALSE when no
 *   error has been recorded on this instance yet
 */
public function get_last_error() {
  return $this->_error;
}
/**
 * The actual decoding algorithm: turns one Punycode label into UTF-8.
 *
 * The label must start with the Punycode prefix. Everything between the
 * prefix and the last '-' is copied verbatim as basic code points; the rest
 * is read as a sequence of variable-length integers that say which code
 * point to insert at which position.
 *
 * @param string $encoded A single label, including the Punycode prefix
 * @return mixed The decoded label as UTF-8 string, or FALSE when the input
 *   is not a (well-formed) Punycode label; the reason is recorded via _error()
 */
protected function _decode($encoded)
{
  $decoded = array();
  // find the Punycode prefix
  if (!preg_match('!^'.preg_quote($this->_punycode_prefix, '!').'!', $encoded)) {
    $this->_error('This is not a punycode string');
    return FALSE;
  }
  $encode_test = preg_replace('!^'.preg_quote($this->_punycode_prefix, '!').'!', '', $encoded);
  // If nothing left after removing the prefix, it is hopeless
  if (!$encode_test) {
    $this->_error('The given encoded string was empty');
    return FALSE;
  }
  // Find last occurence of the delimiter
  $delim_pos = strrpos($encoded, '-');
  if ($delim_pos > self::byteLength($this->_punycode_prefix)) {
    // Everything between the prefix and the delimiter is a basic code point.
    for ($k = self::byteLength($this->_punycode_prefix); $k < $delim_pos; ++$k) {
      // Square brackets: the curly-brace offset syntax no longer parses in PHP 8.
      $decoded[] = ord($encoded[$k]);
    }
  }
  $deco_len = count($decoded);
  $enco_len = self::byteLength($encoded);
  // Wandering through the strings; init
  $is_first = TRUE;
  $bias = $this->_initial_bias;
  $idx = 0;
  $char = $this->_initial_n;
  for ($enco_idx = ($delim_pos) ? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
    // Read one variable-length integer; it ends at the first digit below the
    // current threshold $t.
    for ($old_idx = $idx, $w = 1, $k = $this->_base; 1 ; $k += $this->_base) {
      // A well-formed label always terminates its last integer before the end
      // of the string; running out of input means the label is truncated.
      if ($enco_idx >= $enco_len) {
        $this->_error('The given encoded string is not valid punycode');
        return FALSE;
      }
      $digit = $this->_decode_digit($encoded[$enco_idx++]);
      // Digits must lie in 0 .. base-1; anything else is not a Punycode digit.
      if ($digit < 0 || $digit >= $this->_base) {
        $this->_error('The given encoded string is not valid punycode');
        return FALSE;
      }
      $idx += $digit * $w;
      $t = ($k <= $bias) ? $this->_tmin :
        (($k >= $bias + $this->_tmax) ? $this->_tmax : ($k - $bias));
      if ($digit < $t) break;
      $w = (int) ($w * ($this->_base - $t));
    }
    $bias = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
    $is_first = FALSE;
    // The integer encodes both the code point increment and the insert position.
    $char += (int) ($idx / ($deco_len + 1));
    $idx %= ($deco_len + 1);
    if ($deco_len > 0) {
      // Make room for the decoded char
      for ($i = $deco_len; $i > $idx; $i--) $decoded[$i] = $decoded[($i - 1)];
    }
    $decoded[$idx++] = $char;
  }
  return $this->_ucs4_to_utf8($decoded);
}
/**
 * The actual encoding algorithm: turns one label into its Punycode form.
 *
 * Refuses input that already starts with the Punycode prefix and input in
 * which no code point is above 0x7A. The remaining input is run through
 * _nameprep() and then encoded.
 *
 * @param array $decoded The label as an array of UCS-4 code points
 * @return mixed The encoded label (with prefix, unless every code point was
 *   copied verbatim), or FALSE when the label was not encoded; the reason is
 *   recorded via _error() for the cases detected here
 */
protected function _encode($decoded)
{
  // We cannot encode a domain name containing the Punycode prefix
  $extract = self::byteLength($this->_punycode_prefix);
  $check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
  $check_deco = array_slice($decoded, 0, $extract);
  if ($check_pref == $check_deco) {
    $this->_error('This is already a punycode string');
    return FALSE;
  }
  // We will not try to encode strings consisting of basic code points only
  $encodable = FALSE;
  foreach ($decoded as $v) {
    if ($v > 0x7a) {
      $encodable = TRUE;
      break;
    }
  }
  if (!$encodable) {
    $this->_error('The given string does not contain encodable chars');
    return FALSE;
  }
  // Do NAMEPREP
  $decoded = $this->_nameprep($decoded);
  if (!$decoded || !is_array($decoded)) return FALSE; // NAMEPREP failed
  $deco_len = count($decoded);
  if (!$deco_len) return FALSE; // Empty array
  $codecount = 0; // How many chars have been consumed
  $encoded = '';
  // Copy all basic code points to output
  for ($i = 0; $i < $deco_len; ++$i) {
    $test = $decoded[$i];
    // Matches 0x30-0x3F (digits plus : ; < = > ?), 0x41-0x5A (A-Z),
    // 0x61-0x7B (a-z plus '{') and 0x2D ('-').
    if ((0x2F < $test && $test < 0x40) || (0x40 < $test && $test < 0x5B)
      || (0x60 < $test && $test <= 0x7B) || (0x2D == $test)) {
      $encoded .= chr($decoded[$i]);
      $codecount++;
    }
  }
  if ($codecount == $deco_len) return $encoded; // All codepoints were basic ones
  // Start with the prefix; copy it to output
  $encoded = $this->_punycode_prefix.$encoded;
  // If we have basic code points in output, add an hyphen to the end
  if ($codecount) $encoded .= '-';
  // Now find and encode all non-basic code points
  $is_first = TRUE;
  $cur_code = $this->_initial_n;
  $bias = $this->_initial_bias;
  $delta = 0;
  while ($codecount < $deco_len) {
    // Find the smallest code point >= the current code point and
    // remember the last ouccrence of it in the input
    $found = FALSE;
    for ($i = 0, $next_code = $this->_max_ucs; $i < $deco_len; $i++) {
      if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
        $next_code = $decoded[$i];
        $found = TRUE;
      }
    }
    // Code points are still unaccounted for, yet none of them can be picked
    // up any more (e.g. a code point below 0x80 that the copy loop above
    // skipped, or one above the maximum). Without this exit the loop would
    // never make progress and never terminate.
    if (!$found) {
      $this->_error('The given string contains code points that cannot be encoded');
      return FALSE;
    }
    $delta += ($next_code - $cur_code) * ($codecount + 1);
    $cur_code = $next_code;
    // Scan input again and encode all characters whose code point is $cur_code
    for ($i = 0; $i < $deco_len; $i++) {
      if ($decoded[$i] < $cur_code) {
        $delta++;
      } elseif ($decoded[$i] == $cur_code) {
        // Emit $delta as a variable-length integer.
        for ($q = $delta, $k = $this->_base; 1; $k += $this->_base) {
          $t = ($k <= $bias) ? $this->_tmin :
            (($k >= $bias + $this->_tmax) ? $this->_tmax : $k - $bias);
          if ($q < $t) break;
          $encoded .= $this->_encode_digit(intval($t + (($q - $t) % ($this->_base - $t)))); //v0.4.5 Changed from ceil() to intval()
          $q = (int) (($q - $t) / ($this->_base - $t));
        }
        $encoded .= $this->_encode_digit($q);
        $bias = $this->_adapt($delta, $codecount+1, $is_first);
        $codecount++;
        $delta = 0;
        $is_first = FALSE;
      }
    }
    $delta++;
    $cur_code++;
  }
  return $encoded;
}
/**
 * Computes the next bias value from the delta that was just processed.
 *
 * @param int $delta The delta that was just encoded or decoded
 * @param int $npoints Number of code points handled so far, including the current one
 * @param bool $is_first TRUE for the first delta of a label, which is divided
 *   by the damp setting instead of by 2
 * @return int The new bias
 */
protected function _adapt($delta, $npoints, $is_first) {
  $divisor = $is_first ? $this->_damp : 2;
  $delta = intval($delta / $divisor);
  $delta += intval($delta / $npoints);

  $span = $this->_base - $this->_tmin;
  $limit = ($span * $this->_tmax) / 2;

  // Scale delta down until it is within the limit, adding one base per step.
  $k = 0;
  while ($delta > $limit) {
    $delta = intval($delta / $span);
    $k += $this->_base;
  }
  return intval($k + ($span + 1) * $delta / ($delta + $this->_skew));
}
/**
 * Turns a digit value into its character.
 *
 * Values below 26 are shifted by 97 (so 0 becomes 'a'); all other values are
 * shifted by 22 (so 26 becomes '0').
 *
 * @param int $d Digit value
 * @return string The single character for that digit
 */
protected function _encode_digit($d) {
  $shift = ($d < 26) ? 97 : 22;
  return chr($d + $shift);
}
/**
 * Turns a character into its digit value.
 *
 * '0'-'9' give 26-35, 'A'-'Z' and 'a'-'z' give 0-25. Bytes of 123 and above
 * give the base.
 *
 * NOTE(review): the checks are upper bounds only, so bytes below '0', between
 * '9' and 'A', or between 'Z' and 'a' do not give the base but whatever the
 * first matching subtraction produces (possibly a negative number). Callers
 * have to cope with that - confirm before tightening.
 *
 * @param string $cp The character to decode; only its first byte is looked at
 * @return int The digit value
 */
protected function _decode_digit($cp) {
  $byte = ord($cp);
  if ($byte < 58) {
    return $byte - 22;
  }
  if ($byte < 91) {
    return $byte - 65;
  }
  if ($byte < 123) {
    return $byte - 97;
  }
  return $this->_base;
}
/**
 * Records an error message.
 *
 * The message overwrites the previously stored one and is what
 * get_last_error() returns afterwards.
 *
 * @param string $error The message to remember
 */
protected function _error($error = '') {
  $this->_error = $error;
}
/**
* Do Nameprep according to RFC3491 and RFC3454
* @param array Unicode Characters
* @return string Unicode Characters, Nameprep'd
*/
protected function _nameprep($input)
{
$output = array();
$error = FALSE;
//
// Mapping
// Walking through the input array, performing the required steps on each of
// the input chars and putting the result into the output array
// While mapping required chars we apply the cannonical ordering