<?php

/**
 * @file
 * Provides Unicode-related conversions and operations.
 */

use Drupal\Component\Utility\Unicode;
use Drupal\Component\Utility\String;
10

11
/**
 * Reports the Unicode library status as a requirements entry.
 *
 * The status reported by the Unicode utility class is translated into a
 * human-readable label and a severity. When Unicode::check() names a failed
 * check that this function knows about, a description explaining the problem
 * is added to the entry.
 *
 * @return array
 *   An array with a single 'unicode' key whose value holds the 'title',
 *   'value' and 'severity' of the requirement, plus a 'description' when a
 *   known check has failed.
 */
function unicode_requirements() {
  // Human-readable label for each Unicode status.
  $labels = array(
    Unicode::STATUS_SINGLEBYTE => t('Standard PHP'),
    Unicode::STATUS_MULTIBYTE => t('PHP Mbstring Extension'),
    Unicode::STATUS_ERROR => t('Error'),
  );
  // Requirement severity for each Unicode status.
  $severity_by_status = array(
    Unicode::STATUS_SINGLEBYTE => REQUIREMENT_WARNING,
    Unicode::STATUS_MULTIBYTE => NULL,
    Unicode::STATUS_ERROR => REQUIREMENT_ERROR,
  );

  // Run the check first, then read the resulting status.
  $problem = Unicode::check();
  $status = Unicode::getStatus();

  $entry = array(
    'title' => t('Unicode library'),
    'value' => $labels[$status],
    'severity' => $severity_by_status[$status],
  );

  // Attach an explanation for the failed check, if it is a known one. The
  // loose comparisons mirror the semantics of a switch statement.
  $link = array('@url' => 'http://www.php.net/mbstring');
  if ($problem == 'mb_strlen') {
    $entry['description'] = t('Operations on Unicode strings are emulated on a best-effort basis. Install the <a href="@url">PHP mbstring extension</a> for improved Unicode support.', $link);
  }
  elseif ($problem == 'mbstring.func_overload') {
    $entry['description'] = t('Multibyte string function overloading in PHP is active and must be disabled. Check the php.ini <em>mbstring.func_overload</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
  }
  elseif ($problem == 'mbstring.encoding_translation') {
    $entry['description'] = t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.encoding_translation</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
  }
  elseif ($problem == 'mbstring.http_input') {
    $entry['description'] = t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_input</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
  }
  elseif ($problem == 'mbstring.http_output') {
    $entry['description'] = t('Multibyte string output conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_output</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
  }

  return array('unicode' => $entry);
}
58

59
/**
 * Prepares a new XML parser.
 *
 * This is a wrapper around xml_parser_create() which extracts the encoding
 * from the XML data first and sets the output encoding to UTF-8. This function
 * should be used instead of xml_parser_create(), because PHP 4's XML parser
 * doesn't check the input encoding itself. "Starting from PHP 5, the input
 * encoding is automatically detected, so that the encoding parameter specifies
 * only the output encoding."
 *
 * This is also where unsupported encodings will be converted. Callers should
 * take this into account: $data might have been changed after the call.
 *
 * The encoding declaration in the XML prolog is recognized whether its value
 * is wrapped in double or single quotes, and with optional whitespace around
 * the equals sign, as the XML specification allows.
 *
 * @param string $data
 *   The XML data which will be parsed later. Passed by reference: a leading
 *   UTF-8 byte order mark is stripped, and data in an unsupported encoding is
 *   replaced by its UTF-8 conversion with the prolog rewritten to declare
 *   encoding="utf-8".
 *
 * @return
 *   An XML parser object or FALSE on error.
 *
 * @ingroup php_wrappers
 */
function drupal_xml_parser_create(&$data) {
  // Default XML encoding is UTF-8.
  $encoding = 'utf-8';
  $bom = FALSE;

  // Check for UTF-8 byte order mark (PHP5's XML parser doesn't handle it).
  if (!strncmp($data, "\xEF\xBB\xBF", 3)) {
    $bom = TRUE;
    $data = substr($data, 3);
  }

  // Pattern for the encoding declaration of the XML prolog. Group 1 is
  // everything up to and including the 'encoding' keyword, group 2 the opening
  // quote character (which must also close the value) and group 3 the
  // encoding name. The same pattern is used for detection and for rewriting,
  // so both always agree on what counts as a declaration.
  $declaration = '/^(<\?xml[^>]+encoding)\s*=\s*(["\'])(.+?)\2/';

  // Check for an encoding declaration in the XML prolog if no BOM was found.
  if (!$bom && preg_match($declaration, $data, $match)) {
    $encoding = $match[3];
  }

  // Unsupported encodings are converted here into UTF-8.
  $php_supported = array('utf-8', 'iso-8859-1', 'us-ascii');
  if (!in_array(strtolower($encoding), $php_supported)) {
    $out = Unicode::convertToUtf8($data, $encoding);
    if ($out !== FALSE) {
      $encoding = 'utf-8';
      // The data is UTF-8 now, so the prolog must say so as well.
      $data = preg_replace($declaration, '\\1="utf-8"', $out);
    }
    else {
      \Drupal::logger('php')->warning('Could not convert XML encoding %s to UTF-8.', array('%s' => $encoding));
      return FALSE;
    }
  }

  $xml_parser = xml_parser_create($encoding);
  xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, 'utf-8');
  return $xml_parser;
}