Commit 154f2828 authored by catch

Issue #2849669 by alexpott, pwolanin, Munavijayalakshmi, dawehner: Fix...

Issue #2849669 by alexpott, pwolanin, Munavijayalakshmi, dawehner: Fix \Drupal\Component\Utility\Unicode() because of the Symfony mbstring polyfill
parent 2a7952f2
......@@ -46,7 +46,7 @@
*
* Correct:
* @code
* $my_substring = Unicode::substr($original_string, 0, 5);
* $my_substring = mb_substr($original_string, 0, 5);
* @endcode
*
* @}
......
......@@ -6,7 +6,6 @@
*/
use Drupal\Component\FileSystem\FileSystem as ComponentFileSystem;
use Drupal\Component\Utility\Unicode;
use Drupal\Component\Utility\UrlHelper;
use Drupal\Component\PhpStorage\FileStorage;
use Drupal\Component\Utility\Bytes;
......@@ -202,7 +201,7 @@ function file_create_url($uri) {
// HTTP and to https://example.com/bar.jpg when viewing a HTTPS page)
// Both types of relative URIs are characterized by a leading slash, hence
// we can use a single check.
if (Unicode::substr($uri, 0, 1) == '/') {
if (mb_substr($uri, 0, 1) == '/') {
return $uri;
}
else {
......
......@@ -12,7 +12,6 @@
use Drupal\Component\Utility\Crypt;
use Drupal\Component\Utility\Html;
use Drupal\Component\Render\MarkupInterface;
use Drupal\Component\Utility\Unicode;
use Drupal\Core\Cache\CacheableDependencyInterface;
use Drupal\Core\Config\Config;
use Drupal\Core\Config\StorageException;
......@@ -485,7 +484,7 @@ function theme_settings_convert_to_config(array $theme_settings, Config $config)
$config->set('favicon.mimetype', $value);
}
elseif (substr($key, 0, 7) == 'toggle_') {
$config->set('features.' . Unicode::substr($key, 7), $value);
$config->set('features.' . mb_substr($key, 7), $value);
}
elseif (!in_array($key, ['theme', 'logo_upload'])) {
$config->set($key, $value);
......
......@@ -5,7 +5,6 @@
* Theming for maintenance pages.
*/
use Drupal\Component\Utility\Unicode;
use Drupal\Core\Site\Settings;
/**
......@@ -29,7 +28,6 @@ function _drupal_maintenance_theme() {
require_once __DIR__ . '/file.inc';
require_once __DIR__ . '/module.inc';
require_once __DIR__ . '/database.inc';
Unicode::check();
// Install and update pages are treated differently to prevent theming overrides.
if (defined('MAINTENANCE_MODE') && (MAINTENANCE_MODE == 'install' || MAINTENANCE_MODE == 'update')) {
......
......@@ -2,8 +2,6 @@
namespace Drupal\Component\Diff\Engine;
use Drupal\Component\Utility\Unicode;
/**
* Class used internally by Diff to actually compute the diffs.
*
......@@ -134,7 +132,7 @@ public function diff($from_lines, $to_lines) {
* Returns the whole line if it's small enough, or the MD5 hash otherwise.
*/
protected function _line_hash($line) {
if (Unicode::strlen($line) > $this::MAX_XREF_LENGTH) {
if (mb_strlen($line) > $this::MAX_XREF_LENGTH) {
return md5($line);
}
else {
......
......@@ -2,8 +2,6 @@
namespace Drupal\Component\Diff\Engine;
use Drupal\Component\Utility\Unicode;
/**
* Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
*/
......@@ -64,7 +62,7 @@ public function addWords($words, $tag = '') {
}
if ($word[0] == "\n") {
$this->_flushLine($tag);
$word = Unicode::substr($word, 1);
$word = mb_substr($word, 1);
}
assert(!strstr($word, "\n"));
$this->group .= $word;
......
......@@ -3,7 +3,6 @@
namespace Drupal\Component\Diff;
use Drupal\Component\Diff\Engine\HWLDFWordAccumulator;
use Drupal\Component\Utility\Unicode;
/**
* @todo document
......@@ -35,7 +34,7 @@ protected function _split($lines) {
$words[] = "\n";
$stripped[] = "\n";
}
if (Unicode::strlen($line) > $this::MAX_LINE_LENGTH) {
if (mb_strlen($line) > $this::MAX_LINE_LENGTH) {
$words[] = $line;
$stripped[] = $line;
}
......
......@@ -6,7 +6,7 @@
"license": "GPL-2.0-or-later",
"require": {
"php": ">=5.5.9",
"drupal/core-utility": "^8.2"
"symfony/polyfill-mbstring": "~1.0"
},
"autoload": {
"psr-4": {
......
......@@ -3,7 +3,6 @@
namespace Drupal\Component\Render;
use Drupal\Component\Utility\Html;
use Drupal\Component\Utility\Unicode;
use Drupal\Component\Utility\UrlHelper;
/**
......@@ -107,7 +106,7 @@ public function __toString() {
* The length of the string.
*/
public function count() {
return Unicode::strlen($this->string);
return mb_strlen($this->string);
}
/**
......
......@@ -3,7 +3,6 @@
namespace Drupal\Component\Render;
use Drupal\Component\Utility\Html;
use Drupal\Component\Utility\Unicode;
/**
* Escapes HTML syntax characters to HTML entities for display in markup.
......@@ -43,7 +42,7 @@ public function __toString() {
* {@inheritdoc}
*/
public function count() {
return Unicode::strlen($this->string);
return mb_strlen($this->string);
}
/**
......
......@@ -2,8 +2,6 @@
namespace Drupal\Component\Render;
use Drupal\Component\Utility\Unicode;
/**
* Implements MarkupInterface and Countable for rendered objects.
*
......@@ -61,7 +59,7 @@ public function __toString() {
* The length of the string.
*/
public function count() {
return Unicode::strlen($this->string);
return mb_strlen($this->string);
}
/**
......
......@@ -23,7 +23,7 @@ public static function validateHex($hex) {
// Hash prefix is optional.
$hex = ltrim($hex, '#');
// Must be either RGB or RRGGBB.
$length = Unicode::strlen($hex);
$length = mb_strlen($hex);
$valid = $valid && ($length === 3 || $length === 6);
// Must be a valid hex value.
$valid = $valid && ctype_xdigit($hex);
......
......@@ -71,7 +71,7 @@ class Html {
public static function getClass($class) {
$class = (string) $class;
if (!isset(static::$classes[$class])) {
static::$classes[$class] = static::cleanCssIdentifier(Unicode::strtolower($class));
static::$classes[$class] = static::cleanCssIdentifier(mb_strtolower($class));
}
return static::$classes[$class];
}
......@@ -215,7 +215,7 @@ public static function getUniqueId($id) {
* @see self::getUniqueId()
*/
public static function getId($id) {
$id = str_replace([' ', '_', '[', ']'], ['-', '-', '-', ''], Unicode::strtolower($id));
$id = str_replace([' ', '_', '[', ']'], ['-', '-', '-', ''], mb_strtolower($id));
// As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs can
// only contain letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
......
......@@ -87,13 +87,6 @@ class Unicode {
*/
const STATUS_ERROR = -1;
/**
* Holds the multibyte capabilities of the current environment.
*
* @var int
*/
protected static $status = 0;
/**
* Gets the current status of unicode/multibyte support on this environment.
*
......@@ -107,7 +100,13 @@ class Unicode {
* An error occurred. No unicode support.
*/
public static function getStatus() {
return static::$status;
switch (static::check()) {
case 'mb_strlen':
return Unicode::STATUS_SINGLEBYTE;
case '':
return Unicode::STATUS_MULTIBYTE;
}
return Unicode::STATUS_ERROR;
}
/**
......@@ -123,12 +122,16 @@ public static function getStatus() {
*
* @param int $status
* The new status of multibyte support.
*
* @deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. In
* Drupal 9 there will be no way to set the status and in Drupal 8 this
* ability has been removed because mb_*() functions are supplied using
* Symfony's polyfill.
*
* @see https://www.drupal.org/node/2850048
*/
public static function setStatus($status) {
if (!in_array($status, [static::STATUS_SINGLEBYTE, static::STATUS_MULTIBYTE, static::STATUS_ERROR])) {
throw new \InvalidArgumentException('Invalid status value for unicode support.');
}
static::$status = $status;
@trigger_error('\Drupal\Component\Utility\Unicode::setStatus() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. In Drupal 9 there will be no way to set the status and in Drupal 8 this ability has been removed because mb_*() functions are supplied using Symfony\'s polyfill. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED);
}
/**
......@@ -143,38 +146,33 @@ public static function setStatus($status) {
* Otherwise, an empty string.
*/
public static function check() {
// Set appropriate configuration.
mb_internal_encoding('utf-8');
mb_language('uni');
// Check for mbstring extension.
if (!function_exists('mb_strlen')) {
static::$status = static::STATUS_SINGLEBYTE;
if (!extension_loaded('mbstring')) {
return 'mb_strlen';
}
// Check mbstring configuration.
if (ini_get('mbstring.func_overload') != 0) {
static::$status = static::STATUS_ERROR;
return 'mbstring.func_overload';
}
if (ini_get('mbstring.encoding_translation') != 0) {
static::$status = static::STATUS_ERROR;
return 'mbstring.encoding_translation';
}
// mbstring.http_input and mbstring.http_output are deprecated and empty by
// default in PHP 5.6.
if (version_compare(PHP_VERSION, '5.6.0') == -1) {
if (ini_get('mbstring.http_input') != 'pass') {
static::$status = static::STATUS_ERROR;
return 'mbstring.http_input';
}
if (ini_get('mbstring.http_output') != 'pass') {
static::$status = static::STATUS_ERROR;
return 'mbstring.http_output';
}
}
// Set appropriate configuration.
mb_internal_encoding('utf-8');
mb_language('uni');
static::$status = static::STATUS_MULTIBYTE;
return '';
}
......@@ -224,17 +222,7 @@ public static function encodingFromBOM($data) {
* Converted data or FALSE.
*/
public static function convertToUtf8($data, $encoding) {
if (function_exists('iconv')) {
return @iconv($encoding, 'utf-8', $data);
}
elseif (function_exists('mb_convert_encoding')) {
return @mb_convert_encoding($data, 'utf-8', $encoding);
}
elseif (function_exists('recode_string')) {
return @recode_string($encoding . '..utf-8', $data);
}
// Cannot convert.
return FALSE;
return @iconv($encoding, 'utf-8', $data);
}
/**
......@@ -281,15 +269,15 @@ public static function truncateBytes($string, $len) {
*
* @return int
* The length of the string.
*
* @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use
* mb_strlen() instead.
*
* @see https://www.drupal.org/node/2850048
*/
public static function strlen($text) {
if (static::getStatus() == static::STATUS_MULTIBYTE) {
return mb_strlen($text);
}
else {
// Do not count UTF-8 continuation bytes.
return strlen(preg_replace("/[\x80-\xBF]/", '', $text));
}
@trigger_error('\Drupal\Component\Utility\Unicode::strlen() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strlen() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED);
return mb_strlen($text);
}
/**
......@@ -300,18 +288,15 @@ public static function strlen($text) {
*
* @return string
* The string in uppercase.
*
* @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use
* mb_strtoupper() instead.
*
* @see https://www.drupal.org/node/2850048
*/
public static function strtoupper($text) {
if (static::getStatus() == static::STATUS_MULTIBYTE) {
return mb_strtoupper($text);
}
else {
// Use C-locale for ASCII-only uppercase.
$text = strtoupper($text);
// Case flip Latin-1 accented letters.
$text = preg_replace_callback('/\xC3[\xA0-\xB6\xB8-\xBE]/', '\Drupal\Component\Utility\Unicode::caseFlip', $text);
return $text;
}
@trigger_error('\Drupal\Component\Utility\Unicode::strtoupper() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtoupper() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED);
return mb_strtoupper($text);
}
/**
......@@ -322,18 +307,15 @@ public static function strtoupper($text) {
*
* @return string
* The string in lowercase.
*
* @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use
* mb_strtolower() instead.
*
* @see https://www.drupal.org/node/2850048
*/
public static function strtolower($text) {
if (static::getStatus() == static::STATUS_MULTIBYTE) {
return mb_strtolower($text);
}
else {
// Use C-locale for ASCII-only lowercase.
$text = strtolower($text);
// Case flip Latin-1 accented letters.
$text = preg_replace_callback('/\xC3[\x80-\x96\x98-\x9E]/', '\Drupal\Component\Utility\Unicode::caseFlip', $text);
return $text;
}
@trigger_error('\Drupal\Component\Utility\Unicode::strtolower() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtolower() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED);
return mb_strtolower($text);
}
/**
......@@ -346,7 +328,7 @@ public static function strtolower($text) {
* The string with the first character as uppercase.
*/
public static function ucfirst($text) {
return static::strtoupper(static::substr($text, 0, 1)) . static::substr($text, 1);
return mb_strtoupper(mb_substr($text, 0, 1)) . mb_substr($text, 1);
}
/**
......@@ -362,7 +344,7 @@ public static function ucfirst($text) {
*/
public static function lcfirst($text) {
// Note: no mbstring equivalent!
return static::strtolower(static::substr($text, 0, 1)) . static::substr($text, 1);
return mb_strtolower(mb_substr($text, 0, 1)) . mb_substr($text, 1);
}
/**
......@@ -379,7 +361,7 @@ public static function lcfirst($text) {
public static function ucwords($text) {
$regex = '/(^|[' . static::PREG_CLASS_WORD_BOUNDARY . '])([^' . static::PREG_CLASS_WORD_BOUNDARY . '])/u';
return preg_replace_callback($regex, function (array $matches) {
return $matches[1] . Unicode::strtoupper($matches[2]);
return $matches[1] . mb_strtoupper($matches[2]);
}, $text);
}
......@@ -399,92 +381,15 @@ public static function ucwords($text) {
*
* @return string
* The shortened string.
*
* @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use
* mb_substr() instead.
*
* @see https://www.drupal.org/node/2850048
*/
public static function substr($text, $start, $length = NULL) {
if (static::getStatus() == static::STATUS_MULTIBYTE) {
return $length === NULL ? mb_substr($text, $start) : mb_substr($text, $start, $length);
}
else {
$strlen = strlen($text);
// Find the starting byte offset.
$bytes = 0;
if ($start > 0) {
// Count all the characters except continuation bytes from the start
// until we have found $start characters or the end of the string.
$bytes = -1; $chars = -1;
while ($bytes < $strlen - 1 && $chars < $start) {
$bytes++;
$c = ord($text[$bytes]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
}
}
elseif ($start < 0) {
// Count all the characters except continuation bytes from the end
// until we have found abs($start) characters.
$start = abs($start);
$bytes = $strlen; $chars = 0;
while ($bytes > 0 && $chars < $start) {
$bytes--;
$c = ord($text[$bytes]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
}
}
$istart = $bytes;
// Find the ending byte offset.
if ($length === NULL) {
$iend = $strlen;
}
elseif ($length > 0) {
// Count all the characters except continuation bytes from the starting
// index until we have found $length characters or reached the end of
// the string, then backtrace one byte.
$iend = $istart - 1;
$chars = -1;
$last_real = FALSE;
while ($iend < $strlen - 1 && $chars < $length) {
$iend++;
$c = ord($text[$iend]);
$last_real = FALSE;
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
$last_real = TRUE;
}
}
// Backtrace one byte if the last character we found was a real
// character and we don't need it.
if ($last_real && $chars >= $length) {
$iend--;
}
}
elseif ($length < 0) {
// Count all the characters except continuation bytes from the end
// until we have found abs($start) characters, then backtrace one byte.
$length = abs($length);
$iend = $strlen; $chars = 0;
while ($iend > 0 && $chars < $length) {
$iend--;
$c = ord($text[$iend]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
}
// Backtrace one byte if we are not at the beginning of the string.
if ($iend > 0) {
$iend--;
}
}
else {
// $length == 0, return an empty string.
return '';
}
return substr($text, $istart, max(0, $iend - $istart + 1));
}
@trigger_error('\Drupal\Component\Utility\Unicode::substr() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_substr() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED);
return mb_substr($text, $start, $length);
}
/**
......@@ -526,15 +431,15 @@ public static function truncate($string, $max_length, $wordsafe = FALSE, $add_el
$max_length = max($max_length, 0);
$min_wordsafe_length = max($min_wordsafe_length, 0);
if (static::strlen($string) <= $max_length) {
if (mb_strlen($string) <= $max_length) {
// No truncation needed, so don't add ellipsis, just return.
return $string;
}
if ($add_ellipsis) {
// Truncate ellipsis in case $max_length is small.
$ellipsis = static::substr('…', 0, $max_length);
$max_length -= static::strlen($ellipsis);
$ellipsis = mb_substr('…', 0, $max_length);
$max_length -= mb_strlen($ellipsis);
$max_length = max($max_length, 0);
}
......@@ -553,11 +458,11 @@ public static function truncate($string, $max_length, $wordsafe = FALSE, $add_el
$string = $matches[1];
}
else {
$string = static::substr($string, 0, $max_length);
$string = mb_substr($string, 0, $max_length);
}
}
else {
$string = static::substr($string, 0, $max_length);
$string = mb_substr($string, 0, $max_length);
}
if ($add_ellipsis) {
......@@ -583,7 +488,7 @@ public static function truncate($string, $max_length, $wordsafe = FALSE, $add_el
* $str2, and 0 if they are equal.
*/
public static function strcasecmp($str1, $str2) {
return strcmp(static::strtoupper($str1), static::strtoupper($str2));
return strcmp(mb_strtoupper($str1), mb_strtoupper($str2));
}
/**
......@@ -715,18 +620,15 @@ public static function validateUtf8($text) {
* The position where $needle occurs in $haystack, always relative to the
* beginning (independent of $offset), or FALSE if not found. Note that
* a return value of 0 is not the same as FALSE.
*
* @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use
* mb_strpos() instead.
*
* @see https://www.drupal.org/node/2850048
*/
public static function strpos($haystack, $needle, $offset = 0) {
if (static::getStatus() == static::STATUS_MULTIBYTE) {
return mb_strpos($haystack, $needle, $offset);
}
else {
// Remove Unicode continuation characters, to be compatible with
// Unicode::strlen() and Unicode::substr().
$haystack = preg_replace("/[\x80-\xBF]/", '', $haystack);
$needle = preg_replace("/[\x80-\xBF]/", '', $needle);
return strpos($haystack, $needle, $offset);
}
@trigger_error('\Drupal\Component\Utility\Unicode::strpos() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strpos() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED);
return mb_strpos($haystack, $needle, $offset);
}
}
......@@ -7,7 +7,9 @@
"require": {
"php": ">=5.5.9",
"paragonie/random_compat": "^1.0|^2.0",
"drupal/core-render": "^8.2"
"drupal/core-render": "^8.2",
"symfony/polyfill-iconv": "~1.0",
"symfony/polyfill-mbstring": "~1.0"
},
"autoload": {
"psr-4": {
......
......@@ -119,7 +119,7 @@ public function loadFile($file, $optimize = NULL, $reset_basepath = TRUE) {
// If a BOM is found, convert the file to UTF-8, then use substr() to
// remove the BOM from the result.
if ($encoding = (Unicode::encodingFromBOM($contents))) {
$contents = Unicode::substr(Unicode::convertToUtf8($contents, $encoding), 1);
$contents = mb_substr(Unicode::convertToUtf8($contents, $encoding), 1);
}
// If no BOM, check for fallback encoding. Per CSS spec the regex is very strict.
elseif (preg_match('/^@charset "([^"]+)";/', $contents, $matches)) {
......
......@@ -24,7 +24,7 @@ public function optimize(array $js_asset) {
// remove the BOM from the result.
$data = file_get_contents($js_asset['data']);
if ($encoding = (Unicode::encodingFromBOM($data))) {
$data = Unicode::substr(Unicode::convertToUtf8($data, $encoding), 1);
$data = mb_substr(Unicode::convertToUtf8($data, $encoding), 1);
}
// If no BOM is found, check for the charset attribute.
elseif (isset($js_asset['attributes']['charset'])) {
......
......@@ -7,7 +7,6 @@
use Drupal\Core\Messenger\MessengerTrait;
use Drupal\Core\Plugin\ContextAwarePluginAssignmentTrait;
use Drupal\Core\Plugin\ContextAwarePluginBase;
use Drupal\Component\Utility\Unicode;
use Drupal\Component\Utility\NestedArray;
use Drupal\Core\Language\LanguageInterface;
use Drupal\Core\Plugin\PluginWithFormsInterface;
......@@ -246,7 +245,7 @@ public function getMachineNameSuggestion() {
// \Drupal\system\MachineNameController::transliterate(), so it might make
// sense to provide a common service for the two.
$transliterated = $this->transliteration()->transliterate($admin_label, LanguageInterface::LANGCODE_DEFAULT, '_');
$transliterated = Unicode::strtolower($transliterated);
$transliterated = mb_strtolower($transliterated);
$transliterated = preg_replace('@[^a-z0-9_.]+@', '', $transliterated);
......
......@@ -3,7 +3,6 @@
namespace Drupal\Core\Config;
use Drupal\Component\Utility\Crypt;
use Drupal\Component\Utility\Unicode;
use Drupal\Core\Config\Entity\ConfigDependencyManager;
use Drupal\Core\Config\Entity\ConfigEntityDependency;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
......@@ -344,7 +343,7 @@ public function installCollectionDefaultConfig($collection) {
// Only install configuration for enabled extensions.
$enabled_extensions = $this->getEnabledExtensions();
$config_to_install = array_filter($storage->listAll(), function ($config_name) use ($enabled_extensions) {
$provider = Unicode::substr($config_name