Commit 686c269b authored by alexpott

Issue #1938670 by ParisLiakos, Xano, RobLoach, pp, alexpott: Convert...

Issue #1938670 by ParisLiakos, Xano, RobLoach, pp, alexpott: Convert unicode.inc to \Drupal\Component\Utility\Unicode.
parent 608fa1d5
......@@ -5,6 +5,7 @@
use Drupal\Component\Utility\Settings;
use Drupal\Component\Utility\String;
use Drupal\Component\Utility\Timer;
use Drupal\Component\Utility\Unicode;
use Drupal\Core\DrupalKernel;
use Drupal\Core\Database\Database;
use Drupal\Core\DependencyInjection\ContainerBuilder;
......@@ -270,22 +271,6 @@
*/
const LANGUAGE_RTL = 1;
/**
* Indicates an error during check for PHP unicode support.
*/
const UNICODE_ERROR = -1;
/**
* Indicates that standard PHP (emulated) unicode support is being used.
*/
const UNICODE_SINGLEBYTE = 0;
/**
* Indicates that full unicode support with the PHP mbstring extension is being
* used.
*/
const UNICODE_MULTIBYTE = 1;
/**
* Time of the current request in seconds elapsed since the Unix Epoch.
*
......@@ -667,9 +652,6 @@ function drupal_environment_initialize() {
// Set sane locale settings to ensure consistent handling of strings, dates,
// times and numbers.
setlocale(LC_ALL, 'C');
// Detect string handling method.
unicode_check();
}
/**
......@@ -682,51 +664,6 @@ function drupal_valid_http_host($host) {
return preg_match('/^\[?(?:[a-zA-Z0-9-:\]_]+\.?)+$/', $host);
}
/**
 * Verifies PHP's multibyte (mbstring) setup and records the resulting mode.
 *
 * Drupal has to process text in a variety of encodings, so mbstring function
 * overloading is not supported, and HTTP input/output conversion has to be
 * switched off for the same reason.
 *
 * The detected mode is stored in the global $multibyte as one of the
 * UNICODE_* constants.
 *
 * @return string
 *   The identifier of the first multibyte check that failed, or an empty
 *   string when every check passed.
 */
function unicode_check() {
  global $multibyte;

  // Without the mbstring extension only the emulated handling is available.
  if (!function_exists('mb_strlen')) {
    $multibyte = UNICODE_SINGLEBYTE;
    return 'mb_strlen';
  }

  // Each mbstring setting paired with the value it has to (loosely) equal.
  // Order matters: the first offending setting is the one that is reported.
  $required = array(
    'mbstring.func_overload' => 0,
    'mbstring.encoding_translation' => 0,
    'mbstring.http_input' => 'pass',
    'mbstring.http_output' => 'pass',
  );
  foreach ($required as $setting => $expected) {
    if (ini_get($setting) != $expected) {
      $multibyte = UNICODE_ERROR;
      return $setting;
    }
  }

  // All checks passed, so configure mbstring for UTF-8.
  mb_internal_encoding('utf-8');
  mb_language('uni');
  $multibyte = UNICODE_MULTIBYTE;
  return '';
}
/**
* Sets the base URL, cookie domain, and session name from configuration.
*/
......@@ -2165,6 +2102,9 @@ function _drupal_bootstrap_configuration() {
// Start a page timer:
Timer::start('page');
// Detect string handling method.
Unicode::check();
// Load the procedural configuration system helper functions.
require_once DRUPAL_ROOT . '/core/includes/config.inc';
......
......@@ -5,6 +5,8 @@
* Theming for maintenance pages.
*/
use Drupal\Component\Utility\Unicode;
/**
* Sets up the theming system for maintenance page.
*
......@@ -28,7 +30,7 @@ function _drupal_maintenance_theme() {
require_once DRUPAL_ROOT . '/core/includes/unicode.inc';
require_once DRUPAL_ROOT . '/core/includes/file.inc';
require_once DRUPAL_ROOT . '/core/includes/module.inc';
unicode_check();
Unicode::check();
// Install and update pages are treated differently to prevent theming overrides.
if (defined('MAINTENANCE_MODE') && (MAINTENANCE_MODE == 'install' || MAINTENANCE_MODE == 'update')) {
......
This diff is collapsed.
......@@ -31,6 +31,23 @@ public static function checkPlain($text) {
return htmlspecialchars($text, ENT_QUOTES, 'UTF-8');
}
/**
 * Converts every HTML entity in a string, numeric ones included, to UTF-8.
 *
 * Only one level of escaping is removed, so double-escaped entities are
 * decoded a single time ("&amp;lt;" turns into "&lt;", not "<"). Use with
 * care: decoding undoes earlier sanitization (&lt;script&gt; turns back into
 * <script>).
 *
 * @param string $text
 *   The text whose entities should be decoded.
 *
 * @return string
 *   The given $text with all HTML entities decoded exactly once.
 */
public static function decodeEntities($text) {
  $decoded = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
  return $decoded;
}
/**
* Formats a string for HTML display by replacing variable placeholders.
*
......
This diff is collapsed.
......@@ -6,6 +6,7 @@
*/
use Drupal\Core\Entity\EntityInterface;
use Drupal\Component\Utility\Unicode;
/**
* Matches all 'N' Unicode character classes (numbers)
......@@ -447,7 +448,7 @@ function search_simplify($text, $langcode = NULL) {
// With the exception of the rules above, we consider all punctuation,
// marks, spacers, etc, to be a word boundary.
$text = preg_replace('/[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . ']+/u', ' ', $text);
$text = preg_replace('/[' . Unicode::PREG_CLASS_WORD_BOUNDARY . ']+/u', ' ', $text);
// Truncate everything to 50 characters.
$words = explode(' ', $text);
......@@ -1102,7 +1103,7 @@ function search_data($keys, $module, $conditions = NULL) {
*/
function search_excerpt($keys, $text, $langcode = NULL) {
// We highlight around non-indexable or CJK characters.
$boundary = '(?:(?<=[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . PREG_CLASS_CJK . '])|(?=[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . PREG_CLASS_CJK . ']))';
$boundary = '(?:(?<=[' . Unicode::PREG_CLASS_WORD_BOUNDARY . PREG_CLASS_CJK . '])|(?=[' . Unicode::PREG_CLASS_WORD_BOUNDARY . PREG_CLASS_CJK . ']))';
// Extract positive keywords and phrases
preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches);
......
......@@ -7,6 +7,7 @@
namespace Drupal\views\Plugin\views;
use Drupal\Component\Utility\Unicode;
use Drupal\views\Plugin\views\display\DisplayPluginBase;
use Drupal\views\Plugin\views\PluginBase;
use Drupal\views\ViewExecutable;
......@@ -242,8 +243,6 @@ public function sanitizeValue($value, $type = NULL) {
* The transformed string.
*/
protected function caseTransform($string, $option) {
global $multibyte;
switch ($option) {
default:
return $string;
......@@ -254,7 +253,7 @@ protected function caseTransform($string, $option) {
case 'ucfirst':
return drupal_strtoupper(drupal_substr($string, 0, 1)) . drupal_substr($string, 1);
case 'ucwords':
if ($multibyte == UNICODE_MULTIBYTE) {
if (Unicode::getStatus() == Unicode::STATUS_MULTIBYTE) {
return mb_convert_case($string, MB_CASE_TITLE);
}
else {
......
......@@ -41,7 +41,7 @@ public static function getInfo() {
*/
function testCheckPlain($text, $expected, $message, $ignorewarnings = FALSE) {
  // With $ignorewarnings set, the call goes through PHP's @ operator so any
  // warnings it raises are suppressed.
  $result = $ignorewarnings ? @String::checkPlain($text) : String::checkPlain($text);
  // PHPUnit takes the expected value first; a single assertion in that order
  // keeps the failure output accurate.
  $this->assertEquals($expected, $result, $message);
}
/**
......@@ -80,7 +80,7 @@ function providerCheckPlain() {
*/
function testFormat($string, $args, $expected, $message) {
  $result = String::format($string, $args);
  // PHPUnit takes the expected value first; a single assertion in that order
  // keeps the failure output accurate.
  $this->assertEquals($expected, $result, $message);
}
/**
......@@ -106,4 +106,44 @@ function testPlaceholder() {
$this->assertEquals('<em class="placeholder">Some text</em>', String::placeholder('Some text'));
}
/**
 * Checks String::decodeEntities() against each provided data set.
 *
 * @dataProvider providerDecodeEntities
 */
public function testDecodeEntities($text, $expected) {
  $decoded = String::decodeEntities($text);
  $this->assertEquals($expected, $decoded);
}
/**
 * Supplies the data sets consumed by testDecodeEntities().
 *
 * @return array
 *   A list of data sets, each holding the input text followed by the
 *   expected decoded output.
 *
 * @see testDecodeEntities()
 */
public function providerDecodeEntities() {
  $cases = array();

  // Plain text and angle brackets.
  $cases[] = array('Drupal', 'Drupal');
  $cases[] = array('<script>', '<script>');
  $cases[] = array('&lt;script&gt;', '<script>');
  $cases[] = array('&#60;script&#62;', '<script>');
  $cases[] = array('&amp;lt;script&amp;gt;', '&lt;script&gt;');

  // Double quotes.
  $cases[] = array('"', '"');
  $cases[] = array('&#34;', '"');
  $cases[] = array('&amp;#34;', '&#34;');
  $cases[] = array('&quot;', '"');
  $cases[] = array('&amp;quot;', '&quot;');

  // Single quotes.
  $cases[] = array("'", "'");
  $cases[] = array('&#39;', "'");
  $cases[] = array('&amp;#39;', '&#39;');

  // Characters outside ASCII, given literally, by name and by number.
  $cases[] = array('©', '©');
  $cases[] = array('&copy;', '©');
  $cases[] = array('&#169;', '©');
  $cases[] = array('→', '→');
  $cases[] = array('&#8594;', '→');
  $cases[] = array('➼', '➼');
  $cases[] = array('&#10172;', '➼');
  $cases[] = array('&euro;', '€');

  return $cases;
}
}
This diff is collapsed.
......@@ -20,3 +20,8 @@
require __DIR__ . "/../../core/lib/Drupal.php";
// Look into removing this later.
define('REQUEST_TIME', (int) $_SERVER['REQUEST_TIME']);
// Set sane locale settings to ensure consistent handling of strings, dates,
// times and numbers.
// @see drupal_environment_initialize()
setlocale(LC_ALL, 'C');
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment