Commit bc60c929 authored by David_Rothstein's avatar David_Rothstein

Issue #2488180 by stefan.r, stovak, pwolanin, David_Rothstein, Noe_,...

Issue #2488180 by stefan.r, stovak, pwolanin, David_Rothstein, Noe_, typhonius, KhaledBlah, joelpittet, Fabianx, geerlingguy, nithinkolekar, mikeytown2, jduhls, scuba_fly, travelvc, hass: Support full UTF-8 (emojis, Asian symbols, mathematical symbols) on MySQL and other database drivers when they are configured to allow it
parent c371d42c
......@@ -61,6 +61,9 @@ Drupal 7.50, xxxx-xx-xx (development version)
- Made it possible to use any PHP callable in Ajax form callbacks, form API
form-building functions, and form API wrapper callbacks (API addition:
https://www.drupal.org/node/2761169).
- Added support for full UTF-8 (emojis, Asian symbols, mathematical symbols) on
MySQL and other database drivers when the site and database are configured to
allow it (https://www.drupal.org/node/2761183).
Drupal 7.44, 2016-06-15
-----------------------
......
......@@ -1313,6 +1313,39 @@ public function commit() {
* also larger than the $existing_id if one was passed in.
*/
abstract public function nextId($existing_id = 0);
/**
* Checks whether utf8mb4 support is configurable in settings.php.
*
* @return bool
*/
public function utf8mb4IsConfigurable() {
  // This generic implementation does not support 4 byte UTF-8 by default, so
  // there is no corresponding setting to expose; it always answers FALSE.
  return FALSE;
}
/**
* Checks whether utf8mb4 support is currently active.
*
* @return bool
*/
public function utf8mb4IsActive() {
  // 4 byte UTF-8 is not supported by default, so it can never have been
  // switched on at this level; the answer is always FALSE.
  return FALSE;
}
/**
* Checks whether utf8mb4 support is available on the current database system.
*
* @return bool
*/
public function utf8mb4IsSupported() {
  // Conservative default: without further knowledge of the backend, assume it
  // may be unable to store 4 byte UTF-8 and report FALSE.
  return FALSE;
}
}
/**
......
......@@ -28,6 +28,12 @@ public function __construct(array $connection_options = array()) {
$this->connectionOptions = $connection_options;
$charset = 'utf8';
// Check if the charset is overridden to utf8mb4 in settings.php.
if ($this->utf8mb4IsActive()) {
$charset = 'utf8mb4';
}
// The DSN should use either a socket or a host/port.
if (isset($connection_options['unix_socket'])) {
$dsn = 'mysql:unix_socket=' . $connection_options['unix_socket'];
......@@ -39,7 +45,7 @@ public function __construct(array $connection_options = array()) {
// Character set is added to dsn to ensure PDO uses the proper character
// set when escaping. This has security implications. See
// https://www.drupal.org/node/1201452 for further discussion.
$dsn .= ';charset=utf8';
$dsn .= ';charset=' . $charset;
$dsn .= ';dbname=' . $connection_options['database'];
// Allow PDO options to be overridden.
$connection_options += array(
......@@ -63,10 +69,10 @@ public function __construct(array $connection_options = array()) {
// certain one has been set; otherwise, MySQL defaults to 'utf8_general_ci'
// for UTF-8.
if (!empty($connection_options['collation'])) {
$this->exec('SET NAMES utf8 COLLATE ' . $connection_options['collation']);
$this->exec('SET NAMES ' . $charset . ' COLLATE ' . $connection_options['collation']);
}
else {
$this->exec('SET NAMES utf8');
$this->exec('SET NAMES ' . $charset);
}
// Set MySQL init_commands if not already defined. Default Drupal's MySQL
......@@ -206,6 +212,42 @@ protected function popCommittableTransactions() {
}
}
}
public function utf8mb4IsConfigurable() {
  // This driver honors a 'charset' connection option (see utf8mb4IsActive()),
  // so utf8mb4 is something a site can opt into.
  return TRUE;
}
public function utf8mb4IsActive() {
  // utf8mb4 only counts as active when the connection options explicitly
  // request exactly that charset.
  if (!isset($this->connectionOptions['charset'])) {
    return FALSE;
  }
  return $this->connectionOptions['charset'] === 'utf8mb4';
}
/**
 * Checks whether both the PHP client library and the server handle utf8mb4.
 *
 * The client library version is compared against the minimum known to
 * support utf8mb4, then a throwaway table is created on the server to prove
 * that a utf8mb4 VARCHAR(255) primary key (which needs large index prefixes)
 * is accepted.
 *
 * @return bool
 *   TRUE if the client version is sufficient and the probe table could be
 *   created, FALSE otherwise.
 */
public function utf8mb4IsSupported() {
  // Ensure that the MySQL driver supports utf8mb4 encoding.
  $version = $this->getAttribute(PDO::ATTR_CLIENT_VERSION);
  if (strpos($version, 'mysqlnd') !== FALSE) {
    // The mysqlnd driver supports utf8mb4 starting at version 5.0.9. Its
    // version string carries a non-numeric prefix and trailing build
    // information, so reduce it to the bare dotted number first.
    $version = preg_replace('/^\D+([\d.]+).*/', '$1', $version);
    if (version_compare($version, '5.0.9', '<')) {
      return FALSE;
    }
  }
  else {
    // The libmysqlclient driver supports utf8mb4 starting at version 5.5.3.
    if (version_compare($version, '5.5.3', '<')) {
      return FALSE;
    }
  }

  // Ensure that the MySQL server supports large prefixes and utf8mb4.
  try {
    // A previous probe that was interrupted before its cleanup would leave
    // the test table behind, which would make the CREATE below fail and the
    // server be misreported as unsupported. Remove any such leftover first.
    $this->query("DROP TABLE IF EXISTS {drupal_utf8mb4_test}");
    // The engine is pinned to InnoDB so the probe exercises the same engine
    // that tables are created with by default, regardless of the server's
    // default storage engine.
    $this->query("CREATE TABLE {drupal_utf8mb4_test} (id VARCHAR(255), PRIMARY KEY(id(255))) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci ROW_FORMAT=DYNAMIC ENGINE=INNODB");
  }
  catch (Exception $e) {
    return FALSE;
  }
  $this->query("DROP TABLE {drupal_utf8mb4_test}");
  return TRUE;
}
}
......
......@@ -81,7 +81,8 @@ protected function createTableSql($name, $table) {
// Provide defaults if needed.
$table += array(
'mysql_engine' => 'InnoDB',
'mysql_character_set' => 'utf8',
// Allow the default charset to be overridden in settings.php.
'mysql_character_set' => $this->connection->utf8mb4IsActive() ? 'utf8mb4' : 'utf8',
);
$sql = "CREATE TABLE {" . $name . "} (\n";
......@@ -109,6 +110,13 @@ protected function createTableSql($name, $table) {
$sql .= ' COLLATE ' . $info['collation'];
}
// The row format needs to be either DYNAMIC or COMPRESSED in order to allow
// for the innodb_large_prefix setting to take effect, see
// https://dev.mysql.com/doc/refman/5.6/en/create-table.html
if ($this->connection->utf8mb4IsActive()) {
$sql .= ' ROW_FORMAT=DYNAMIC';
}
// Add table comment.
if (!empty($table['description'])) {
$sql .= ' COMMENT ' . $this->prepareComment($table['description'], self::COMMENT_MAX_TABLE);
......
......@@ -216,6 +216,14 @@ public function nextId($existing = 0) {
return $id;
}
public function utf8mb4IsActive() {
  // This driver always reports 4 byte UTF-8 as active; unlike the base
  // implementation it does not depend on any setting.
  return TRUE;
}
public function utf8mb4IsSupported() {
  // This driver always reports 4 byte UTF-8 as supported.
  // NOTE(review): presumably the backend stores full UTF-8 natively; confirm.
  return TRUE;
}
}
/**
......
......@@ -378,6 +378,14 @@ public function popTransaction($name) {
}
}
public function utf8mb4IsActive() {
  // This driver always reports 4 byte UTF-8 as active; unlike the base
  // implementation it does not depend on any setting.
  return TRUE;
}
public function utf8mb4IsSupported() {
  // This driver always reports 4 byte UTF-8 as supported.
  // NOTE(review): presumably the backend stores full UTF-8 natively; confirm.
  return TRUE;
}
}
/**
......
......@@ -809,6 +809,13 @@ function install_system_module(&$install_state) {
variable_set('install_profile_modules', array_diff($modules, array('system')));
$install_state['database_tables_exist'] = TRUE;
// Prevent the hook_requirements() check from telling us to convert the
// database to utf8mb4.
$connection = Database::getConnection();
if ($connection->utf8mb4IsConfigurable() && $connection->utf8mb4IsActive()) {
variable_set('drupal_all_databases_are_utf8mb4', TRUE);
}
}
/**
......
......@@ -2986,3 +2986,36 @@ class NodePageCacheTest extends NodeWebTestCase {
$this->assertResponse(404);
}
}
/**
* Tests that multi-byte UTF-8 characters are stored and retrieved correctly.
*/
class NodeMultiByteUtf8Test extends NodeWebTestCase {

  /**
   * Describes this test case (name, description and group).
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = 'Multi-byte UTF-8';
    $info['description'] = 'Test that multi-byte UTF-8 characters are stored and retrieved correctly.';
    $info['group'] = 'Node';
    return $info;
  }

  /**
   * Saves a node whose title is an emoji and checks the rendered page title.
   */
  public function testMultiByteUtf8() {
    $db = Database::getConnection();

    // The scenario is only meaningful when the database both supports and is
    // currently using 4 byte UTF-8. On MySQL that requires 'charset' to be
    // set to 'utf8mb4' in settings.php; otherwise bail out without asserting.
    $can_store_four_bytes = $db->utf8mb4IsSupported() && $db->utf8mb4IsActive();
    if (!$can_store_four_bytes) {
      return;
    }

    $title = '🐙';

    // Sanity check: fewer characters than bytes means the title really does
    // contain multi-byte characters.
    $character_count = drupal_strlen($title, 'utf-8');
    $byte_count = strlen($title);
    $this->assertTrue($character_count < $byte_count, 'Title has multi-byte characters.');

    // Create the node, view it, and compare the page heading to the title.
    $node = $this->drupalCreateNode(array('title' => $title));
    $this->drupalGet('node/' . $node->nid);
    $result = $this->xpath('//h1[@id="page-title"]');
    $rendered_title = trim((string) $result[0]);
    $this->assertEqual($rendered_title, $title, 'The passed title was returned.');
  }

}
......@@ -196,6 +196,12 @@ function system_requirements($phase) {
);
}
// Test database-specific multi-byte UTF-8 related requirements.
$charset_requirements = _system_check_db_utf8mb4_requirements($phase);
if (!empty($charset_requirements)) {
$requirements['database_charset'] = $charset_requirements;
}
// Test PHP memory_limit
$memory_limit = ini_get('memory_limit');
$requirements['php_memory_limit'] = array(
......@@ -517,6 +523,75 @@ function system_requirements($phase) {
return $requirements;
}
/**
* Checks whether the requirements for multi-byte UTF-8 support are met.
*
* @param string $phase
* The hook_requirements() stage.
*
* @return array
* A requirements array with the result of the charset check.
*/
function _system_check_db_utf8mb4_requirements($phase) {
  global $install_state;

  // While the installer is running its requirements check there may be no
  // usable database connection yet. Unless the connection info was
  // preconfigured by hand and verified, skip the utf8mb4 check entirely.
  $settings_unverified = isset($install_state['settings_verified']) && !$install_state['settings_verified'];
  if ($settings_unverified) {
    return array();
  }

  $connection = Database::getConnection();
  $t = get_t();

  $requirements['title'] = $t('Database 4 byte UTF-8 support');

  // Gather the three facts the report depends on, plus display values.
  $is_configurable = $connection->utf8mb4IsConfigurable();
  $is_active = $connection->utf8mb4IsActive();
  $is_supported = $connection->utf8mb4IsSupported();
  $driver = $connection->driver();
  $documentation_url = 'https://www.drupal.org/node/2754539';

  if (!$is_active && !$is_supported) {
    // Not supported, not active.
    $requirements['value'] = $t('Disabled');
    $requirements['description'] = $t('4 byte UTF-8 for @driver is disabled. See the <a href="@url">documentation on adding 4 byte UTF-8 support</a> for more information.', array('@driver' => $driver, '@url' => $documentation_url));
    $requirements['severity'] = REQUIREMENT_INFO;
  }
  elseif (!$is_active) {
    // Supported, not active.
    $requirements['value'] = $t('Not enabled');
    $requirements['description'] = $t('4 byte UTF-8 for @driver is not activated, but it is supported on your system. It is recommended that you enable this to allow 4-byte UTF-8 input such as emojis, Asian symbols and mathematical symbols to be stored correctly. See the <a href="@url">documentation on adding 4 byte UTF-8 support</a> for more information.', array('@driver' => $driver, '@url' => $documentation_url));
    $requirements['severity'] = REQUIREMENT_INFO;
  }
  elseif (!$is_supported) {
    // Not supported, active.
    $requirements['value'] = $t('Not supported');
    $requirements['description'] = $t('4 byte UTF-8 for @driver is activated, but not supported on your system. Please turn this off in settings.php, or ensure that all database-related requirements are met. See the <a href="@url">documentation on adding 4 byte UTF-8 support</a> for more information.', array('@driver' => $driver, '@url' => $documentation_url));
    $requirements['severity'] = REQUIREMENT_ERROR;
  }
  elseif ($phase != 'install' && $is_configurable && !variable_get('drupal_all_databases_are_utf8mb4', FALSE)) {
    // Supported, active, and configurable, but the flag recording that all
    // database tables have been converted has not been set.
    $requirements['value'] = $t('Enabled, but database tables need conversion');
    $requirements['description'] = $t('Please convert all database tables to utf8mb4 prior to enabling it in settings.php. See the <a href="@url">documentation on adding 4 byte UTF-8 support</a> for more information.', array('@url' => $documentation_url));
    $requirements['severity'] = REQUIREMENT_ERROR;
  }
  else {
    // Supported, active.
    $requirements['value'] = $t('Enabled');
    $requirements['description'] = $t('4 byte UTF-8 for @driver is enabled.', array('@driver' => $driver));
    $requirements['severity'] = REQUIREMENT_OK;
  }

  return $requirements;
}
/**
* Implements hook_install().
*/
......
......@@ -126,6 +126,38 @@
* );
* @endcode
*
* For handling full UTF-8 in MySQL, including 4-byte characters such as
* emojis, Asian symbols, and mathematical symbols, you may set the charset to
* "utf8mb4" and the collation to a utf8mb4 collation (for example
* "utf8mb4_general_ci") prior to running install.php:
* @code
* $databases['default']['default'] = array(
* 'driver' => 'mysql',
* 'database' => 'databasename',
* 'username' => 'username',
* 'password' => 'password',
* 'host' => 'localhost',
* 'charset' => 'utf8mb4',
* 'collation' => 'utf8mb4_general_ci',
* );
* @endcode
* When using this setting on an existing installation, ensure that all existing
* tables have been converted to the utf8mb4 charset, for example by using the
* utf8mb4_convert contributed project available at
* https://www.drupal.org/project/utf8mb4_convert, so as to prevent mixing data
* with different charsets.
* Note this should only be used when all of the following conditions are met:
* - In order to allow for large indexes, MySQL must be set up with the
* following my.cnf settings:
* [mysqld]
* innodb_large_prefix=true
* innodb_file_format=barracuda
* innodb_file_per_table=true
* These settings are available as of MySQL 5.5.14, and are defaults in
* MySQL 5.7.7 and up.
* - The PHP MySQL driver must support the utf8mb4 charset (libmysqlclient
* 5.5.3 and up, as well as mysqlnd 5.0.9 and up).
* - The MySQL server must support the utf8mb4 charset (5.5.3 and up).
*
* You can optionally set prefixes for some or all database table names
* by using the 'prefix' setting. If a prefix is specified, the table
* name will be prepended with its value. Be sure to use valid database
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment