Commit d57ee5f9 authored by alexpott's avatar alexpott

Issue #1923406 by stefan.r, yannickoo, catch, Crell, amateescu, pwolanin,...

Issue #1923406 by stefan.r, yannickoo, catch, Crell, amateescu, pwolanin, morgantocker, Damien Tournoud, sun: Use ASCII character set on alphanumeric fields so we can index all 255 characters
parent c6890085
......@@ -445,6 +445,9 @@ field.storage_settings.string:
case_sensitive:
type: boolean
label: 'Case sensitive'
is_ascii:
type: boolean
label: 'Contains US ASCII characters only'
field.field_settings.string:
type: mapping
......
......@@ -422,22 +422,26 @@ protected function catchException(\Exception $e, $table_name = NULL) {
}
/**
 * Normalizes a cache ID in order to comply with database limitations.
 *
 * @param string $cid
 *   The passed in cache ID.
 *
 * @return string
 *   An ASCII-encoded cache ID that is at most 255 characters long.
 */
protected function normalizeCid($cid) {
  // Nothing to do if the ID is a US ASCII string of 255 characters or less.
  $cid_is_ascii = mb_check_encoding($cid, 'ASCII');
  if (strlen($cid) <= 255 && $cid_is_ascii) {
    return $cid;
  }
  // The ID is either too long or not pure ASCII, so a hash of the complete
  // original ID is needed to keep the normalized value unique.
  $hash = Crypt::hashBase64($cid);
  // A non-ASCII ID cannot contribute any of its own characters to an ASCII
  // column, so the hash alone becomes the cache ID.
  if (!$cid_is_ascii) {
    return $hash;
  }
  // Return a string that uses as much as possible of the original cache ID
  // with the hash appended.
  return substr($cid, 0, 255 - strlen($hash)) . $hash;
}
......@@ -450,7 +454,7 @@ public function schemaDefinition() {
'fields' => array(
'cid' => array(
'description' => 'Primary Key: Unique cache ID.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
......@@ -491,7 +495,7 @@ public function schemaDefinition() {
),
'checksum' => array(
'description' => 'The tag invalidation checksum when this entry was saved.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
),
......
......@@ -175,7 +175,7 @@ public function schemaDefinition() {
'fields' => array(
'tag' => array(
'description' => 'Namespace-prefixed tag string.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
......
......@@ -192,14 +192,14 @@ protected static function schemaDefinition() {
'fields' => array(
'collection' => array(
'description' => 'Primary Key: Config object collection.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
),
'name' => array(
'description' => 'Primary Key: Config object name.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
......
......@@ -144,6 +144,10 @@ protected function createFieldSql($name, $spec) {
if (!empty($spec['binary'])) {
$sql .= ' BINARY';
}
// Note we check for the "type" key here: "mysql_type" is VARCHAR for both
// 'varchar' and 'varchar_ascii', so it cannot tell the two apart.
if (isset($spec['type']) && $spec['type'] == 'varchar_ascii') {
$sql .= ' CHARACTER SET ascii COLLATE ascii_general_ci';
}
}
elseif (isset($spec['precision']) && isset($spec['scale'])) {
$sql .= '(' . $spec['precision'] . ', ' . $spec['scale'] . ')';
......@@ -218,6 +222,8 @@ public function getFieldTypeMap() {
// database types back into schema types.
// $map does not use drupal_static as its value never changes.
static $map = array(
'varchar_ascii:normal' => 'VARCHAR',
'varchar:normal' => 'VARCHAR',
'char:normal' => 'CHAR',
......
......@@ -363,6 +363,8 @@ function getFieldTypeMap() {
// database types back into schema types.
// $map does not use drupal_static as its value never changes.
static $map = array(
'varchar_ascii:normal' => 'varchar',
'varchar:normal' => 'varchar',
'char:normal' => 'character',
......
......@@ -212,6 +212,8 @@ public function getFieldTypeMap() {
// database types back into schema types.
// $map does not use drupal_static as its value never changes.
static $map = array(
'varchar_ascii:normal' => 'VARCHAR',
'varchar:normal' => 'VARCHAR',
'char:normal' => 'CHAR',
......
......@@ -1568,7 +1568,7 @@ protected function getDedicatedTableSchema(FieldStorageDefinitionInterface $stor
}
else {
$id_schema = array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
'description' => 'The entity id this data is attached to',
......@@ -1601,7 +1601,7 @@ protected function getDedicatedTableSchema(FieldStorageDefinitionInterface $stor
'description' => $description_current,
'fields' => array(
'bundle' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
'default' => '',
......@@ -1617,7 +1617,7 @@ protected function getDedicatedTableSchema(FieldStorageDefinitionInterface $stor
'entity_id' => $id_schema,
'revision_id' => $revision_id_schema,
'langcode' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 32,
'not null' => TRUE,
'default' => '',
......
......@@ -133,7 +133,7 @@ public static function schema(FieldStorageDefinitionInterface $field_definition)
$columns = array(
'target_id' => array(
'description' => 'The ID of the target entity.',
'type' => 'varchar',
'type' => 'varchar_ascii',
// If the target entities act as bundles for another entity type,
// their IDs should not exceed the maximum length for bundles.
'length' => $target_type_info->getBundleOf() ? EntityTypeInterface::BUNDLE_MAX_LENGTH : 255,
......
......@@ -44,6 +44,7 @@ class LanguageItem extends FieldItemBase {
public static function propertyDefinitions(FieldStorageDefinitionInterface $field_definition) {
$properties['value'] = DataDefinition::create('string')
->setLabel(t('Language code'))
->setSetting('is_ascii', TRUE)
->setRequired(TRUE);
$properties['language'] = DataReferenceDefinition::create('language')
......@@ -75,6 +76,7 @@ public static function schema(FieldStorageDefinitionInterface $field_definition)
'value' => array(
  // Language codes only contain US ASCII characters. The ASCII character set
  // is selected through the 'varchar_ascii' schema type; an 'is_ascii' key in
  // a column specification is not what the schema layer checks.
  'type' => 'varchar_ascii',
  'length' => 12,
),
),
);
......
......@@ -32,6 +32,7 @@ class StringItem extends StringItemBase {
public static function defaultStorageSettings() {
return array(
'max_length' => 255,
'is_ascii' => FALSE,
) + parent::defaultStorageSettings();
}
......@@ -42,7 +43,7 @@ public static function schema(FieldStorageDefinitionInterface $field_definition)
return array(
'columns' => array(
'value' => array(
'type' => 'varchar',
'type' => $field_definition->getSetting('is_ascii') === TRUE ? 'varchar_ascii' : 'varchar',
'length' => (int) $field_definition->getSetting('max_length'),
'binary' => $field_definition->getSetting('case_sensitive'),
),
......
......@@ -30,6 +30,7 @@ class UuidItem extends StringItem {
public static function defaultStorageSettings() {
return array(
'max_length' => 128,
'is_ascii' => TRUE,
) + parent::defaultStorageSettings();
}
......
......@@ -1193,7 +1193,7 @@ protected static function schemaDefinition() {
'fields' => array(
'menu_name' => array(
'description' => "The menu name. All links with the same menu name (such as 'tools') are part of the same menu.",
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 32,
'not null' => TRUE,
'default' => '',
......@@ -1206,20 +1206,20 @@ protected static function schemaDefinition() {
),
'id' => array(
'description' => 'Unique machine name: the plugin ID.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
),
'parent' => array(
'description' => 'The plugin ID for the parent of this link.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
),
'route_name' => array(
'description' => 'The machine name of a defined Symfony Route this menu item represents.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
),
'route_param_key' => array(
......@@ -1281,7 +1281,7 @@ protected static function schemaDefinition() {
),
'provider' => array(
'description' => 'The name of the module that generated this link.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => DRUPAL_EXTENSION_NAME_MAX_LENGTH,
'not null' => TRUE,
'default' => 'system',
......
......@@ -226,6 +226,7 @@ public static function baseFieldDefinitions(EntityTypeInterface $entity_type) {
$fields['hash'] = BaseFieldDefinition::create('string')
->setLabel(t('Hash'))
->setSetting('is_ascii', TRUE)
->setDescription(t('Calculated hash of the feed data, used for validating cache.'));
$fields['etag'] = BaseFieldDefinition::create('string')
......
......@@ -170,7 +170,8 @@ public function getHash();
* Sets the calculated hash of the feed data, used for validating cache.
*
* @param string $hash
* A string containing the calculated hash of the feed.
* A string containing the calculated hash of the feed. Must contain
* US ASCII characters only.
*
* @return \Drupal\aggregator\FeedInterface
* The class instance that this method is called on.
......
......@@ -20,7 +20,7 @@ function ban_schema() {
),
'ip' => array(
'description' => 'IP address',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 40,
'not null' => TRUE,
'default' => '',
......
......@@ -46,14 +46,14 @@ function comment_schema() {
'description' => 'The entity_id of the entity for which the statistics are compiled.',
),
'entity_type' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'not null' => TRUE,
'default' => 'node',
'length' => EntityTypeInterface::ID_MAX_LENGTH,
'description' => 'The entity_type of the entity to which this comment is a reply.',
),
'field_name' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'not null' => TRUE,
'default' => '',
'length' => FieldStorageConfig::NAME_MAX_LENGTH,
......
......@@ -303,6 +303,7 @@ public static function baseFieldDefinitions(EntityTypeInterface $entity_type) {
$fields['entity_type'] = BaseFieldDefinition::create('string')
->setLabel(t('Entity type'))
->setDescription(t('The entity type to which this comment is attached.'))
->setSetting('is_ascii', TRUE)
->setSetting('max_length', EntityTypeInterface::ID_MAX_LENGTH);
$fields['comment_type'] = BaseFieldDefinition::create('entity_reference')
......@@ -313,6 +314,7 @@ public static function baseFieldDefinitions(EntityTypeInterface $entity_type) {
$fields['field_name'] = BaseFieldDefinition::create('string')
->setLabel(t('Comment field name'))
->setDescription(t('The field name through which this comment was added.'))
->setSetting('is_ascii', TRUE)
->setSetting('max_length', FieldStorageConfig::NAME_MAX_LENGTH);
return $fields;
......
......@@ -25,7 +25,7 @@ function dblog_schema() {
'description' => 'The {users}.uid of the user who triggered the event.',
),
'type' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 64,
'not null' => TRUE,
'default' => '',
......@@ -69,7 +69,7 @@ function dblog_schema() {
'description' => 'URL of referring page.',
),
'hostname' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
'default' => '',
......
......@@ -20,21 +20,21 @@ function file_schema() {
),
'module' => array(
'description' => 'The name of the module that is using the file.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => DRUPAL_EXTENSION_NAME_MAX_LENGTH,
'not null' => TRUE,
'default' => '',
),
'type' => array(
'description' => 'The name of the object type in which the file is used.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 64,
'not null' => TRUE,
'default' => '',
),
'id' => array(
'description' => 'The primary key of the object using the file.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 64,
'not null' => TRUE,
'default' => 0,
......
......@@ -254,6 +254,7 @@ public static function baseFieldDefinitions(EntityTypeInterface $entity_type) {
$fields['filemime'] = BaseFieldDefinition::create('string')
->setLabel(t('File MIME type'))
->setSetting('is_ascii', TRUE)
->setDescription(t("The file's MIME type."));
$fields['filesize'] = BaseFieldDefinition::create('integer')
......
......@@ -67,14 +67,14 @@ function locale_schema() {
'description' => 'The original string in English.',
),
'context' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
'description' => 'The context this string applies to.',
),
'version' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 20,
'not null' => TRUE,
'default' => 'none',
......@@ -103,7 +103,7 @@ function locale_schema() {
'description' => 'Translation string value in this language.',
),
'language' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 12,
'not null' => TRUE,
'default' => '',
......@@ -142,7 +142,7 @@ function locale_schema() {
'description' => 'Unique identifier of this string.',
),
'type' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 50,
'not null' => TRUE,
'default' => '',
......@@ -156,7 +156,7 @@ function locale_schema() {
'description' => 'Type dependent location information (file name, path, etc).',
),
'version' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 20,
'not null' => TRUE,
'default' => 'none',
......@@ -180,14 +180,14 @@ function locale_schema() {
'description' => 'File import status information for interface translation files.',
'fields' => array(
'project' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => '255',
'not null' => TRUE,
'default' => '',
'description' => 'A unique short name to identify the project the file belongs to.',
),
'langcode' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => '12',
'not null' => TRUE,
'default' => '',
......
......@@ -250,6 +250,7 @@ public static function baseFieldDefinitions(EntityTypeInterface $entity_type) {
->setLabel(t('Bundle'))
->setDescription(t('The content menu link bundle.'))
->setSetting('max_length', EntityTypeInterface::BUNDLE_MAX_LENGTH)
->setSetting('is_ascii', TRUE)
->setReadOnly(TRUE);
$fields['title'] = BaseFieldDefinition::create('string')
......@@ -291,7 +292,8 @@ public static function baseFieldDefinitions(EntityTypeInterface $entity_type) {
$fields['menu_name'] = BaseFieldDefinition::create('string')
->setLabel(t('Menu name'))
->setDescription(t('The menu name. All links with the same menu name (such as "tools") are part of the same menu.'))
->setDefaultValue('tools');
->setDefaultValue('tools')
->setSetting('is_ascii', TRUE);
$fields['link'] = BaseFieldDefinition::create('link')
->setLabel(t('Link'))
......
......@@ -54,7 +54,7 @@ function node_schema() {
),
'langcode' => array(
'description' => 'The {language}.langcode of this node.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 12,
'not null' => TRUE,
'default' => '',
......@@ -75,7 +75,7 @@ function node_schema() {
),
'realm' => array(
'description' => 'The realm in which the user must possess the grant ID. Each node access node can define one or more realms.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
......
......@@ -20,14 +20,14 @@ function search_schema() {
'description' => 'Search item ID, e.g. node ID for nodes.',
),
'langcode' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => '12',
'not null' => TRUE,
'description' => 'The {languages}.langcode of the item variant.',
'default' => '',
),
'type' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 64,
'not null' => TRUE,
'description' => 'Type of item, e.g. node.',
......@@ -67,14 +67,14 @@ function search_schema() {
'description' => 'The {search_dataset}.sid of the searchable item to which the word belongs.',
),
'langcode' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => '12',
'not null' => TRUE,
'description' => 'The {languages}.langcode of the item variant.',
'default' => '',
),
'type' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 64,
'not null' => TRUE,
'description' => 'The {search_dataset}.type of the searchable item to which the word belongs.',
......
......@@ -20,7 +20,7 @@ function shortcut_schema() {
'description' => 'The {users}.uid for this set.',
),
'set_name' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 32,
'not null' => TRUE,
'default' => '',
......
......@@ -108,7 +108,7 @@ function simpletest_schema() {
'description' => 'Test ID, messages belonging to the same ID are reported together',
),
'test_class' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
......@@ -127,14 +127,14 @@ function simpletest_schema() {
'description' => 'The message itself.',
),
'message_group' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
'description' => 'The message group this message belongs to. For example: warning, browser, user.',
),
'function' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
......
......@@ -256,6 +256,8 @@
* 'float', 'numeric', or 'serial'. Most types just map to the according
* database engine specific datatypes. Use 'serial' for auto incrementing
* fields. This will expand to 'INT auto_increment' on MySQL.
* A special 'varchar_ascii' type is also available for limiting machine
* name fields to US ASCII characters.
* - 'mysql_type', 'pgsql_type', 'sqlite_type', etc.: If you need to
* use a record type not included in the officially supported list
* of types above, you can specify a type for each database
......
......@@ -33,4 +33,24 @@ protected function createCacheBackend($bin) {
return new DatabaseBackend($this->container->get('database'), $this->container->get('cache_tags.invalidator.checksum'), $bin);
}
/**
 * {@inheritdoc}
 */
public function testSetGet() {
  parent::testSetGet();
  $cache = $this->getCacheBackend();

  // Cache ID normalization is exercised with two IDs that are not plain
  // ASCII: one far longer than 255 characters and one short one. Each entry
  // pairs the cache ID with the assertion message to report.
  $cases = array(
    array(str_repeat('愛€', 500), "Backend contains the correct value for long, non-ASCII cache id."),
    array('愛1€', "Backend contains the correct value for short, non-ASCII cache id."),
  );

  foreach ($cases as $case) {
    list($cid, $message) = $case;
    // Store a fresh random value and make sure the very same ID reads it back.
    $expected = $this->randomMachineName();
    $cache->set($cid, $expected);
    $this->assertIdentical($expected, $cache->get($cid)->data, $message);
  }
}
}
......@@ -49,6 +49,11 @@ function testSchema() {
'default' => "'\"funky default'\"",
'description' => 'Schema column description for string.',
),
'test_field_string_ascii' => array(
'type' => 'varchar_ascii',
'length' => 255,
'description' => 'Schema column description for ASCII string.',
),
),
);
db_create_table('test_table', $table_specification);
......@@ -62,6 +67,21 @@ function testSchema() {
// Assert that the column comment has been set.
$this->checkSchemaComment($table_specification['fields']['test_field']['description'], 'test_table', 'test_field');
if (Database::getConnection()->databaseType() == 'mysql') {
// Make sure that varchar fields have the correct collation.
$columns = db_query('SHOW FULL COLUMNS FROM {test_table}');
foreach ($columns as $column) {
if ($column->Field == 'test_field_string') {
$string_check = ($column->Collation == 'utf8_general_ci');
}
if ($column->Field == 'test_field_string_ascii') {
$string_ascii_check = ($column->Collation == 'ascii_general_ci');
}
}
$this->assertTrue(!empty($string_check), 'string field has the right collation.');
$this->assertTrue(!empty($string_ascii_check), 'ASCII string field has the right collation.');
}
// An insert into 'test_table' without a value for a column that has no
// default should fail.
$this->assertFalse($this->tryInsert(), 'Insert without a default failed.');
......
......@@ -685,7 +685,7 @@ function system_schema() {
),
'token' => array(
'description' => "A string token generated against the current user's session id and the batch id, used to ensure that only the user who submitted the batch can effectively access it.",
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 64,
'not null' => TRUE,
),
......@@ -717,14 +717,14 @@ function system_schema() {
),
'event' => array(
'description' => 'Name of event (e.g. contact).',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 64,
'not null' => TRUE,
'default' => '',
),
'identifier' => array(
'description' => 'Identifier of the visitor, such as an IP address or hostname.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
'default' => '',
......@@ -754,14 +754,14 @@ function system_schema() {
'fields' => array(
'collection' => array(
'description' => 'A named collection of key and value pairs.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
'default' => '',
),
'name' => array(
'description' => 'The key of the key-value pair. As KEY is a SQL reserved keyword, name was chosen instead.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
'default' => '',
......@@ -781,7 +781,7 @@ function system_schema() {
'fields' => array(
'collection' => array(
'description' => 'A named collection of key and value pairs.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
'default' => '',
......@@ -789,7 +789,7 @@ function system_schema() {
'name' => array(
// KEY is an SQL reserved word, so use 'name' as the key's field name.
'description' => 'The key of the key/value pair.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
'default' => '',
......@@ -824,7 +824,7 @@ function system_schema() {
'description' => 'Primary Key: Unique item ID.',
),
'name' => array(
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
......@@ -862,7 +862,7 @@ function system_schema() {
'fields' => array(
'name' => array(
'description' => 'Primary Key: Machine name of this route',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => '',
......@@ -911,14 +911,14 @@ function system_schema() {
'fields' => array(
'name' => array(
'description' => 'Primary Key: Unique name.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => ''
),
'value' => array(
'description' => 'A value for the semaphore.',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 255,
'not null' => TRUE,
'default' => ''
......@@ -961,13 +961,13 @@ function system_schema() {
),
'sid' => array(
'description' => "A session ID (hashed). The value is generated by Drupal's session handlers.",
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
),
'hostname' => array(
'description' => 'The IP address that last used this session ID (sid).',
'type' => 'varchar',
'type' => 'varchar_ascii',
'length' => 128,
'not null' => TRUE,
'default' => '',
......@@ -1025,7 +1025,7 @@ function system_schema() {
),
'langcode' => array(
'description' => "The language code this alias is for; if 'und', the alias will be used for unknown languages. Each Drupal path can have an alias for each supported language.",
'type' => 'varchar',
'type' => 'varchar_ascii',