Skip to content
Snippets Groups Projects
Commit bb7fb6a3 authored by catch's avatar catch
Browse files

Issue #3000630 by scott_euser, Krzysztof Domański, APolitsin, vijaycs85,...

Issue #3000630 by scott_euser, Krzysztof Domański, APolitsin, vijaycs85, longwave: Transliteration causes 2 capital letters at the beginning of a word
parent 099d3e65
No related branches found
No related tags found
2 merge requests: !7452 Issue #1797438. HTML5 validation is preventing form submit and not fully..., !789 Issue #3210310: Adjust Database API to remove deprecated Drupal 9 code in Drupal 10
...@@ -105,10 +105,22 @@ public function removeDiacritics($string) { ...@@ -105,10 +105,22 @@ public function removeDiacritics($string) {
* {@inheritdoc} * {@inheritdoc}
*/ */
public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) { public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) {
$result = ''; $results = [];
$length = 0; $length = 0;
// Split on words to handle mixed case per word.
$words = explode(' ', $string);
foreach ($words as $key => $word) {
$results[$key] = '';
// String is mixed case if it consists of both uppercase and lowercase
// letters. To accurately check this, remove any numbers and check that
// remaining characters are not all uppercase and not all lowercase.
$alpha_string = preg_replace('/\\d/', '', $word);
$mixed_case = (strlen($alpha_string) > 1 && mb_strtolower($alpha_string) !== $alpha_string && mb_strtoupper($alpha_string) !== $alpha_string);
// Split into Unicode characters and transliterate each one. // Split into Unicode characters and transliterate each one.
foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) { foreach (preg_split('//u', $word, 0, PREG_SPLIT_NO_EMPTY) as $character) {
$code = self::ordUTF8($character); $code = self::ordUTF8($character);
if ($code == -1) { if ($code == -1) {
$to_add = $unknown_character; $to_add = $unknown_character;
...@@ -122,14 +134,26 @@ public function transliterate($string, $langcode = 'en', $unknown_character = '? ...@@ -122,14 +134,26 @@ public function transliterate($string, $langcode = 'en', $unknown_character = '?
$length += strlen($to_add); $length += strlen($to_add);
if ($length > $max_length) { if ($length > $max_length) {
// There is no more space. // There is no more space.
return $result; $results = array_filter($results);
return implode(' ', $results);
} }
} }
$result .= $to_add; // If this is a capitalised letter of a mixed case word, only capitalise
// the first letter and lowercase any subsequent letters.
if ($mixed_case && strlen($to_add) > 1 && mb_strtoupper($to_add) === $to_add) {
$to_add = ucfirst(strtolower($to_add));
} }
return $result; $results[$key] .= $to_add;
}
// Add space to count for max length.
$length++;
}
$results = array_filter($results);
return implode(' ', $results);
} }
/** /**
......
...@@ -146,6 +146,19 @@ public function providerTestPhpTransliteration() { ...@@ -146,6 +146,19 @@ public function providerTestPhpTransliteration() {
['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '?'], ['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '?'],
// Max length. // Max length.
['de', $two_byte, 'Ae Oe', '?', 5], ['de', $two_byte, 'Ae Oe', '?', 5],
// Test strings with mixed case words where a single capital character
// results in multiple characters. The first character should remain
// capitalised but subsequent resulting characters should be lowercase.
// For example a result of the transliteration should be 'Shtrikhkod'
// not 'SHtrikhkod'. Numbers should not be used in determining whether a
// string is mixed case.
['ru', 'Штрихкод', 'Shtrikhkod'],
['bg', 'Щастие', 'Schastie'],
['bg', 'Щ1', 'SCH1'],
['bg', 'Щ1Щ', 'SCH1SCH'],
['bg', 'Щ1щ', 'Sch1sch'],
['bg', 'Щастие ЩЩЩ', 'Schastie SCHSCHSCH'],
['bg', 'Щастие ЩЩЩ. Щастие! Щастие', 'Schastie SCHSCHSCH. Schastie! Schastie'],
]; ];
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment