diff --git a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php
index 2f6397406ae2004202bc58e3a71d754aaaeee258..8d62175e604d1fcdc1dc6f2b4bf6e7630eed4a14 100644
--- a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php
+++ b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php
@@ -58,6 +58,21 @@ class PhpTransliteration implements TransliterationInterface {
    */
   protected $genericMap = [];

+  /**
+   * Special characters for ::removeDiacritics().
+   *
+   * Some accented characters have a base character that transliterates to
+   * more than one ASCII character. For these, only the accent is removed:
+   * this map is keyed by the multi-character transliteration and gives the
+   * untransliterated base character to use instead (e.g. 'AE' => 'Æ').
+   */
+  protected $fixTransliterateForRemoveDiacritics = [
+    'AE' => 'Æ',
+    'ae' => 'æ',
+    'ZH' => 'Ʒ',
+    'zh' => 'ʒ',
+  ];
+
   /**
    * Constructs a transliteration object.
    *
@@ -93,6 +108,9 @@ public function removeDiacritics($string) {
         if (strlen($to_add) === 1) {
           $replacement = $to_add;
         }
+        elseif (isset($this->fixTransliterateForRemoveDiacritics[$to_add])) {
+          $replacement = $this->fixTransliterateForRemoveDiacritics[$to_add];
+        }
       }

       $result .= $replacement;
diff --git a/core/modules/search/search.post_update.php b/core/modules/search/search.post_update.php
index 83f16fac5dc3fa62c9c25bff6e3059bd251076e6..8cad76e8a15cadedbebd99378cb36ffb98222fbb 100644
--- a/core/modules/search/search.post_update.php
+++ b/core/modules/search/search.post_update.php
@@ -13,3 +13,14 @@ function search_removed_post_updates() {
     'search_post_update_block_page' => '9.0.0',
   ];
 }
+
+/**
+ * Mark everything for reindexing after diacritics removal rule change.
+ */
+function search_post_update_reindex_after_diacritics_rule_change() {
+  $search_page_repository = \Drupal::service('search.search_page_repository');
+  foreach ($search_page_repository->getIndexableSearchPages() as $entity) {
+    $entity->getPlugin()->markForReindex();
+  }
+  return t("Content has been marked for re-indexing for all active search pages. Searching will continue to work, but new content won't be indexed until all existing content has been re-indexed.");
+}
diff --git a/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php b/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php
index 82c5ed0cec0429895367ff149dbeeca086d5b49d..db9f4ee159d2b7e4856a28d942fd25be9944a395 100644
--- a/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php
+++ b/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php
@@ -58,8 +58,8 @@ public function providerTestPhpTransliterationRemoveDiacritics() {
       // Test all characters in the Unicode range 0x01CD to 0x024F.
       ['ǍǎǏ', 'AaI'],
       ['ǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟ', 'iOoUuUuUuUuUuǝAa'],
-      ['ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯ', 'AaǢǣGgGgKkOoOoǮǯ'],
-      ['ǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿ', 'jDZDzdzGgǶǷNnAaǼǽOo'],
+      ['ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯ', 'AaÆæGgGgKkOoOoƷʒ'],
+      ['ǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿ', 'jDZDzdzGgǶǷNnAaÆæOo'],
       ['ȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏ', 'AaAaEeEeIiIiOoOo'],
       ['ȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟ', 'RrRrUuUuSsTtȜȝHh'],
       ['ȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯ', 'ȠȡȢȣZzAaEeOoOoOo'],