Commit c9ab0090 authored by alexpott

Issue #1919086 by jhodgdon: Extend transliterate() with maxlength.

parent 9eacefa1
......@@ -76,19 +76,32 @@ public function __construct($data_directory = NULL) {
}
/**
* Implements TransliterationInterface::transliterate().
* {@inheritdoc}
*/
public function transliterate($string, $langcode = 'en', $unknown_character = '?') {
public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) {
$result = '';
$length = 0;
// Split into Unicode characters and transliterate each one.
foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) {
$code = self::ordUTF8($character);
$to_add = '';
if ($code == -1) {
$result .= $unknown_character;
$to_add = $unknown_character;
}
else {
$result .= $this->replace($code, $langcode, $unknown_character);
$to_add = $this->replace($code, $langcode, $unknown_character);
}
// Check if this exceeds the maximum allowed length.
if (isset($max_length)) {
$length += strlen($to_add);
if ($length > $max_length) {
// There is no more space.
return $result;
}
}
$result .= $to_add;
}
return $result;
......
......@@ -25,10 +25,14 @@ interface TransliterationInterface {
* @param string $unknown_character
* (optional) The character to substitute for characters in $string without
* transliterated equivalents. Defaults to '?'.
* @param int $max_length
* (optional) If provided, return at most this many characters, ensuring
* that the transliteration does not split in the middle of an input
* character's transliteration.
*
* @return string
* $string with non-US-ASCII characters transliterated to US-ASCII
* characters, and unknown characters replaced with $unknown_character.
*/
public function transliterate($string, $langcode = 'en', $unknown_character = '?');
public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL);
}
......@@ -106,5 +106,13 @@ public function testPHPTransliteration() {
'@actual' => $actual,
)));
}
// Test with max length, using German. It should never split up the
// transliteration of a single character.
$input = 'Ä Ö Ü Å Ø äöüåøhello';
$trunc_output = 'Ae Oe Ue A O aeoe';
$this->assertIdentical($trunc_output, $transliterator_service->transliterate($input, 'de', '?', 17), 'Truncating to 17 characters works');
$this->assertIdentical($trunc_output, $transliterator_service->transliterate($input, 'de', '?', 18), 'Truncating to 18 characters works');
}
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment