Select Git revision
UnicodeTest.php

#2866894 by maxocub: Component tests should not use...
Lee Rowlands authored
Issue #2866894 by maxocub: Component tests should not use Drupal\Tests\UnitTestCase but PHPUnit\Framework\TestCase
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
UnicodeTest.php 18.26 KiB
<?php
namespace Drupal\Tests\Component\Utility;
use Drupal\Component\Utility\Unicode;
use PHPUnit\Framework\TestCase;
/**
* Test unicode handling features implemented in Unicode component.
*
* @group Utility
*
* @coversDefaultClass \Drupal\Component\Utility\Unicode
*/
class UnicodeTest extends TestCase {
/**
* {@inheritdoc}
*
* @covers ::check
*/
protected function setUp() {
// Initialize unicode component.
Unicode::check();
}
/**
* Getting and settings the multibyte environment status.
*
* @dataProvider providerTestStatus
* @covers ::getStatus
* @covers ::setStatus
*/
public function testStatus($value, $expected, $invalid = FALSE) {
if ($invalid) {
$this->setExpectedException('InvalidArgumentException');
}
Unicode::setStatus($value);
$this->assertEquals($expected, Unicode::getStatus());
}
/**
* Data provider for testStatus().
*
* @see testStatus()
*
* @return array
* An array containing:
* - The status value to set.
* - The status value to expect after setting the new value.
* - (optional) Boolean indicating invalid status. Defaults to FALSE.
*/
public function providerTestStatus() {
return [
[Unicode::STATUS_SINGLEBYTE, Unicode::STATUS_SINGLEBYTE],
[rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE],
[rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE],
[Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE],
[rand(10, 100), Unicode::STATUS_MULTIBYTE, TRUE],
[Unicode::STATUS_ERROR, Unicode::STATUS_ERROR],
[Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE],
];
}
/**
* Tests multibyte encoding and decoding.
*
* @dataProvider providerTestMimeHeader
* @covers ::mimeHeaderEncode
* @covers ::mimeHeaderDecode
*/
public function testMimeHeader($value, $encoded) {
$this->assertEquals($encoded, Unicode::mimeHeaderEncode($value));
$this->assertEquals($value, Unicode::mimeHeaderDecode($encoded));
}
/**
* Data provider for testMimeHeader().
*
* @see testMimeHeader()
*
* @return array
* An array containing a string and its encoded value.
*/
public function providerTestMimeHeader() {
return [
['tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='],
// Simple ASCII characters.
['ASCII', 'ASCII'],
];
}
/**
* Tests multibyte strtolower.
*
* @dataProvider providerStrtolower
* @covers ::strtolower
* @covers ::caseFlip
*/
public function testStrtolower($text, $expected, $multibyte = FALSE) {
$status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
Unicode::setStatus($status);
$this->assertEquals($expected, Unicode::strtolower($text));
}
/**
* Data provider for testStrtolower().
*
* @see testStrtolower()
*
* @return array
* An array containing a string, its lowercase version and whether it should
* be processed as multibyte.
*/
public function providerStrtolower() {
$cases = [
['tHe QUIcK bRoWn', 'the quick brown'],
['FrançAIS is ÜBER-åwesome', 'français is über-åwesome'],
];
foreach ($cases as $case) {
// Test the same string both in multibyte and singlebyte conditions.
array_push($case, TRUE);
$cases[] = $case;
}
// Add a multibyte string.
$cases[] = ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ', TRUE];
return $cases;
}
/**
* Tests multibyte strtoupper.
*
* @dataProvider providerStrtoupper
* @covers ::strtoupper
* @covers ::caseFlip
*/
public function testStrtoupper($text, $expected, $multibyte = FALSE) {
$status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
Unicode::setStatus($status);
$this->assertEquals($expected, Unicode::strtoupper($text));
}
/**
* Data provider for testStrtoupper().
*
* @see testStrtoupper()
*
* @return array
* An array containing a string, its uppercase version and whether it should
* be processed as multibyte.
*/
public function providerStrtoupper() {
$cases = [
['tHe QUIcK bRoWn', 'THE QUICK BROWN'],
['FrançAIS is ÜBER-åwesome', 'FRANÇAIS IS ÜBER-ÅWESOME'],
];
foreach ($cases as $case) {
// Test the same string both in multibyte and singlebyte conditions.
array_push($case, TRUE);
$cases[] = $case;
}
// Add a multibyte string.
$cases[] = ['αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE];
return $cases;
}
/**
* Tests multibyte ucfirst.
*
* @dataProvider providerUcfirst
* @covers ::ucfirst
*/
public function testUcfirst($text, $expected) {
$this->assertEquals($expected, Unicode::ucfirst($text));
}
/**
* Data provider for testUcfirst().
*
* @see testUcfirst()
*
* @return array
* An array containing a string and its uppercase first version.
*/
public function providerUcfirst() {
return [
['tHe QUIcK bRoWn', 'THe QUIcK bRoWn'],
['françAIS', 'FrançAIS'],
['über', 'Über'],
['åwesome', 'Åwesome'],
// A multibyte string.
['σion', 'Σion'],
];
}
/**
* Tests multibyte lcfirst.
*
* @dataProvider providerLcfirst
* @covers ::lcfirst
*/
public function testLcfirst($text, $expected, $multibyte = FALSE) {
$status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
Unicode::setStatus($status);
$this->assertEquals($expected, Unicode::lcfirst($text));
}
/**
* Data provider for testLcfirst().
*
* @see testLcfirst()
*
* @return array
* An array containing a string, its lowercase version and whether it should
* be processed as multibyte.
*/
public function providerLcfirst() {
return [
['tHe QUIcK bRoWn', 'tHe QUIcK bRoWn'],
['FrançAIS is ÜBER-åwesome', 'françAIS is ÜBER-åwesome'],
['Über', 'über'],
['Åwesome', 'åwesome'],
// Add a multibyte string.
['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE],
];
}
/**
* Tests multibyte ucwords.
*
* @dataProvider providerUcwords
* @covers ::ucwords
*/
public function testUcwords($text, $expected, $multibyte = FALSE) {
$status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
Unicode::setStatus($status);
$this->assertEquals($expected, Unicode::ucwords($text));
}
/**
* Data provider for testUcwords().
*
* @see testUcwords()
*
* @return array
* An array containing a string, its capitalized version and whether it should
* be processed as multibyte.
*/
public function providerUcwords() {
return [
['tHe QUIcK bRoWn', 'THe QUIcK BRoWn'],
['françAIS', 'FrançAIS'],
['über', 'Über'],
['åwesome', 'Åwesome'],
// Make sure we don't mangle extra spaces.
['frànçAIS is über-åwesome', 'FrànçAIS Is Über-Åwesome'],
// Add a multibyte string.
['σion', 'Σion', TRUE],
];
}
/**
* Tests multibyte strlen.
*
* @dataProvider providerStrlen
* @covers ::strlen
*/
public function testStrlen($text, $expected) {
// Run through multibyte code path.
Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
$this->assertEquals($expected, Unicode::strlen($text));
// Run through singlebyte code path.
Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
$this->assertEquals($expected, Unicode::strlen($text));
}
/**
* Data provider for testStrlen().
*
* @see testStrlen()
*
* @return array
* An array containing a string and its length.
*/
public function providerStrlen() {
return [
['tHe QUIcK bRoWn', 15],
['ÜBER-åwesome', 12],
['以呂波耳・ほへとち。リヌルヲ。', 15],
];
}
/**
* Tests multibyte substr.
*
* @dataProvider providerSubstr
* @covers ::substr
*/
public function testSubstr($text, $start, $length, $expected) {
// Run through multibyte code path.
Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
$this->assertEquals($expected, Unicode::substr($text, $start, $length));
// Run through singlebyte code path.
Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
$this->assertEquals($expected, Unicode::substr($text, $start, $length));
}
/**
* Data provider for testSubstr().
*
* @see testSubstr()
*
* @return array
* An array containing:
* - The string to test.
* - The start number to be processed by substr.
* - The length number to be processed by substr.
* - The expected string result.
*/
public function providerSubstr() {
return [
['frànçAIS is über-åwesome', 0, NULL, 'frànçAIS is über-åwesome'],
['frànçAIS is über-åwesome', 0, 0, ''],
['frànçAIS is über-åwesome', 0, 1, 'f'],
['frànçAIS is über-åwesome', 0, 8, 'frànçAIS'],
['frànçAIS is über-åwesome', 0, 23, 'frànçAIS is über-åwesom'],
['frànçAIS is über-åwesome', 0, 24, 'frànçAIS is über-åwesome'],
['frànçAIS is über-åwesome', 0, 25, 'frànçAIS is über-åwesome'],
['frànçAIS is über-åwesome', 0, 100, 'frànçAIS is über-åwesome'],
['frànçAIS is über-åwesome', 4, 4, 'çAIS'],
['frànçAIS is über-åwesome', 1, 0, ''],
['frànçAIS is über-åwesome', 100, 0, ''],
['frànçAIS is über-åwesome', -4, 2, 'so'],
['frànçAIS is über-åwesome', -4, 3, 'som'],
['frànçAIS is über-åwesome', -4, 4, 'some'],
['frànçAIS is über-åwesome', -4, 5, 'some'],
['frànçAIS is über-åwesome', -7, 10, 'åwesome'],
['frànçAIS is über-åwesome', 5, -10, 'AIS is üb'],
['frànçAIS is über-åwesome', 0, -10, 'frànçAIS is üb'],
['frànçAIS is über-åwesome', 0, -1, 'frànçAIS is über-åwesom'],
['frànçAIS is über-åwesome', -7, -2, 'åweso'],
['frànçAIS is über-åwesome', -7, -6, 'å'],
['frànçAIS is über-åwesome', -7, -7, ''],
['frànçAIS is über-åwesome', -7, -8, ''],
['...', 0, 2, '..'],
['以呂波耳・ほへとち。リヌルヲ。', 1, 3, '呂波耳'],
];
}
/**
* Tests multibyte truncate.
*
* @dataProvider providerTruncate
* @covers ::truncate
*/
public function testTruncate($text, $max_length, $expected, $wordsafe = FALSE, $add_ellipsis = FALSE) {
$this->assertEquals($expected, Unicode::truncate($text, $max_length, $wordsafe, $add_ellipsis));
}
/**
* Data provider for testTruncate().
*
* @see testTruncate()
*
* @return array
* An array containing:
* - The string to test.
* - The max length to truncate this string to.
* - The expected string result.
* - (optional) Boolean for the $wordsafe flag. Defaults to FALSE.
* - (optional) Boolean for the $add_ellipsis flag. Defaults to FALSE.
*/
public function providerTruncate() {
return [
['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome'],
['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åwesom'],
['frànçAIS is über-åwesome', 17, 'frànçAIS is über-'],
['以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'],
['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', FALSE, TRUE],
['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åweso…', FALSE, TRUE],
['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', FALSE, TRUE],
['123', 1, '…', TRUE, TRUE],
['123', 2, '1…', TRUE, TRUE],
['123', 3, '123', TRUE, TRUE],
['1234', 3, '12…', TRUE, TRUE],
['1234567890', 10, '1234567890', TRUE, TRUE],
['12345678901', 10, '123456789…', TRUE, TRUE],
['12345678901', 11, '12345678901', TRUE, TRUE],
['123456789012', 11, '1234567890…', TRUE, TRUE],
['12345 7890', 10, '12345 7890', TRUE, TRUE],
['12345 7890', 9, '12345…', TRUE, TRUE],
['123 567 90', 10, '123 567 90', TRUE, TRUE],
['123 567 901', 10, '123 567…', TRUE, TRUE],
['Stop. Hammertime.', 17, 'Stop. Hammertime.', TRUE, TRUE],
['Stop. Hammertime.', 16, 'Stop…', TRUE, TRUE],
['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', TRUE, TRUE],
['frànçAIS is über-åwesome', 23, 'frànçAIS is über…', TRUE, TRUE],
['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', TRUE, TRUE],
['¿Dónde está el niño?', 20, '¿Dónde está el niño?', TRUE, TRUE],
['¿Dónde está el niño?', 19, '¿Dónde está el…', TRUE, TRUE],
['¿Dónde está el niño?', 13, '¿Dónde está…', TRUE, TRUE],
['¿Dónde está el niño?', 10, '¿Dónde…', TRUE, TRUE],
['Help! Help! Help!', 17, 'Help! Help! Help!', TRUE, TRUE],
['Help! Help! Help!', 16, 'Help! Help!…', TRUE, TRUE],
['Help! Help! Help!', 15, 'Help! Help!…', TRUE, TRUE],
['Help! Help! Help!', 14, 'Help! Help!…', TRUE, TRUE],
['Help! Help! Help!', 13, 'Help! Help!…', TRUE, TRUE],
['Help! Help! Help!', 12, 'Help! Help!…', TRUE, TRUE],
['Help! Help! Help!', 11, 'Help! Help…', TRUE, TRUE],
['Help! Help! Help!', 10, 'Help!…', TRUE, TRUE],
['Help! Help! Help!', 9, 'Help!…', TRUE, TRUE],
['Help! Help! Help!', 8, 'Help!…', TRUE, TRUE],
['Help! Help! Help!', 7, 'Help!…', TRUE, TRUE],
['Help! Help! Help!', 6, 'Help!…', TRUE, TRUE],
['Help! Help! Help!', 5, 'Help…', TRUE, TRUE],
['Help! Help! Help!', 4, 'Hel…', TRUE, TRUE],
['Help! Help! Help!', 3, 'He…', TRUE, TRUE],
['Help! Help! Help!', 2, 'H…', TRUE, TRUE],
];
}
/**
* Tests multibyte truncate bytes.
*
* @dataProvider providerTestTruncateBytes
* @covers ::truncateBytes
*
* @param string $text
* The string to truncate.
* @param int $max_length
* The upper limit on the returned string length.
* @param string $expected
* The expected return from Unicode::truncateBytes().
*/
public function testTruncateBytes($text, $max_length, $expected) {
$this->assertEquals($expected, Unicode::truncateBytes($text, $max_length), 'The string was not correctly truncated.');
}
/**
* Provides data for self::testTruncateBytes().
*
* @return array
* An array of arrays, each containing the parameters to
* self::testTruncateBytes().
*/
public function providerTestTruncateBytes() {
return [
// String shorter than max length.
['Short string', 42, 'Short string'],
// Simple string longer than max length.
['Longer string than previous.', 10, 'Longer str'],
// Unicode.
['以呂波耳・ほへとち。リヌルヲ。', 10, '以呂波'],
];
}
/**
* Tests UTF-8 validation.
*
* @dataProvider providerTestValidateUtf8
* @covers ::validateUtf8
*
* @param string $text
* The text to validate.
* @param bool $expected
* The expected return value from Unicode::validateUtf8().
* @param string $message
* The message to display on failure.
*/
public function testValidateUtf8($text, $expected, $message) {
$this->assertEquals($expected, Unicode::validateUtf8($text), $message);
}
/**
* Provides data for self::testValidateUtf8().
*
* Invalid UTF-8 examples sourced from http://stackoverflow.com/a/11709412/109119.
*
* @return array
* An array of arrays, each containing the parameters for
* self::testValidateUtf8().
*/
public function providerTestValidateUtf8() {
return [
// Empty string.
['', TRUE, 'An empty string did not validate.'],
// Simple text string.
['Simple text.', TRUE, 'A simple ASCII text string did not validate.'],
// Invalid UTF-8, overlong 5 byte encoding.
[chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), FALSE, 'Invalid UTF-8 was validated.'],
// High code-point without trailing characters.
[chr(0xD0) . chr(0x01), FALSE, 'Invalid UTF-8 was validated.'],
];
}
/**
* Tests UTF-8 conversion.
*
* @dataProvider providerTestConvertToUtf8
* @covers ::convertToUtf8
*
* @param string $data
* The data to be converted.
* @param string $encoding
* The encoding the data is in.
* @param string|bool $expected
* The expected result.
*/
public function testConvertToUtf8($data, $encoding, $expected) {
$this->assertEquals($expected, Unicode::convertToUtf8($data, $encoding));
}
/**
* Provides data to self::testConvertToUtf8().
*
* @return array
* An array of arrays, each containing the parameters to
* self::testConvertUtf8(). }
*/
public function providerTestConvertToUtf8() {
return [
[chr(0x97), 'Windows-1252', '—'],
[chr(0x99), 'Windows-1252', '™'],
[chr(0x80), 'Windows-1252', '€'],
];
}
/**
* Tests multibyte strpos.
*
* @dataProvider providerStrpos
* @covers ::strpos
*/
public function testStrpos($haystack, $needle, $offset, $expected) {
// Run through multibyte code path.
Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
$this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset));
// Run through singlebyte code path.
Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
$this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset));
}
/**
* Data provider for testStrpos().
*
* @see testStrpos()
*
* @return array
* An array containing:
* - The haystack string to be searched in.
* - The needle string to search for.
* - The offset integer to start at.
* - The expected integer/FALSE result.
*/
public function providerStrpos() {
return [
['frànçAIS is über-åwesome', 'frànçAIS is über-åwesome', 0, 0],
['frànçAIS is über-åwesome', 'rànçAIS is über-åwesome', 0, 1],
['frànçAIS is über-åwesome', 'not in string', 0, FALSE],
['frànçAIS is über-åwesome', 'r', 0, 1],
['frànçAIS is über-åwesome', 'nçAIS', 0, 3],
['frànçAIS is über-åwesome', 'nçAIS', 2, 3],
['frànçAIS is über-åwesome', 'nçAIS', 3, 3],
['以呂波耳・ほへとち。リヌルヲ。', '波耳', 0, 2],
['以呂波耳・ほへとち。リヌルヲ。', '波耳', 1, 2],
['以呂波耳・ほへとち。リヌルヲ。', '波耳', 2, 2],
];
}
}