Commit 82727ed8 authored by Dries's avatar Dries

- Patch #352359 by Damien Tournoud: provided unit tests for the unicode...

- Patch #352359 by Damien Tournoud: provided unit tests for the unicode library and fixed some bugs.
parent ba9f2041
......@@ -471,13 +471,13 @@ function drupal_substr($text, $start, $length = NULL) {
}
else {
$strlen = strlen($text);
// Find the starting byte offset
// Find the starting byte offset.
$bytes = 0;
if ($start > 0) {
// Count all the continuation bytes from the start until we have found
// $start characters
// $start characters or the end of the string.
$bytes = -1; $chars = -1;
while ($bytes < $strlen && $chars < $start) {
while ($bytes < $strlen - 1 && $chars < $start) {
$bytes++;
$c = ord($text[$bytes]);
if ($c < 0x80 || $c >= 0xC0) {
......@@ -487,7 +487,7 @@ function drupal_substr($text, $start, $length = NULL) {
}
elseif ($start < 0) {
// Count all the continuation bytes from the end until we have found
// abs($start) characters
// abs($start) characters.
$start = abs($start);
$bytes = $strlen; $chars = 0;
while ($bytes > 0 && $chars < $start) {
......@@ -500,37 +500,48 @@ function drupal_substr($text, $start, $length = NULL) {
}
$istart = $bytes;
// Find the ending byte offset
// Find the ending byte offset.
if ($length === NULL) {
$bytes = $strlen - 1;
$iend = $strlen;
}
elseif ($length > 0) {
// Count all the continuation bytes from the starting index until we have
// found $length + 1 characters. Then backtrack one byte.
$bytes = $istart; $chars = 0;
while ($bytes < $strlen && $chars < $length) {
$bytes++;
$c = ord($text[$bytes]);
// found $length + 1 characters or reached the end of the string, then
// backtrack one byte.
$iend = $istart - 1; $chars = -1;
while ($iend < $strlen - 1 && $chars < $length) {
$iend++;
$c = ord($text[$iend]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
}
$bytes--;
// Backtrack one byte if the end of the string was not reached.
if ($iend < $strlen - 1) {
$iend--;
}
}
elseif ($length < 0) {
// Count all the continuation bytes from the end until we have found
// abs($length) characters
// abs($length) characters, then backtrack one byte.
$length = abs($length);
$bytes = $strlen - 1; $chars = 0;
while ($bytes >= 0 && $chars < $length) {
$c = ord($text[$bytes]);
$iend = $strlen; $chars = 0;
while ($iend > 0 && $chars < $length) {
$iend--;
$c = ord($text[$iend]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
$bytes--;
}
// Backtrack one byte if we are not at the beginning of the string.
if ($iend > 0) {
$iend--;
}
}
else {
// $length == 0, return an empty string.
$iend = $istart - 1;
}
$iend = $bytes;
return substr($text, $istart, max(0, $iend - $istart + 1));
}
......
<?php
// $Id$
/**
* @file
* Various unicode handling tests.
*/
/**
* Test unicode handling features implemented in unicode.inc.
*/
class UnicodeUnitTest extends DrupalWebTestCase {

  /**
   * Whether to run the extended version of the tests (including non latin1 characters).
   *
   * @var boolean
   */
  protected $extendedMode = FALSE;

  /**
   * Describes this test case (name, description and group).
   *
   * @return
   *   An associative array with the 'name', 'description' and 'group' keys.
   */
  function getInfo() {
    return array(
      'name' => t('Unicode handling'),
      'description' => t('Tests Drupal Unicode handling.'),
      'group' => t('System'),
    );
  }

  /**
   * Test full unicode features implemented using the mbstring extension.
   */
  function testMbStringUnicode() {
    global $multibyte;

    // mbstring was not detected on this installation, there is no way to test
    // multibyte features. Treat that as an exception and stop here: forcing
    // UNICODE_MULTIBYTE below would run the helpers in a mode that was
    // reported as unavailable.
    if ($multibyte == UNICODE_SINGLEBYTE) {
      $this->error(t('Unable to test Multibyte features: mbstring extension was not detected.'));
      return;
    }

    $this->runHelperTests(UNICODE_MULTIBYTE, TRUE, t('Testing in mbstring mode'));
  }

  /**
   * Test emulated unicode features.
   */
  function testEmulatedUnicode() {
    $this->runHelperTests(UNICODE_SINGLEBYTE, FALSE, t('Testing in emulated (best-effort) mode'));
  }

  /**
   * Runs all the shared helper checks with the global $multibyte forced.
   *
   * The previous value of the global $multibyte is put back afterwards so
   * that the forced mode does not leak into whatever runs next.
   *
   * @param $mode
   *   The value to assign to the global $multibyte while the helpers run
   *   (UNICODE_MULTIBYTE or UNICODE_SINGLEBYTE).
   * @param $extended
   *   Value for $this->extendedMode: TRUE to include the additional (Greek)
   *   test strings in the case-conversion helpers.
   * @param $message
   *   Already translated message recorded as a pass before the helpers run.
   */
  protected function runHelperTests($mode, $extended, $message) {
    global $multibyte;

    $previous_mode = $multibyte;
    $multibyte = $mode;
    $this->extendedMode = $extended;

    $this->pass($message);

    $this->helperTestStrToLower();
    $this->helperTestStrToUpper();
    $this->helperTestUcFirst();
    $this->helperTestStrLen();
    $this->helperTestSubStr();

    $multibyte = $previous_mode;
  }

  /**
   * Checks drupal_strtolower() against a list of input => expected pairs.
   */
  function helperTestStrToLower() {
    $testcase = array(
      'tHe QUIcK bRoWn' => 'the quick brown',
      'FrançAIS is ÜBER-åwesome' => 'français is über-åwesome',
    );
    if ($this->extendedMode) {
      $testcase['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'] = 'αβγδεζηθικλμνξοσὠ';
    }

    foreach ($testcase as $input => $output) {
      $this->assertEqual(drupal_strtolower($input), $output, t('%input is lowercased as %output', array('%input' => $input, '%output' => $output)));
    }
  }

  /**
   * Checks drupal_strtoupper() against a list of input => expected pairs.
   */
  function helperTestStrToUpper() {
    $testcase = array(
      'tHe QUIcK bRoWn' => 'THE QUICK BROWN',
      'FrançAIS is ÜBER-åwesome' => 'FRANÇAIS IS ÜBER-ÅWESOME',
    );
    if ($this->extendedMode) {
      $testcase['αβγδεζηθικλμνξοσὠ'] = 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ';
    }

    foreach ($testcase as $input => $output) {
      $this->assertEqual(drupal_strtoupper($input), $output, t('%input is uppercased as %output', array('%input' => $input, '%output' => $output)));
    }
  }

  /**
   * Checks drupal_ucfirst() against a list of input => expected pairs.
   */
  function helperTestUcFirst() {
    $testcase = array(
      'tHe QUIcK bRoWn' => 'THe QUIcK bRoWn',
      'françAIS' => 'FrançAIS',
      'über' => 'Über',
      'åwesome' => 'Åwesome',
    );
    if ($this->extendedMode) {
      $testcase['σion'] = 'Σion';
    }

    foreach ($testcase as $input => $output) {
      $this->assertEqual(drupal_ucfirst($input), $output, t('%input is ucfirst-ed as %output', array('%input' => $input, '%output' => $output)));
    }
  }

  /**
   * Checks drupal_strlen() against a list of input => expected length pairs.
   */
  function helperTestStrLen() {
    $testcase = array(
      'tHe QUIcK bRoWn' => 15,
      'ÜBER-åwesome' => 12,
    );

    foreach ($testcase as $input => $output) {
      $this->assertEqual(drupal_strlen($input), $output, t('%input length is %output', array('%input' => $input, '%output' => $output)));
    }
  }

  /**
   * Checks drupal_substr() for positive, zero and negative offsets/lengths.
   *
   * Each test case is array(input, start, length, expected substring).
   */
  function helperTestSubStr() {
    $testcase = array(
      // Character offsets (last digit) within the input string:
      //     012345678901234567890123
      array('frànçAIS is über-åwesome', 0, 1,
            'f'),
      array('frànçAIS is über-åwesome', 0, 8,
            'frànçAIS'),
      array('frànçAIS is über-åwesome', 0, 100,
            'frànçAIS is über-åwesome'),
      array('frànçAIS is über-åwesome', 4, 4,
            'çAIS'),
      array('frànçAIS is über-åwesome', 1, 0,
            ''),
      array('frànçAIS is über-åwesome', 100, 0,
            ''),
      array('frànçAIS is über-åwesome', -4, 2,
            'so'),
      array('frànçAIS is über-åwesome', -7, 10,
            'åwesome'),
      array('frànçAIS is über-åwesome', 5, -10,
            'AIS is üb'),
    );

    foreach ($testcase as $test) {
      list($input, $start, $length, $output) = $test;
      $this->assertEqual(drupal_substr($input, $start, $length), $output, t('%input substring-ed at offset %offset for %length characters is %output', array('%input' => $input, '%offset' => $start, '%length' => $length, '%output' => $output)));
    }
  }

  /**
   * Test decode_entities().
   */
  function testDecodeEntities() {
    $testcase = array(
      'Drupal' => 'Drupal',
      '<script>' => '<script>',
      '&lt;script&gt;' => '<script>',
      '&amp;lt;script&amp;gt;' => '&lt;script&gt;',
      '"' => '"',
      '&#34;' => '"',
      '&amp;#34;' => '&#34;',
      '&quot;' => '"',
      '&amp;quot;' => '&quot;',
      "'" => "'",
      '&#39;' => "'",
      '&amp;#39;' => '&#39;',
      '©' => '©',
      '&copy;' => '©',
      '&#169;' => '©',
      '→' => '→',
      '&#8594;' => '→',
      '➼' => '➼',
      '&#10172;' => '➼',
    );

    foreach ($testcase as $input => $output) {
      $this->assertEqual(decode_entities($input), $output, t('Make sure the decoded entity of @input is @output', array('@input' => $input, '@output' => $output)));
    }
  }

  /**
   * Test decode_entities() when a list of excluded characters is passed.
   *
   * Per the expectations below, entities that would decode to an excluded
   * character ('<', '&' or '"') are expected to come back unchanged.
   */
  function testDecodeEntitiesExclusion() {
    $testcase = array(
      'Drupal' => 'Drupal',
      '<script>' => '<script>',
      '&lt;script&gt;' => '&lt;script>',
      '&amp;lt;script&amp;gt;' => '&amp;lt;script&amp;gt;',
      '"' => '"',
      '&#34;' => '&#34;',
      '&amp;#34;' => '&amp;#34;',
      '&quot;' => '&quot;',
      '&amp;quot;' => '&amp;quot;',
      "'" => "'",
      '&#39;' => "'",
      '&amp;#39;' => '&amp;#39;',
      '©' => '©',
      '&copy;' => '©',
      '&#169;' => '©',
      '→' => '→',
      '&#8594;' => '→',
      '➼' => '➼',
      '&#10172;' => '➼',
    );
    $exclude = array('<', '&', '"');

    foreach ($testcase as $input => $output) {
      $this->assertIdentical(decode_entities($input, $exclude), $output, t('Make sure the decoded entity of %input, excluding %excludes, is %output', array('%input' => $input, '%excludes' => implode(',', $exclude), '%output' => $output)));
    }
  }
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment