Commit 6a1b9324 authored by alexpott's avatar alexpott

Issue #2553351 by jhodgdon, dawehner: _search_find_match_with_simplify() is inefficient

parent 1e1a38b7
......@@ -412,8 +412,8 @@ public static function substr($text, $start, $length = NULL) {
// Find the starting byte offset.
$bytes = 0;
if ($start > 0) {
// Count all the continuation bytes from the start until we have found
// $start characters or the end of the string.
// Count all the characters except continuation bytes from the start
// until we have found $start characters or the end of the string.
$bytes = -1; $chars = -1;
while ($bytes < $strlen - 1 && $chars < $start) {
$bytes++;
......@@ -424,8 +424,8 @@ public static function substr($text, $start, $length = NULL) {
}
}
elseif ($start < 0) {
// Count all the continuation bytes from the end until we have found
// abs($start) characters.
// Count all the characters except continuation bytes from the end
// until we have found abs($start) characters.
$start = abs($start);
$bytes = $strlen; $chars = 0;
while ($bytes > 0 && $chars < $start) {
......@@ -443,9 +443,9 @@ public static function substr($text, $start, $length = NULL) {
$iend = $strlen;
}
elseif ($length > 0) {
// Count all the continuation bytes from the starting index until we have
// found $length characters or reached the end of the string, then
// backtrace one byte.
// Count all the characters except continuation bytes from the starting
// index until we have found $length characters or reached the end of
// the string, then backtrace one byte.
$iend = $istart - 1;
$chars = -1;
$last_real = FALSE;
......@@ -458,15 +458,15 @@ public static function substr($text, $start, $length = NULL) {
$last_real = TRUE;
}
}
// Backtrace one byte if the last character we found was a real character
// and we don't need it.
// Backtrace one byte if the last character we found was a real
// character and we don't need it.
if ($last_real && $chars >= $length) {
$iend--;
}
}
elseif ($length < 0) {
// Count all the continuation bytes from the end until we have found
// abs($start) characters, then backtrace one byte.
// Count all the characters except continuation bytes from the end
// until we have found abs($start) characters, then backtrace one byte.
$length = abs($length);
$iend = $strlen; $chars = 0;
while ($iend > 0 && $chars < $length) {
......@@ -697,4 +697,33 @@ public static function validateUtf8($text) {
return (preg_match('/^./us', $text) == 1);
}
/**
* Finds the position of the first occurrence of a string in another string.
*
* @param string $haystack
* The string to search in.
* @param string $needle
* The string to find in $haystack.
* @param int $offset
* If specified, start the search at this number of characters from the
* beginning (default 0).
*
* @return int|false
* The position where $needle occurs in $haystack, always relative to the
* beginning (independent of $offset), or FALSE if not found. Note that
* a return value of 0 is not the same as FALSE.
*/
public static function strpos($haystack, $needle, $offset = 0) {
if (static::getStatus() == static::STATUS_MULTIBYTE) {
return mb_strpos($haystack, $needle, $offset);
}
else {
// Remove Unicode continuation characters, to be compatible with
// Unicode::strlen() and Unicode::substr().
$haystack = preg_replace("/[\x80-\xBF]/", '', $haystack);
$needle = preg_replace("/[\x80-\xBF]/", '', $needle);
return strpos($haystack, $needle, $offset);
}
}
}
This diff is collapsed.
......@@ -291,6 +291,7 @@ public function providerStrlen() {
return array(
array('tHe QUIcK bRoWn', 15),
array('ÜBER-åwesome', 12),
array('以呂波耳・ほへとち。リヌルヲ。', 15),
);
}
......@@ -529,4 +530,46 @@ public function providerTestConvertToUtf8() {
);
}
/**
* Tests multibyte strpos.
*
* @dataProvider providerStrpos
* @covers ::strpos
*/
public function testStrpos($haystack, $needle, $offset, $expected) {
// Run through multibyte code path.
Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
$this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset));
// Run through singlebyte code path.
Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
$this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset));
}
/**
* Data provider for testStrpos().
*
* @see testStrpos()
*
* @return array
* An array containing:
* - The haystack string to be searched in.
* - The needle string to search for.
* - The offset integer to start at.
* - The expected integer/FALSE result.
*/
public function providerStrpos() {
return array(
array('frànçAIS is über-åwesome', 'frànçAIS is über-åwesome', 0, 0),
array('frànçAIS is über-åwesome', 'rànçAIS is über-åwesome', 0, 1),
array('frànçAIS is über-åwesome', 'not in string', 0, FALSE),
array('frànçAIS is über-åwesome', 'r', 0, 1),
array('frànçAIS is über-åwesome', 'nçAIS', 0, 3),
array('frànçAIS is über-åwesome', 'nçAIS', 2, 3),
array('frànçAIS is über-åwesome', 'nçAIS', 3, 3),
array('以呂波耳・ほへとち。リヌルヲ。', '波耳', 0, 2),
array('以呂波耳・ほへとち。リヌルヲ。', '波耳', 1, 2),
array('以呂波耳・ほへとち。リヌルヲ。', '波耳', 2, 2),
);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment