Verified Commit 687473d5 authored by Alex Pott
Browse files

Issue #3239472 by stefanos.petrakis, danflanagan8, sjerdo, kporras07, alorenc,...

Issue #3239472 by stefanos.petrakis, danflanagan8, sjerdo, kporras07, alorenc, borutpiletic, tanc, benjifisher, alexpott, Kristen Pol, larowlan, longwave: preg_split in _filter_url breaks for long html tags

(cherry picked from commit e7ded380)
parent 7ae9645c
Loading
Loading
Loading
Loading
+44 −36
Original line number Diff line number Diff line
@@ -447,6 +447,9 @@ function template_preprocess_filter_tips(&$variables) {
 * expression that could possibly match all of the cases in one pass.
 */
function _filter_url($text, $filter) {
  // Store the current text in case any of the preg_* functions fail.
  $saved_text = $text;

  // Tags to skip and not recurse into.
  $ignore_tags = 'a|script|style|code|pre';

@@ -523,10 +526,13 @@ function _filter_url($text, $filter) {
    // markup, especially a '>'. Therefore, remove all comment contents and add
    // them back later.
    _filter_url_escape_comments('', TRUE);
    $text = preg_replace_callback('`<!--(.*?)-->`s', '_filter_url_escape_comments', $text);
    $text = is_null($text) ? '' : preg_replace_callback('`<!--(.*?)-->`s', '_filter_url_escape_comments', $text);

    // Split at all tags; ensures that no tags or attributes are processed.
    $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    $chunks = is_null($text) ? [''] : preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    // Do not attempt to convert URLs into links if preg_split() fails.
    if ($chunks !== FALSE) {
      // PHP ensures that the array consists of alternating delimiters and
      // literals, and begins and ends with a literal (inserting NULL as
      // required). Therefore, the first chunk is always text:
@@ -535,7 +541,6 @@ function _filter_url($text, $filter) {
      // removed when the closing tag is found. Until the closing tag is found,
      // no replacements are made.
      $open_tag = '';

      for ($i = 0; $i < count($chunks); $i++) {
        if ($chunk_type == 'text') {
          // Only process this text if there are no unclosed $ignore_tags.
@@ -567,12 +572,15 @@ function _filter_url($text, $filter) {
      }

      $text = implode($chunks);
    }

    // Revert to the original comment contents.
    _filter_url_escape_comments('', FALSE);
    $text = preg_replace_callback('`<!--(.*?)-->`', '_filter_url_escape_comments', $text);
    $text = $text ? preg_replace_callback('`<!--(.*?)-->`', '_filter_url_escape_comments', $text) : $text;
  }

  return $text;
  // If there is no text at this point revert to the previous text.
  return strlen((string) $text) > 0 ? $text : $saved_text;
}

/**
+19 −0
Original line number Diff line number Diff line
@@ -879,6 +879,7 @@ public function assertFilteredString(FilterInterface $filter, array $tests): voi
   *   comments.
   * - Empty HTML tags (BR, IMG).
   * - Mix of absolute and partial URLs, and email addresses in one content.
   * - Input that exceeds PCRE backtracking limit.
   */
  public function testUrlFilterContent() {
    // Get FilterUrl object.
@@ -894,6 +895,24 @@ public function testUrlFilterContent() {
    $expected = file_get_contents($path . '/filter.url-output.txt');
    $result = _filter_url($input, $filter);
    $this->assertSame($expected, $result, 'Complex HTML document was correctly processed.');

    $pcre_backtrack_limit = ini_get('pcre.backtrack_limit');
    // Setting this limit to the smallest possible value should cause PCRE
    // errors and break the various preg_* functions used by _filter_url().
    ini_set('pcre.backtrack_limit', 1);

    // If PCRE errors occur, _filter_url() should return the exact same text.
    // Case of a small and simple HTML document.
    $input = $expected = '<p>www.test.com</p>';
    $result = _filter_url($input, $filter);
    $this->assertSame($expected, $result, 'Simple HTML document was left intact when PCRE errors occurred.');
    // Case of a complex HTML document.
    $input = $expected = file_get_contents($path . '/filter.url-input.txt');
    $result = _filter_url($input, $filter);
    $this->assertSame($expected, $result, 'Complex HTML document was left intact when PCRE errors occurred.');

    // Restore the PCRE backtrack limit to the value it had before this test.
    ini_set('pcre.backtrack_limit', $pcre_backtrack_limit);
  }

  /**