Verified commit ed5ff2b3, authored by godotislate
Browse files

fix: #3564713 Search module fails to handle HTML tags with whitespace between tag name and attributes

fix: #3564713 Search module fails to handle HTML tags with whitespace between tag name and attributes

By: u7aro
By: smustgrave
By: godotislate
(cherry picked from commit 7bef38be)
parent f290315a
Loading
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -83,7 +83,7 @@ public function index($type, $sid, $langcode, $text, $update_weights = TRUE) {
    foreach ($split as $value) {
      if ($tag) {
        // Increase or decrease score per word based on tag.
        [$tagname] = explode(' ', $value, 2);
        [$tagname] = preg_split('/\s+/', $value, 2);
        $tagname = mb_strtolower($tagname);
        // Closing or opening tag?
        if ($tagname[0] == '/') {
+51 −0
Original line number Diff line number Diff line
@@ -48,6 +48,57 @@ public function testMatching(): void {
    $this->_testQueries();
  }

  /**
   * Tests that whitespace between a tag name and its attributes is handled.
   *
   * The same two words are indexed four times: wrapped in an anchor tag whose
   * name is followed by a space, by a newline, and by a tab, and once with no
   * markup at all as a control. A search for one of the words must return all
   * four items, give the three anchor variants an equal score, and rank them
   * above the control item.
   */
  public function testHTMLTagsWithWhitespace(): void {
    $this->config('search.settings')->set('index.minimum_word_size', 3)->save();

    $index = \Drupal::service('search.index');
    assert($index instanceof SearchIndexInterface);
    $language = LanguageInterface::LANGCODE_NOT_SPECIFIED;

    // Search item ID => markup to index. Array order is the indexing order.
    $fixtures = [
      // Inline anchor tag, single space before the attribute.
      101 => '<a href="https://example.com/">Drupal Rocks</a>',
      // Anchor tag with newlines around the attribute.
      102 => '<a' . "\n" . '  href="https://example.com/"' . "\n" . '>Drupal Rocks</a>',
      // Anchor tag with a tab character before the attribute.
      103 => '<a' . "\t" . 'href="https://example.com/">Drupal Rocks</a>',
      // No tag at all (control).
      104 => 'Drupal Rocks',
    ];
    foreach ($fixtures as $sid => $markup) {
      $index->index(static::SEARCH_TYPE, $sid, $language, $markup);
    }

    // Search the freshly built index for 'rocks'.
    $query = Database::getConnection()
      ->select('search_index', 'i')
      ->extend(SearchQuery::class);
    $statement = $query
      ->searchExpression('rocks', static::SEARCH_TYPE)
      ->execute();

    // A falsy result from execute() is treated as an empty result set.
    $rows = [];
    if ($statement) {
      $rows = $statement->fetchAll();
    }

    // Map each returned search item ID to its calculated score.
    $score_by_sid = [];
    foreach ($rows as $row) {
      $score_by_sid[$row->sid] = $row->calculated_score;
    }

    // Every indexed item must have been found.
    $this->assertCount(4, $score_by_sid);

    // The kind of whitespace inside the tag must not influence the score.
    foreach ([102, 103] as $variant_sid) {
      $this->assertEquals($score_by_sid[101], $score_by_sid[$variant_sid]);
    }

    // Words inside an anchor tag should outscore the same words untagged.
    $this->assertGreaterThan($score_by_sid[104], $score_by_sid[101]);
  }

  /**
   * Set up a small index of items to test against.
   */