Commit 0273daf0 authored by Michael Vanetta, committed by Thomas Seidl
Browse files

Issue #3304560 by recrit, Gertlor, drunken monkey: Fixed problems with...

Issue #3304560 by recrit, Gertlor, drunken monkey: Fixed problems with "Multiple terms" parse mode and invalid UTF-8 strings.
parent 84f6f6bd
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
Search API 1.x, dev (xxxx-xx-xx):
---------------------------------
- #3304560 by recrit, Gertlor, drunken monkey: Fixed problems with "Multiple
  terms" parse mode and invalid UTF-8 strings.
- #3327383 by unstatu, drunken monkey: Fixed handling of the "view any
  unpublished content" permission in the "Content access" processor.
- #3363208 by drunken monkey: Fixed implicit conversion from float to int in
+9 −4
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@

namespace Drupal\search_api\Plugin\search_api\parse_mode;

use Drupal\Component\Utility\Unicode;
use Drupal\search_api\ParseMode\ParseModePluginBase;

/**
@@ -19,17 +20,21 @@ class Terms extends ParseModePluginBase {
   * {@inheritdoc}
   */
  public function parseInput($keys) {
    $ret = [
      '#conjunction' => $this->getConjunction(),
    ];

    if (!Unicode::validateUtf8($keys)) {
      return $ret;
    }
    // Split the keys into tokens. Any whitespace is considered as a delimiter
    // for tokens. This covers ASCII white spaces as well as multi-byte "spaces"
    // which for example are common in Japanese.
    $tokens = preg_split('/\s+/u', $keys);
    $tokens = preg_split('/\s+/u', $keys) ?: [];
    $quoted = FALSE;
    $negated = FALSE;
    $phrase_contents = [];

    $ret = [
      '#conjunction' => $this->getConjunction(),
    ];
    foreach ($tokens as $token) {
      // Ignore empty tokens. (Also helps keep the following code simpler.)
      if ($token === '') {
+7 −1
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@

namespace Drupal\search_api\Plugin\views\filter;

use Drupal\Component\Utility\Unicode;
use Drupal\Core\Form\FormStateInterface;
use Drupal\search_api\Entity\Index;
use Drupal\search_api\ParseMode\ParseModePluginManager;
@@ -347,12 +348,17 @@ class SearchApiFulltext extends FilterPluginBase {
      return;
    }

    if (!Unicode::validateUtf8($input)) {
      $msg = $this->t('Invalid input.');
      $form_state->setErrorByName($identifier, $msg);
    }

    // Only continue if there is a minimum word length set.
    if ($this->options['min_length'] < 2) {
      return;
    }

    $words = preg_split('/\s+/', $input);
    $words = preg_split('/\s+/', $input) ?: [];
    foreach ($words as $i => $word) {
      if (mb_strlen($word) < $this->options['min_length']) {
        unset($words[$i]);
+10 −0
Original line number Diff line number Diff line
@@ -136,4 +136,14 @@ class TermsParseModeTest extends UnitTestCase {
    ];
  }

  /**
   * Tests that malformed UTF-8 keys produce only the conjunction entry.
   */
  public function testInvalidInput(): void {
    // 0xC3 opens a two-byte UTF-8 sequence, but 0x28 is not a valid
    // continuation byte, so this string is not well-formed UTF-8.
    $invalid_keys = "\xc3\x28";

    // No parsed terms are expected, just the conjunction marker.
    $expected = ['#conjunction' => 'AND'];

    $this->assertEquals($expected, $this->plugin->parseInput($invalid_keys));
  }

}