Commit 4faaa851 authored by Thomas Seidl
Browse files

Issue #3253986 by drunken monkey, gaddman: Fixed empty "ignore characters"...

Issue #3253986 by drunken monkey, gaddman: Fixed empty "ignore characters" setting for the Tokenizer processor.
parent 8636e729
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
Search API 1.x, dev (xxxx-xx-xx):
---------------------------------
- #3253986 by drunken monkey, gaddman: Fixed empty "ignore characters" setting
  for the Tokenizer processor.
- #3197050 by BAHbKA, drunken monkey: Fixed caching issue for facets on AJAX
  views.
- #3246615 by drunken monkey: Fixed error when saving an unindexed translation
+9 −12
Original line number Diff line number Diff line
@@ -78,7 +78,7 @@ class Tokenizer extends FieldsProcessorPluginBase {
    $form['ignored'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Ignored characters'),
      '#description' => $this->t('Specify the characters that should be removed prior to processing. Dots, dashes, and underscores are ignored by default to allow meaningful search behavior with acronyms and URLs. Specify the characters as the inside of a <a href=":pcre-url">PCRE character class</a>.', $args),
      '#description' => $this->t('Specify the characters that should be removed prior to processing, as the inside of a <a href=":pcre-url">PCRE character class</a>.', $args),
      '#default_value' => $this->configuration['ignored'],
    ];

@@ -256,11 +256,13 @@ class Tokenizer extends FieldsProcessorPluginBase {
    // Readable regular expression: "([number]+)[punctuation]+(?=[number])".
    $text = preg_replace('/([' . $this->getPregClassNumbers() . ']+)[' . $this->getPregClassPunctuation() . ']+(?=[' . $this->getPregClassNumbers() . '])/u', '\1', $text);

    if ($this->ignored !== '') {
      // A group of multiple ignored characters is still treated as whitespace.
      $text = preg_replace('/[' . $this->ignored . ']{2,}/u', ' ', $text);

      // Remove all other instances of ignored characters.
      $text = preg_replace('/[' . $this->ignored . ']+/u', '', $text);
    }

    // Finally, convert all characters we want to treat as word boundaries to
    // plain spaces.
@@ -337,7 +339,7 @@ class Tokenizer extends FieldsProcessorPluginBase {
  }

  /**
   * Prepares the processor by setting the $spaces property.
   * Prepares the processor by setting the $spaces and $ignored properties.
   */
  protected function prepare() {
    if (!isset($this->spaces)) {
@@ -349,13 +351,8 @@ class Tokenizer extends FieldsProcessorPluginBase {
      }
    }
    if (!isset($this->ignored)) {
      if ($this->configuration['ignored'] !== '') {
      $this->ignored = str_replace('/', '\/', $this->configuration['ignored']);
    }
      else {
        $this->ignored = '._-';
      }
    }
  }

}
+8 −0
Original line number Diff line number Diff line
@@ -137,6 +137,14 @@ class TokenizerTest extends UnitTestCase {
        [Utility::createTextToken('foobr')],
        ['ignored' => 'a'],
      ],
      [
        'foo-bar',
        [Utility::createTextToken('foo-bar')],
        [
          'ignored' => '',
          'spaces' => ' ',
        ],
      ],
      // Test multiple ignored characters are still treated as word boundary.
      [
        'foobar',