Issue #3253986 by drunken monkey, gaddman: Fixed empty "ignore characters"... (4faaa851) · Commits · project / search_api

CHANGELOG.txt

+2 −0

Original line number	Diff line number	Diff line
		Search API 1.x, dev (xxxx-xx-xx):
		---------------------------------
		- #3253986 by drunken monkey, gaddman: Fixed empty "ignore characters" setting
		for the Tokenizer processor.
		- #3197050 by BAHbKA, drunken monkey: Fixed caching issue for facets on AJAX
		views.
		- #3246615 by drunken monkey: Fixed error when saving an unindexed translation

src/Plugin/search_api/processor/Tokenizer.php

+9 −12

Original line number	Diff line number	Diff line
		@@ -78,7 +78,7 @@ class Tokenizer extends FieldsProcessorPluginBase {
		$form['ignored'] = [
		'#type' => 'textfield',
		'#title' => $this->t('Ignored characters'),
		'#description' => $this->t('Specify the characters that should be removed prior to processing. Dots, dashes, and underscores are ignored by default to allow meaningful search behavior with acronyms and URLs. Specify the characters as the inside of a <a href=":pcre-url">PCRE character class</a>.', $args),
		'#description' => $this->t('Specify the characters that should be removed prior to processing, as the inside of a <a href=":pcre-url">PCRE character class</a>.', $args),
		'#default_value' => $this->configuration['ignored'],
		];

		@@ -256,11 +256,13 @@ class Tokenizer extends FieldsProcessorPluginBase {
		// Readable regular expression: "([number]+)[punctuation]+(?=[number])".
		$text = preg_replace('/([' . $this->getPregClassNumbers() . ']+)[' . $this->getPregClassPunctuation() . ']+(?=[' . $this->getPregClassNumbers() . '])/u', '\1', $text);

		if ($this->ignored !== '') {
		// A group of multiple ignored characters is still treated as whitespace.
		$text = preg_replace('/[' . $this->ignored . ']{2,}/u', ' ', $text);

		// Remove all other instances of ignored characters.
		$text = preg_replace('/[' . $this->ignored . ']+/u', '', $text);
		}

		// Finally, convert all characters we want to treat as word boundaries to
		// plain spaces.
		@@ -337,7 +339,7 @@ class Tokenizer extends FieldsProcessorPluginBase {
		}

		/**
		* Prepares the processor by setting the $spaces property.
		* Prepares the processor by setting the $spaces and $ignored properties.
		*/
		protected function prepare() {
		if (!isset($this->spaces)) {
		@@ -349,13 +351,8 @@ class Tokenizer extends FieldsProcessorPluginBase {
		}
		}
		if (!isset($this->ignored)) {
		if ($this->configuration['ignored'] !== '') {
		$this->ignored = str_replace('/', '\/', $this->configuration['ignored']);
		}
		else {
		$this->ignored = '._-';
		}
		}
		}

		}

tests/src/Unit/Processor/TokenizerTest.php

+8 −0

Original line number	Diff line number	Diff line
		@@ -137,6 +137,14 @@ class TokenizerTest extends UnitTestCase {
		[Utility::createTextToken('foobr')],
		['ignored' => 'a'],
		],
		[
		'foo-bar',
		[Utility::createTextToken('foo-bar')],
		[
		'ignored' => '',
		'spaces' => ' ',
		],
		],
		// Test that multiple ignored characters are still treated as a word boundary.
		[
		'foobar',