Loading CHANGELOG.txt +2 −0 Original line number Diff line number Diff line Search API 1.x, dev (xxxx-xx-xx): --------------------------------- - #3267092 by drunken monkey: Fixed stemming of content with invalid language codes. - #3352134 by drunken monkey: Fixed performance degradation with database backend in 1.29. - #3263875 by Eduardo Morales Alberti, drunken monkey, GrumpySchlag: Fixed Behat Loading src/Plugin/search_api/processor/Stemmer.php +103 −20 Original line number Diff line number Diff line Loading @@ -3,10 +3,12 @@ namespace Drupal\search_api\Plugin\search_api\processor; use Drupal\Core\Form\FormStateInterface; use Drupal\Core\Language\LanguageManagerInterface; use Drupal\search_api\IndexInterface; use Drupal\search_api\Plugin\search_api\processor\Resources\Porter2; use Drupal\search_api\Processor\FieldsProcessorPluginBase; use Drupal\search_api\Query\QueryInterface; use Symfony\Component\DependencyInjection\ContainerInterface; /** * Stems search terms. Loading @@ -32,20 +34,11 @@ class Stemmer extends FieldsProcessorPluginBase { protected $stems = []; /** * {@inheritdoc} * The language manager. * * @var \Drupal\Core\Language\LanguageManagerInterface|null */ public function defaultConfiguration() { $configuration = parent::defaultConfiguration(); $configuration += [ 'exceptions' => [ 'texan' => 'texa', 'mexican' => 'mexic', ], ]; return $configuration; } protected $languageManager; /** * {@inheritdoc} Loading @@ -54,13 +47,75 @@ class Stemmer extends FieldsProcessorPluginBase { $languages = \Drupal::languageManager()->getLanguages(); // Make processor available only if English is one of the site languages. foreach ($languages as $language) { if (substr($language->getId(), 0, 2) === 'en') { if (static::isEnglish($language->getId())) { return TRUE; } } return FALSE; } /** * {@inheritdoc} */ public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) { $plugin = parent::create($container, $configuration, $plugin_id, $plugin_definition); $plugin->setLanguageManager($container->get('language_manager')); return $plugin; } /** * Checks whether the given language code represents a variation of English. * * @param string $langcode * An ISO 639-1 language code or IETF language tag. * * @return bool * TRUE if the language code represents a variation of English, FALSE * otherwise. */ protected static function isEnglish(string $langcode): bool { return substr($langcode, 0, 2) === 'en'; } /** * Retrieves the language manager. * * @return \Drupal\Core\Language\LanguageManagerInterface * The language manager. */ public function getLanguageManager(): LanguageManagerInterface { return $this->languageManager ?: \Drupal::service('language_manager'); } /** * Sets the language manager. * * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager * The new language manager. * * @return $this */ public function setLanguageManager(LanguageManagerInterface $language_manager): self { $this->languageManager = $language_manager; return $this; } /** * {@inheritdoc} */ public function defaultConfiguration() { $configuration = parent::defaultConfiguration(); $configuration += [ 'exceptions' => [ 'texan' => 'texa', 'mexican' => 'mexic', ], ]; return $configuration; } /** * {@inheritdoc} */ Loading Loading @@ -103,8 +158,10 @@ class Stemmer extends FieldsProcessorPluginBase { */ public function preprocessIndexItems(array $items) { foreach ($items as $item) { // Limit this processor to English language data. if ($item->getLanguage() !== 'en') { // Limit this processor to English language data. If the site only has // English languages enabled, we assume all content is English. if (!static::isEnglish($item->getLanguage()) && !$this->isSiteEnglishOnly()) { continue; } foreach ($item->getFields() as $name => $field) { Loading @@ -119,11 +176,21 @@ class Stemmer extends FieldsProcessorPluginBase { * {@inheritdoc} */ public function preprocessSearchQuery(QueryInterface $query) { // Only process queries that can (also) return English language content. $languages = $query->getLanguages(); if ($languages && !in_array('en', $languages)) { // Only process queries that can (also) return English language content. If // the site only has English languages enabled, we assume all content is // English. if ($query->getLanguages() !== NULL && !$this->isSiteEnglishOnly()) { $has_english = FALSE; foreach ($query->getLanguages() as $langcode) { if (static::isEnglish($langcode)) { $has_english = TRUE; break; } } if (!$has_english) { return; } } parent::preprocessSearchQuery($query); } Loading Loading @@ -152,4 +219,20 @@ class Stemmer extends FieldsProcessorPluginBase { $value = implode(' ', $stemmed); } /** * Tests whether this site only has English languages enabled. * * @return bool * TRUE if all enabled languages are variations of English. */ protected function isSiteEnglishOnly(): bool { $langcodes = array_keys($this->getLanguageManager()->getLanguages()); foreach ($langcodes as $langcode) { if (!static::isEnglish($langcode)) { return FALSE; } } return TRUE; } } tests/src/Unit/Processor/StemmerTest.php +25 −1 Original line number Diff line number Diff line Loading @@ -35,6 +35,15 @@ class StemmerTest extends UnitTestCase { $this->setUpMockContainer(); $this->processor = new Stemmer([], 'string', []); $language_manager = $this->createMock(LanguageManagerInterface::class); $language_manager->method('getLanguages')->willReturn([ 'de' => 'de', 'en' => 'en', 'en-GB' => 'en-GB', 'fr' => 'fr', 'it' => 'it', ]); $this->processor->setLanguageManager($language_manager); } /** Loading Loading @@ -99,6 +108,17 @@ class StemmerTest extends UnitTestCase { ]); $item_en->method('getFields')->willReturn(['foo' => $field_en]); $item_en_gb = $this->getMockBuilder(ItemInterface::class) ->disableOriginalConstructor() ->getMock(); $item_en_gb->method('getLanguage')->willReturn('en-GB'); $field_en_gb = new Field($index, 'foo'); $field_en_gb->setType('text'); $field_en_gb->setValues([ new TextValue('ties'), ]); $item_en_gb->method('getFields')->willReturn(['foo' => $field_en_gb]); $item_de = $this->getMockBuilder(ItemInterface::class) ->disableOriginalConstructor() ->getMock(); Loading @@ -110,12 +130,14 @@ class StemmerTest extends UnitTestCase { ]); $item_de->method('getFields')->willReturn(['foo' => $field_de]); $items = [$item_en, $item_de]; $items = [$item_en, $item_en_gb, $item_de]; $this->processor->preprocessIndexItems($items); /** @var \Drupal\search_api\Plugin\search_api\data_type\value\TextValueInterface $value */ $value = $field_en->getValues()[0]; $this->assertEquals('tie', $value->toText()); $value = $field_en_gb->getValues()[0]; $this->assertEquals('tie', $value->toText()); $value = $field_de->getValues()[0]; $this->assertEquals('ties', $value->toText()); } Loading Loading @@ -161,8 +183,10 @@ class StemmerTest extends UnitTestCase { return [ 'language-less query' => [NULL, TRUE], 'English query' => [['en'], TRUE], 'British English query' => [['en-GB'], TRUE], 'Non-English query' => [['de'], FALSE], 'Multilingual query (including English)' => [['en', 'fr', 'es'], TRUE], 'Multilingual query (including British English)' => [['en-GB', 'fr', 'es'], TRUE], 'Multilingual query (not including English)' => [['de', 'it'], FALSE], ]; } Loading Loading
CHANGELOG.txt +2 −0 Original line number Diff line number Diff line Search API 1.x, dev (xxxx-xx-xx): --------------------------------- - #3267092 by drunken monkey: Fixed stemming of content with invalid language codes. - #3352134 by drunken monkey: Fixed performance degradation with database backend in 1.29. - #3263875 by Eduardo Morales Alberti, drunken monkey, GrumpySchlag: Fixed Behat Loading
src/Plugin/search_api/processor/Stemmer.php +103 −20 Original line number Diff line number Diff line Loading @@ -3,10 +3,12 @@ namespace Drupal\search_api\Plugin\search_api\processor; use Drupal\Core\Form\FormStateInterface; use Drupal\Core\Language\LanguageManagerInterface; use Drupal\search_api\IndexInterface; use Drupal\search_api\Plugin\search_api\processor\Resources\Porter2; use Drupal\search_api\Processor\FieldsProcessorPluginBase; use Drupal\search_api\Query\QueryInterface; use Symfony\Component\DependencyInjection\ContainerInterface; /** * Stems search terms. Loading @@ -32,20 +34,11 @@ class Stemmer extends FieldsProcessorPluginBase { protected $stems = []; /** * {@inheritdoc} * The language manager. * * @var \Drupal\Core\Language\LanguageManagerInterface|null */ public function defaultConfiguration() { $configuration = parent::defaultConfiguration(); $configuration += [ 'exceptions' => [ 'texan' => 'texa', 'mexican' => 'mexic', ], ]; return $configuration; } protected $languageManager; /** * {@inheritdoc} Loading @@ -54,13 +47,75 @@ class Stemmer extends FieldsProcessorPluginBase { $languages = \Drupal::languageManager()->getLanguages(); // Make processor available only if English is one of the site languages. foreach ($languages as $language) { if (substr($language->getId(), 0, 2) === 'en') { if (static::isEnglish($language->getId())) { return TRUE; } } return FALSE; } /** * {@inheritdoc} */ public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) { $plugin = parent::create($container, $configuration, $plugin_id, $plugin_definition); $plugin->setLanguageManager($container->get('language_manager')); return $plugin; } /** * Checks whether the given language code represents a variation of English. * * @param string $langcode * An ISO 639-1 language code or IETF language tag. * * @return bool * TRUE if the language code represents a variation of English, FALSE * otherwise. */ protected static function isEnglish(string $langcode): bool { return substr($langcode, 0, 2) === 'en'; } /** * Retrieves the language manager. * * @return \Drupal\Core\Language\LanguageManagerInterface * The language manager. */ public function getLanguageManager(): LanguageManagerInterface { return $this->languageManager ?: \Drupal::service('language_manager'); } /** * Sets the language manager. * * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager * The new language manager. * * @return $this */ public function setLanguageManager(LanguageManagerInterface $language_manager): self { $this->languageManager = $language_manager; return $this; } /** * {@inheritdoc} */ public function defaultConfiguration() { $configuration = parent::defaultConfiguration(); $configuration += [ 'exceptions' => [ 'texan' => 'texa', 'mexican' => 'mexic', ], ]; return $configuration; } /** * {@inheritdoc} */ Loading Loading @@ -103,8 +158,10 @@ class Stemmer extends FieldsProcessorPluginBase { */ public function preprocessIndexItems(array $items) { foreach ($items as $item) { // Limit this processor to English language data. if ($item->getLanguage() !== 'en') { // Limit this processor to English language data. If the site only has // English languages enabled, we assume all content is English. if (!static::isEnglish($item->getLanguage()) && !$this->isSiteEnglishOnly()) { continue; } foreach ($item->getFields() as $name => $field) { Loading @@ -119,11 +176,21 @@ class Stemmer extends FieldsProcessorPluginBase { * {@inheritdoc} */ public function preprocessSearchQuery(QueryInterface $query) { // Only process queries that can (also) return English language content. $languages = $query->getLanguages(); if ($languages && !in_array('en', $languages)) { // Only process queries that can (also) return English language content. If // the site only has English languages enabled, we assume all content is // English. if ($query->getLanguages() !== NULL && !$this->isSiteEnglishOnly()) { $has_english = FALSE; foreach ($query->getLanguages() as $langcode) { if (static::isEnglish($langcode)) { $has_english = TRUE; break; } } if (!$has_english) { return; } } parent::preprocessSearchQuery($query); } Loading Loading @@ -152,4 +219,20 @@ class Stemmer extends FieldsProcessorPluginBase { $value = implode(' ', $stemmed); } /** * Tests whether this site only has English languages enabled. * * @return bool * TRUE if all enabled languages are variations of English. */ protected function isSiteEnglishOnly(): bool { $langcodes = array_keys($this->getLanguageManager()->getLanguages()); foreach ($langcodes as $langcode) { if (!static::isEnglish($langcode)) { return FALSE; } } return TRUE; } }
tests/src/Unit/Processor/StemmerTest.php +25 −1 Original line number Diff line number Diff line Loading @@ -35,6 +35,15 @@ class StemmerTest extends UnitTestCase { $this->setUpMockContainer(); $this->processor = new Stemmer([], 'string', []); $language_manager = $this->createMock(LanguageManagerInterface::class); $language_manager->method('getLanguages')->willReturn([ 'de' => 'de', 'en' => 'en', 'en-GB' => 'en-GB', 'fr' => 'fr', 'it' => 'it', ]); $this->processor->setLanguageManager($language_manager); } /** Loading Loading @@ -99,6 +108,17 @@ class StemmerTest extends UnitTestCase { ]); $item_en->method('getFields')->willReturn(['foo' => $field_en]); $item_en_gb = $this->getMockBuilder(ItemInterface::class) ->disableOriginalConstructor() ->getMock(); $item_en_gb->method('getLanguage')->willReturn('en-GB'); $field_en_gb = new Field($index, 'foo'); $field_en_gb->setType('text'); $field_en_gb->setValues([ new TextValue('ties'), ]); $item_en_gb->method('getFields')->willReturn(['foo' => $field_en_gb]); $item_de = $this->getMockBuilder(ItemInterface::class) ->disableOriginalConstructor() ->getMock(); Loading @@ -110,12 +130,14 @@ class StemmerTest extends UnitTestCase { ]); $item_de->method('getFields')->willReturn(['foo' => $field_de]); $items = [$item_en, $item_de]; $items = [$item_en, $item_en_gb, $item_de]; $this->processor->preprocessIndexItems($items); /** @var \Drupal\search_api\Plugin\search_api\data_type\value\TextValueInterface $value */ $value = $field_en->getValues()[0]; $this->assertEquals('tie', $value->toText()); $value = $field_en_gb->getValues()[0]; $this->assertEquals('tie', $value->toText()); $value = $field_de->getValues()[0]; $this->assertEquals('ties', $value->toText()); } Loading Loading @@ -161,8 +183,10 @@ class StemmerTest extends UnitTestCase { return [ 'language-less query' => [NULL, TRUE], 'English query' => [['en'], TRUE], 'British English query' => [['en-GB'], TRUE], 'Non-English query' => [['de'], FALSE], 'Multilingual query (including English)' => [['en', 'fr', 'es'], TRUE], 'Multilingual query (including British English)' => [['en-GB', 'fr', 'es'], TRUE], 'Multilingual query (not including English)' => [['de', 'it'], FALSE], ]; } Loading