Commit 1205ca43 authored by Thomas Seidl's avatar Thomas Seidl
Browse files

Issue #3267092 by drunken monkey: Fixed stemming of content with invalid language codes.

parent 623226ca
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
Search API 1.x, dev (xxxx-xx-xx):
---------------------------------
- #3267092 by drunken monkey: Fixed stemming of content with invalid language
  codes.
- #3352134 by drunken monkey: Fixed performance degradation with database
  backend in 1.29.
- #3263875 by Eduardo Morales Alberti, drunken monkey, GrumpySchlag: Fixed Behat
+103 −20
Original line number Diff line number Diff line
@@ -3,10 +3,12 @@
namespace Drupal\search_api\Plugin\search_api\processor;

use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Language\LanguageManagerInterface;
use Drupal\search_api\IndexInterface;
use Drupal\search_api\Plugin\search_api\processor\Resources\Porter2;
use Drupal\search_api\Processor\FieldsProcessorPluginBase;
use Drupal\search_api\Query\QueryInterface;
use Symfony\Component\DependencyInjection\ContainerInterface;

/**
 * Stems search terms.
@@ -32,20 +34,11 @@ class Stemmer extends FieldsProcessorPluginBase {
  protected $stems = [];

  /**
   * {@inheritdoc}
   * The language manager.
   *
   * @var \Drupal\Core\Language\LanguageManagerInterface|null
   */
  public function defaultConfiguration() {
    // Words that get a fixed stem instead of being passed through the stemming
    // algorithm.
    $exceptions = [
      'texan' => 'texa',
      'mexican' => 'mexic',
    ];

    // Array union: keys already provided by the parent take precedence.
    return parent::defaultConfiguration() + ['exceptions' => $exceptions];
  }
  protected $languageManager;

  /**
   * {@inheritdoc}
@@ -54,13 +47,75 @@ class Stemmer extends FieldsProcessorPluginBase {
    $languages = \Drupal::languageManager()->getLanguages();
    // Make processor available only if English is one of the site languages.
    foreach ($languages as $language) {
      if (substr($language->getId(), 0, 2) === 'en') {
      if (static::isEnglish($language->getId())) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    // Let the parent build the processor first, then hand it the language
    // manager service taken from the container.
    $processor = parent::create($container, $configuration, $plugin_id, $plugin_definition);
    $language_manager = $container->get('language_manager');
    $processor->setLanguageManager($language_manager);

    return $processor;
  }

  /**
   * Checks whether the given language code represents a variation of English.
   *
   * Only the primary language subtag is inspected: the code has to be exactly
   * "en" or start with "en" followed by a subtag separator ("-" or "_"). Other
   * codes that merely begin with the letters "en" (for instance three-letter
   * primary subtags such as "enq") are not considered English. The comparison
   * is case-insensitive, as language tags are case-insensitive.
   *
   * @param string $langcode
   *   An ISO 639-1 language code or IETF language tag.
   *
   * @return bool
   *   TRUE if the language code represents a variation of English, FALSE
   *   otherwise.
   */
  protected static function isEnglish(string $langcode): bool {
    // "\z" (instead of "$") makes sure a trailing newline is not ignored.
    return preg_match('/^en(?:[-_]|\z)/i', $langcode) === 1;
  }

  /**
   * Retrieves the language manager.
   *
   * Falls back to the globally registered "language_manager" service when no
   * language manager has been set on this object.
   *
   * @return \Drupal\Core\Language\LanguageManagerInterface
   *   The language manager.
   */
  public function getLanguageManager(): LanguageManagerInterface {
    if ($this->languageManager) {
      return $this->languageManager;
    }

    return \Drupal::service('language_manager');
  }

  /**
   * Sets the language manager.
   *
   * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager
   *   The language manager this processor should use from now on.
   *
   * @return $this
   */
  public function setLanguageManager(LanguageManagerInterface $language_manager): self {
    // Overwrites any previously set language manager.
    $this->languageManager = $language_manager;

    return $this;
  }

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    // Words that get a fixed stem instead of being passed through the stemming
    // algorithm.
    $exceptions = [
      'texan' => 'texa',
      'mexican' => 'mexic',
    ];

    // Array union: keys already provided by the parent take precedence.
    return parent::defaultConfiguration() + ['exceptions' => $exceptions];
  }

  /**
   * {@inheritdoc}
   */
@@ -103,8 +158,10 @@ class Stemmer extends FieldsProcessorPluginBase {
   */
  public function preprocessIndexItems(array $items) {
    foreach ($items as $item) {
      // Limit this processor to English language data.
      if ($item->getLanguage() !== 'en') {
      // Limit this processor to English language data. If the site only has
      // English languages enabled, we assume all content is English.
      if (!static::isEnglish($item->getLanguage())
          && !$this->isSiteEnglishOnly()) {
        continue;
      }
      foreach ($item->getFields() as $name => $field) {
@@ -119,11 +176,21 @@ class Stemmer extends FieldsProcessorPluginBase {
   * {@inheritdoc}
   */
  public function preprocessSearchQuery(QueryInterface $query) {
    // Only process queries that can (also) return English language content. If
    // the site only has English languages enabled, we assume all content is
    // English.
    $languages = $query->getLanguages();
    // A NULL language list is passed straight through to the parent.
    if ($languages !== NULL && !$this->isSiteEnglishOnly()) {
      $has_english = FALSE;
      foreach ($languages as $langcode) {
        if (static::isEnglish($langcode)) {
          $has_english = TRUE;
          break;
        }
      }
      // None of the requested languages is English: leave the query alone.
      if (!$has_english) {
        return;
      }
    }
    parent::preprocessSearchQuery($query);
  }

@@ -152,4 +219,20 @@ class Stemmer extends FieldsProcessorPluginBase {
    $value = implode(' ', $stemmed);
  }

  /**
   * Tests whether this site only has English languages enabled.
   *
   * The language codes are taken from the keys of the array returned by the
   * language manager's getLanguages() method.
   *
   * @return bool
   *   TRUE if all enabled languages are variations of English (which includes
   *   the case of no languages being returned at all), FALSE otherwise.
   */
  protected function isSiteEnglishOnly(): bool {
    $site_languages = $this->getLanguageManager()->getLanguages();
    foreach ($site_languages as $code => $unused) {
      // A single non-English language is enough to decide.
      if (!static::isEnglish($code)) {
        return FALSE;
      }
    }

    return TRUE;
  }

}
+25 −1
Original line number Diff line number Diff line
@@ -35,6 +35,15 @@ class StemmerTest extends UnitTestCase {
    $this->setUpMockContainer();

    $this->processor = new Stemmer([], 'string', []);
    $language_manager = $this->createMock(LanguageManagerInterface::class);
    $language_manager->method('getLanguages')->willReturn([
      'de' => 'de',
      'en' => 'en',
      'en-GB' => 'en-GB',
      'fr' => 'fr',
      'it' => 'it',
    ]);
    $this->processor->setLanguageManager($language_manager);
  }

  /**
@@ -99,6 +108,17 @@ class StemmerTest extends UnitTestCase {
    ]);
    $item_en->method('getFields')->willReturn(['foo' => $field_en]);

    $item_en_gb = $this->getMockBuilder(ItemInterface::class)
      ->disableOriginalConstructor()
      ->getMock();
    $item_en_gb->method('getLanguage')->willReturn('en-GB');
    $field_en_gb = new Field($index, 'foo');
    $field_en_gb->setType('text');
    $field_en_gb->setValues([
      new TextValue('ties'),
    ]);
    $item_en_gb->method('getFields')->willReturn(['foo' => $field_en_gb]);

    $item_de = $this->getMockBuilder(ItemInterface::class)
      ->disableOriginalConstructor()
      ->getMock();
@@ -110,12 +130,14 @@ class StemmerTest extends UnitTestCase {
    ]);
    $item_de->method('getFields')->willReturn(['foo' => $field_de]);

    $items = [$item_en, $item_de];
    $items = [$item_en, $item_en_gb, $item_de];
    $this->processor->preprocessIndexItems($items);

    /** @var \Drupal\search_api\Plugin\search_api\data_type\value\TextValueInterface $value */
    $value = $field_en->getValues()[0];
    $this->assertEquals('tie', $value->toText());
    $value = $field_en_gb->getValues()[0];
    $this->assertEquals('tie', $value->toText());
    $value = $field_de->getValues()[0];
    $this->assertEquals('ties', $value->toText());
  }
@@ -161,8 +183,10 @@ class StemmerTest extends UnitTestCase {
    return [
      'language-less query' => [NULL, TRUE],
      'English query' => [['en'], TRUE],
      'British English query' => [['en-GB'], TRUE],
      'Non-English query' => [['de'], FALSE],
      'Multilingual query (including English)' => [['en', 'fr', 'es'], TRUE],
      'Multilingual query (including British English)' => [['en-GB', 'fr', 'es'], TRUE],
      'Multilingual query (not including English)' => [['de', 'it'], FALSE],
    ];
  }