Commit ac750c9e authored by neclimdul, committed by Ismaeil Abouljamal
Browse files

Issue #2991136 by tomhollevoet, neclimdul, drunken monkey, borisson_, izus:...

Issue #2991136 by tomhollevoet, neclimdul, drunken monkey, borisson_, izus: Add file to queue if service is not available
parent d37b7b53
......@@ -5,4 +5,4 @@ core: 8.x
package: Search
configure: search_api_attachments.admin_form
dependencies:
- search_api:search_api
- search_api:search_api
<?php
namespace Drupal\search_api_attachments\Plugin\QueueWorker;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Entity\TranslatableInterface;
use Drupal\Core\KeyValueStore\KeyValueFactoryInterface;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\Core\Queue\QueueWorkerBase;
use Drupal\search_api\Plugin\search_api\datasource\ContentEntity;
use Drupal\search_api_attachments\Plugin\search_api\processor\FilesExtractor;
use Drupal\search_api_attachments\TextExtractorPluginManager;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use Symfony\Component\DependencyInjection\ContainerInterface;
/**
 * Processes Tasks for Search API Attachments.
 *
 * Each queue item describes a file whose text extraction was deferred. The
 * worker extracts the file's text, stores it in the key-value store and marks
 * the entity the file is attached to as updated in its search indexes.
 *
 * @QueueWorker(
 *   id = "search_api_attachments",
 *   title = @Translation("Extractor Queue"),
 *   cron = {"time" = 180}
 * )
 */
class ExtractorQueue extends QueueWorkerBase implements ContainerFactoryPluginInterface {

  /**
   * Text extractor service.
   *
   * @var \Drupal\search_api_attachments\TextExtractorPluginManager
   */
  protected $textExtractorPluginManager;

  /**
   * Entity type manager service.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected $entityTypeManager;

  /**
   * Key value service.
   *
   * @var \Drupal\Core\KeyValueStore\KeyValueFactoryInterface
   */
  protected $keyValue;

  /**
   * The logger service.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected $logger;

  /**
   * {@inheritdoc}
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, TextExtractorPluginManager $text_extractor_plugin_manager, EntityTypeManagerInterface $entity_type_manager, KeyValueFactoryInterface $key_value, LoggerInterface $logger) {
    parent::__construct($configuration, $plugin_id, $plugin_definition);
    $this->textExtractorPluginManager = $text_extractor_plugin_manager;
    $this->entityTypeManager = $entity_type_manager;
    $this->keyValue = $key_value;
    $this->logger = $logger;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $container->get('plugin.manager.search_api_attachments.text_extractor'),
      $container->get('entity_type.manager'),
      $container->get('keyvalue'),
      $container->get('logger.channel.search_api_attachments')
    );
  }

  /**
   * Gets the extractor plugin selected in the module configuration.
   *
   * The plugin ID is read from the 'extraction_method' setting and the plugin
   * configuration from the matching '<plugin_id>_configuration' setting.
   *
   * @return object
   *   The text extractor plugin instance.
   *
   * @throws \Drupal\Component\Plugin\Exception\PluginException
   */
  protected function getExtractorPlugin() {
    // Get extractor configuration.
    $config = \Drupal::config(FilesExtractor::CONFIGNAME);
    $extractor_plugin_id = $config->get('extraction_method');
    $configuration = $config->get($extractor_plugin_id . '_configuration');
    // Get extractor plugin.
    return $this->textExtractorPluginManager->createInstance($extractor_plugin_id, $configuration);
  }

  /**
   * {@inheritdoc}
   *
   * Expects $data to carry the properties fid, entity_type, entity_id and
   * extract_attempts. On failure the item is re-queued until five attempts
   * were made, after which an error is logged and the item is dropped.
   */
  public function processItem($data) {
    $extractor_plugin = $this->getExtractorPlugin();
    // Load file from queue item.
    $file = $this->entityTypeManager->getStorage('file')->load($data->fid);
    if ($file === NULL) {
      return;
    }
    try {
      $collection = 'search_api_attachments';
      $key = $collection . ':' . $file->id();
      $store = $this->keyValue->get($collection);
      // Skip file if element is found in key_value collection.
      $extracted_data = $store->get($key);
      if (empty($extracted_data)) {
        // Extract file and save in key_value collection.
        $extracted_data = $extractor_plugin->extract($file);
        $store->set($key, $extracted_data);
      }

      // Extraction succeeded, so the file no longer counts as queued for this
      // entity.
      $this->clearFallbackFlag($data);

      $entity = $this->entityTypeManager->getStorage($data->entity_type)
        ->load($data->entity_id);
      if ($entity === NULL) {
        // The entity was deleted while the item was waiting in the queue, so
        // there is nothing left to reindex.
        return;
      }

      $indexes = ContentEntity::getIndexesForEntity($entity);
      $item_ids = [];
      if ($entity instanceof TranslatableInterface) {
        // Item IDs are built as "<entity id>:<langcode>", one per translation.
        foreach (array_keys($entity->getTranslationLanguages()) as $translation_id) {
          $item_ids[] = $entity->id() . ':' . $translation_id;
        }
      }
      $datasource_id = 'entity:' . $data->entity_type;
      foreach ($indexes as $index) {
        $index->trackItemsUpdated($datasource_id, $item_ids);
      }
    }
    catch (\Exception $exception) {
      if ($data->extract_attempts < 5) {
        $data->extract_attempts++;
        \Drupal::queue('search_api_attachments')->createItem($data);
      }
      else {
        $message_params = [
          '@file_id' => $data->fid,
          '@entity_id' => $data->entity_id,
          '@entity_type' => $data->entity_type,
        ];
        $this->logger->log(LogLevel::ERROR, 'Text extraction failed after 5 attempts @file_id for @entity_type @entity_id.', $message_params);
      }
    }
  }

  /**
   * Removes the "queued" flag of a file/entity pair from the fallback store.
   *
   * FilesExtractor::queueItem() stores the flags keyed by file ID as a nested
   * array [entity type][entity ID] => TRUE, so the same structure is edited
   * here. The update is a read-modify-write and therefore runs under the same
   * lock the writer uses. If the lock cannot be acquired the flag is left
   * untouched.
   *
   * @param object $data
   *   The queue item, providing fid, entity_type and entity_id.
   */
  private function clearFallbackFlag($data) {
    $lock = \Drupal::lock();
    if (!$lock->acquire(FilesExtractor::FALLBACK_QUEUE_LOCK)) {
      return;
    }
    try {
      $fallback_collection = $this->keyValue->get(FilesExtractor::FALLBACK_QUEUE_KV);
      $queued_files = $fallback_collection->get($data->fid);
      if (!is_array($queued_files)) {
        return;
      }
      unset($queued_files[$data->entity_type][$data->entity_id]);
      if (empty($queued_files[$data->entity_type])) {
        unset($queued_files[$data->entity_type]);
      }
      if (empty($queued_files)) {
        $fallback_collection->delete($data->fid);
      }
      else {
        $fallback_collection->set($data->fid, $queued_files);
      }
    }
    finally {
      $lock->release(FilesExtractor::FALLBACK_QUEUE_LOCK);
    }
  }

}
......@@ -10,8 +10,8 @@ use Drupal\Core\Extension\ModuleHandlerInterface;
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\KeyValueStore\KeyValueFactoryInterface;
use Drupal\Core\Utility\Error;
use Drupal\Core\Plugin\PluginFormInterface;
use Drupal\Core\Utility\Error;
use Drupal\file\Entity\File;
use Drupal\media\Entity\Media;
use Drupal\search_api\Datasource\DatasourceInterface;
......@@ -45,6 +45,10 @@ class FilesExtractor extends ProcessorPluginBase implements PluginFormInterface
*/
const CONFIGNAME = 'search_api_attachments.admin_config';
const FALLBACK_QUEUE_LOCK = 'search_api_attachments_fallback_queue';
const FALLBACK_QUEUE_KV = 'search_api_attachments:queued';
/**
* Name of the "virtual" field that handles file entity type extractions.
*
......@@ -278,8 +282,17 @@ class FilesExtractor extends ProcessorPluginBase implements PluginFormInterface
}
else {
try {
$extracted_data = $extractor_plugin->extract($file);
$extracted_data = $this->limitBytes($extracted_data);
// Only extract if this file has not previously failed and was queued.
$fallback_collection = $this->keyValue->get(FilesExtractor::FALLBACK_QUEUE_KV);
$queued_files = $fallback_collection->get($file->id());
if (empty($queued_files[$entity->getEntityTypeId()][$entity->id()])) {
$extracted_data = $extractor_plugin->extract($file);
$extracted_data = $this->limitBytes($extracted_data);
$this->keyValue->get($collection)->set($key, $extracted_data);
}
else {
$this->queueItem($entity, $file);
}
}
catch (\Exception $e) {
$error = Error::decodeException($e);
......@@ -294,12 +307,51 @@ class FilesExtractor extends ProcessorPluginBase implements PluginFormInterface
'@file' => $error['%file'],
];
$this->logger->log(LogLevel::ERROR, 'Error extracting text from file @file_id for @entity_type @entity_id. @type: @message in @function (line @line of @file).', $message_params);
$this->queueItem($entity, $file);
}
$this->keyValue->get($collection)->set($key, $extracted_data);
}
return $extracted_data;
}
/**
 * Queue a failed extraction for later processing.
 *
 * Flags the file/entity pair in the fallback key-value collection (keyed by
 * file ID, as [entity type][entity ID] => TRUE) and then adds an item to the
 * 'search_api_attachments' queue.
 *
 * @param \Drupal\Core\Entity\EntityInterface $entity
 *   The entity the file is attached to.
 * @param \Drupal\file\Entity\File $file
 *   A file object.
 *
 * @return bool
 *   TRUE if the item was added to the queue, FALSE if the lock could not be
 *   acquired or the queue rejected the item.
 */
private function queueItem(EntityInterface $entity, File $file) {
  $lock = \Drupal::lock();
  if (!$lock->acquire(static::FALLBACK_QUEUE_LOCK)) {
    return FALSE;
  }
  try {
    // Read-modify-write of the per-file flag array, guarded by the lock.
    $queued_file_collection = $this->keyValue->get(static::FALLBACK_QUEUE_KV);
    $queued_files = $queued_file_collection->get($file->id(), []);
    $queued_files[$entity->getEntityTypeId()][$entity->id()] = TRUE;
    $queued_file_collection->set($file->id(), $queued_files);
  }
  finally {
    // Always release the lock, even when the key-value store throws.
    $lock->release(static::FALLBACK_QUEUE_LOCK);
  }

  $log_params = [
    '@file_id' => $file->id(),
    '@entity_id' => $entity->id(),
    '@entity_type' => $entity->getEntityTypeId(),
  ];

  // Add file to queue.
  $queue = \Drupal::queue('search_api_attachments');
  $item = new \stdClass();
  $item->fid = $file->id();
  $item->entity_id = $entity->id();
  $item->entity_type = $entity->getEntityTypeId();
  $item->extract_attempts = 1;
  // createItem() returns FALSE when the item could not be queued; do not
  // report success in that case.
  if ($queue->createItem($item) === FALSE) {
    $this->logger->log(LogLevel::ERROR, 'Could not add file to the queue for text extraction @file_id for @entity_type @entity_id.', $log_params);
    return FALSE;
  }
  $this->logger->log(LogLevel::INFO, 'File added to the queue for text extraction @file_id for @entity_type @entity_id.', $log_params);
  return TRUE;
}
/**
* Limit the number of items to index per field to the configured limit.
*
......
......@@ -70,6 +70,11 @@ class SolrExtractor extends TextExtractorPluginBase {
// Get the Solr backend.
/** @var \Drupal\search_api_solr\Plugin\search_api\backend\SearchApiSolrBackend $backend */
$backend = $server->getBackend();
if (!$backend->isAvailable()) {
throw new \Exception('Solr Exctractor is not available.');
}
// Extract the content.
$xml_data = $backend->extractContentFromFile($filepath);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment