FilesExtractor.php 23.7 KB
Newer Older
izus's avatar
izus committed
1
2
3
4
<?php

namespace Drupal\search_api_attachments\Plugin\search_api\processor;

5
use Drupal\Component\Utility\Bytes;
6
use Drupal\Core\Config\ConfigFactoryInterface;
7
use Drupal\Core\Entity\EntityInterface;
8
9
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Extension\ModuleHandlerInterface;
10
11
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\Core\Form\FormStateInterface;
12
use Drupal\Core\KeyValueStore\KeyValueFactoryInterface;
13
use Drupal\Core\Plugin\PluginFormInterface;
14
use Drupal\Core\Utility\Error;
15
use Drupal\file\Entity\File;
16
use Drupal\media\Entity\Media;
17
use Drupal\search_api\Datasource\DatasourceInterface;
18
use Drupal\search_api\Item\ItemInterface;
izus's avatar
izus committed
19
use Drupal\search_api\Processor\ProcessorPluginBase;
20
21
use Drupal\search_api\Processor\ProcessorProperty;
use Drupal\search_api\Utility\FieldsHelperInterface;
22
use Drupal\search_api_attachments\ExtractFileValidator;
23
use Drupal\search_api_attachments\TextExtractorPluginInterface;
24
use Drupal\search_api_attachments\TextExtractorPluginManager;
25
26
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
27
use Symfony\Component\DependencyInjection\ContainerInterface;
izus's avatar
izus committed
28
29

/**
izus's avatar
izus committed
30
31
 * Provides file fields processor.
 *
izus's avatar
izus committed
32
33
34
35
36
 * @SearchApiProcessor(
 *   id = "file_attachments",
 *   label = @Translation("File attachments"),
 *   description = @Translation("Adds the file attachments content to the indexed data."),
 *   stages = {
37
 *     "add_properties" = 0,
izus's avatar
izus committed
38
39
40
 *   }
 * )
 */
41
class FilesExtractor extends ProcessorPluginBase implements PluginFormInterface {
42

43
44
45
46
47
  /**
   * Name of the config being edited.
   */
  const CONFIGNAME = 'search_api_attachments.admin_config';

48
49
50
51
  /**
   * Name of the lock acquired while updating the record of queued files.
   */
  const FALLBACK_QUEUE_LOCK = 'search_api_attachments_fallback_queue';

  /**
   * Name of the key-value collection that records queued files.
   *
   * Entries are keyed by file ID and hold a nested array of
   * [entity type ID][entity ID] => TRUE flags.
   */
  const FALLBACK_QUEUE_KV = 'search_api_attachments:queued';

52
53
54
55
56
57
  /**
   * Name of the "virtual" field that handles file entity type extractions.
   *
   * This is used per example in a File datasource index or mixed
   * datasources index.
   */
58
59
60
61
62
63
  const SAA_FILE_ENTITY = 'saa_file_entity';

  /**
   * Prefix of the properties provided by this module.
   */
  const SAA_PREFIX = 'saa_';
64

65
66
67
68
69
70
71
72
  /**
   * The plugin manager for our text extractor.
   *
   * @var \Drupal\search_api_attachments\TextExtractorPluginManager
   */
  protected $textExtractorPluginManager;

  /**
73
   * The extract file validator service.
74
   *
75
   * @var \Drupal\search_api_attachments\ExtractFileValidator
76
   */
77
  protected $extractFileValidator;
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

  /**
   * Config factory service.
   *
   * @var \Drupal\Core\Config\ConfigFactoryInterface
   */
  protected $configFactory;

  /**
   * Entity type manager service.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected $entityTypeManager;

  /**
   * Key value service.
   *
   * @var \Drupal\Core\KeyValueStore\KeyValueFactoryInterface
   */
  protected $keyValue;

  /**
   * Module handler service.
   *
   * @var \Drupal\Core\Extension\ModuleHandlerInterface
   */
  protected $moduleHandler;
106

107
108
109
110
111
112
113
  /**
   * Search API field helper.
   *
   * @var \Drupal\search_api\Utility\FieldsHelperInterface
   */
  protected $fieldHelper;

114
115
116
117
118
119
120
  /**
   * The logger service.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected $logger;

121
122
123
  /**
   * Constructs a FilesExtractor processor.
   *
   * @param array $configuration
   *   The plugin configuration.
   * @param string $plugin_id
   *   The plugin ID.
   * @param array $plugin_definition
   *   The plugin definition.
   * @param \Drupal\search_api_attachments\TextExtractorPluginManager $text_extractor_plugin_manager
   *   The plugin manager for text extractors.
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   Config factory service.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager
   *   Entity type manager service.
   * @param \Drupal\Core\KeyValueStore\KeyValueFactoryInterface $key_value
   *   Key value service.
   * @param \Drupal\Core\Extension\ModuleHandlerInterface $module_handler
   *   Module handler service.
   * @param \Drupal\search_api\Utility\FieldsHelperInterface $field_helper
   *   Search API field helper.
   * @param \Drupal\search_api_attachments\ExtractFileValidator $extractFileValidator
   *   The extract file validator service.
   * @param \Psr\Log\LoggerInterface $logger
   *   The logger service.
   */
  public function __construct(array $configuration, $plugin_id, array $plugin_definition, TextExtractorPluginManager $text_extractor_plugin_manager, ConfigFactoryInterface $config_factory, EntityTypeManagerInterface $entity_type_manager, KeyValueFactoryInterface $key_value, ModuleHandlerInterface $module_handler, FieldsHelperInterface $field_helper, ExtractFileValidator $extractFileValidator, LoggerInterface $logger) {
    parent::__construct($configuration, $plugin_id, $plugin_definition);

    // Core services.
    $this->configFactory = $config_factory;
    $this->entityTypeManager = $entity_type_manager;
    $this->keyValue = $key_value;
    $this->moduleHandler = $module_handler;
    $this->logger = $logger;

    // Search API and Search API attachments services.
    $this->fieldHelper = $field_helper;
    $this->textExtractorPluginManager = $text_extractor_plugin_manager;
    $this->extractFileValidator = $extractFileValidator;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    // Resolve the services in the order the constructor expects them.
    $text_extractor_manager = $container->get('plugin.manager.search_api_attachments.text_extractor');
    $config_factory = $container->get('config.factory');
    $entity_type_manager = $container->get('entity_type.manager');
    $key_value = $container->get('keyvalue');
    $module_handler = $container->get('module_handler');
    $fields_helper = $container->get('search_api.fields_helper');
    $file_validator = $container->get('search_api_attachments.extract_file_validator');
    $logger = $container->get('logger.channel.search_api_attachments');

    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $text_extractor_manager,
      $config_factory,
      $entity_type_manager,
      $key_value,
      $module_handler,
      $fields_helper,
      $file_validator,
      $logger
    );
  }

155
156
157
158
  /**
   * {@inheritdoc}
   */
  public function getPropertyDefinitions(DatasourceInterface $datasource = NULL) {
    // Properties are only provided when no datasource is given.
    if ($datasource) {
      return [];
    }

    $definitions = [];
    // One property per indexed file field, plus the generic file entity item.
    foreach ($this->getFileFieldsAndFileEntityItems() as $name => $field_label) {
      $arguments = ['@label' => $field_label];
      $definitions[static::SAA_PREFIX . $name] = new ProcessorProperty([
        'label' => $this->t('Search api attachments: @label', $arguments),
        'description' => $this->t('Search api attachments: @label', $arguments),
        'type' => 'string',
        'processor_id' => $this->getPluginId(),
      ]);
    }

    return $definitions;
  }
176

izus's avatar
izus committed
177
178
179
  /**
   * {@inheritdoc}
   */
  public function addFieldValues(ItemInterface $item) {
    $config = $this->configFactory->get(static::CONFIGNAME);
    $extractor_plugin_id = $config->get('extraction_method');
    // Nothing can be extracted until an extraction method is configured.
    if ($extractor_plugin_id == '') {
      return;
    }

    // The plugin manager expects an array, but the extractor may not have any
    // configuration saved yet.
    $configuration = $config->get($extractor_plugin_id . '_configuration');
    if (!is_array($configuration)) {
      $configuration = [];
    }
    $extractor_plugin = $this->textExtractorPluginManager->createInstance($extractor_plugin_id, $configuration);

    // Get the entity.
    $entity = $item->getOriginalObject()->getValue();
    $is_entity_type_file = $entity->getEntityTypeId() == 'file';

    foreach (array_keys($this->getFileFieldsAndFileEntityItems()) as $field_name) {
      $is_file_entity_item = $field_name == static::SAA_FILE_ENTITY;
      // The virtual file entity item only applies to file entities. Skip just
      // this item: the remaining file fields of the entity must still be
      // processed.
      if ($is_file_entity_item && !$is_entity_type_file) {
        continue;
      }

      $fields = $this->fieldHelper->filterForPropertyPath($item->getFields(), NULL, static::SAA_PREFIX . $field_name);
      if (empty($fields)) {
        continue;
      }

      // Resolve the files for this property only, so that files found for a
      // previous field are never attributed to this one.
      $files = $this->loadAttachedFiles($entity, $field_name, $is_file_entity_item);
      if (empty($files)) {
        continue;
      }

      foreach ($fields as $field) {
        $extraction = '';
        foreach ($files as $file) {
          if ($this->isFileIndexable($file, $item, $field_name)) {
            $extraction .= $this->extractOrGetFromCache($entity, $file, $extractor_plugin);
          }
        }
        $field->addValue($extraction);
      }
    }
  }

  /**
   * Loads the files an entity provides for one of this processor's properties.
   *
   * @param \Drupal\Core\Entity\EntityInterface $entity
   *   The entity being indexed.
   * @param string $field_name
   *   The field name, or static::SAA_FILE_ENTITY for the virtual item.
   * @param bool $is_file_entity_item
   *   TRUE if $field_name is the virtual file entity item and $entity is
   *   itself a file.
   *
   * @return array
   *   The files to extract text from, possibly empty.
   */
  private function loadAttachedFiles(EntityInterface $entity, $field_name, $is_file_entity_item) {
    $files = $is_file_entity_item ? [$entity] : [];
    if ($entity->hasField($field_name)) {
      $fids = $this->limitToAllowedNumber($this->collectFieldFileIds($entity, $field_name));
      // Retrieve the files.
      $files = $this->entityTypeManager
        ->getStorage('file')
        ->loadMultiple($fids);
    }
    return $files;
  }

  /**
   * Collects the file IDs referenced by a file field or a media reference.
   *
   * @param \Drupal\Core\Entity\EntityInterface $entity
   *   The entity owning the field.
   * @param string $field_name
   *   The name of a field that exists on $entity.
   *
   * @return array
   *   The referenced file IDs, in field order. Empty for any other field type.
   */
  private function collectFieldFileIds(EntityInterface $entity, $field_name) {
    $fids = [];
    $items = $entity->get($field_name);
    /** @var \Drupal\Core\Field\BaseFieldDefinition $field_def */
    $field_def = $items->getFieldDefinition();
    $type = $field_def->getType();

    if ($type == 'file') {
      foreach ($items->filterEmptyItems()->getValue() as $filefield_value) {
        $fids[] = $filefield_value['target_id'];
      }
      return $fids;
    }

    // Only media references are followed; other references hold no files.
    if ($type != 'entity_reference' || $field_def->getItemDefinition()->getSetting('target_type') !== 'media') {
      return $fids;
    }

    foreach ($items->filterEmptyItems()->getValue() as $media_value) {
      $media = Media::load($media_value['target_id']);
      if ($media === NULL) {
        continue;
      }
      $bundle_configuration = $media->getSource()->getConfiguration();
      if (!isset($bundle_configuration['source_field'])) {
        continue;
      }
      /** @var \Drupal\Core\Field\FieldItemListInterface $field_item */
      foreach ($media->get($bundle_configuration['source_field'])->filterEmptyItems() as $field_item) {
        if ($field_item->getFieldDefinition()->getType() === 'file') {
          $value = $field_item->getValue();
          $fids[] = $value['target_id'];
        }
      }
    }
    return $fids;
  }
258

259
  /**
   * Extract non text file data or get it from cache if available and cache it.
   *
   * Plain text files are read directly and are neither cached nor passed
   * through the extractor. A failed extraction is logged and the file is
   * queued for later processing.
   *
   * @param \Drupal\Core\Entity\EntityInterface $entity
   *   The entity the file is attached to.
   * @param \Drupal\file\Entity\File $file
   *   A file object.
   * @param \Drupal\search_api_attachments\TextExtractorPluginInterface $extractor_plugin
   *   The plugin used to extract file content.
   *
   * @return string
   *   The extracted text, or an empty string if nothing could be extracted.
   */
  public function extractOrGetFromCache(EntityInterface $entity, File $file, TextExtractorPluginInterface $extractor_plugin) {
    // Directly process plaintext files.
    if (substr($file->getMimeType(), 0, 5) == 'text/') {
      $contents = file_get_contents($file->getFileUri());
      // An unreadable file yields FALSE; honour the documented string return.
      return $contents === FALSE ? '' : $contents;
    }

    $collection = 'search_api_attachments';
    $key = $collection . ':' . $file->id();
    $cache_store = $this->keyValue->get($collection);
    if ($cache = $cache_store->get($key)) {
      return $cache;
    }

    $extracted_data = '';
    try {
      // Only extract if this file has not previously failed and was queued.
      $fallback_collection = $this->keyValue->get(static::FALLBACK_QUEUE_KV);
      $queued_files = $fallback_collection->get($file->id());
      if (empty($queued_files[$entity->getEntityTypeId()][$entity->id()])) {
        $extracted_data = $this->limitBytes($extractor_plugin->extract($file));
        $cache_store->set($key, $extracted_data);
      }
      else {
        $this->queueItem($entity, $file);
      }
    }
    catch (\Exception $e) {
      $error = Error::decodeException($e);
      $message_params = [
        '@file_id' => $file->id(),
        '@entity_id' => $entity->id(),
        '@entity_type' => $entity->getEntityTypeId(),
        '@type' => $error['%type'],
        '@message' => $error['@message'],
        '@function' => $error['%function'],
        '@line' => $error['%line'],
        '@file' => $error['%file'],
      ];
      $this->logger->log(LogLevel::ERROR, 'Error extracting text from file @file_id for @entity_type @entity_id. @type: @message in @function (line @line of @file).', $message_params);
      $this->queueItem($entity, $file);
    }
    return $extracted_data;
  }

316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
  /**
   * Queue a failed extraction for later processing.
   *
   * The file is first recorded in the static::FALLBACK_QUEUE_KV collection
   * under the static::FALLBACK_QUEUE_LOCK lock, then added to the
   * 'search_api_attachments' queue.
   *
   * @param \Drupal\Core\Entity\EntityInterface $entity
   *   The entity the file is attached to.
   * @param \Drupal\file\Entity\File $file
   *   A file object.
   *
   * @return bool
   *   TRUE if the item was queued, FALSE if the lock could not be acquired.
   */
  private function queueItem(EntityInterface $entity, File $file) {
    $lock = \Drupal::lock();
    if (!$lock->acquire(static::FALLBACK_QUEUE_LOCK)) {
      return FALSE;
    }

    try {
      $queued_file_collection = $this->keyValue->get(static::FALLBACK_QUEUE_KV);
      $queued_files = $queued_file_collection->get($file->id());
      $queued_files[$entity->getEntityTypeId()][$entity->id()] = TRUE;
      $queued_file_collection->set($file->id(), $queued_files);
    }
    finally {
      // Always release the lock, even if the key-value update failed, so later
      // callers do not have to wait for it to expire.
      $lock->release(static::FALLBACK_QUEUE_LOCK);
    }

    // Add file to queue.
    $queue = \Drupal::queue('search_api_attachments');
    $item = new \stdClass();
    $item->fid = $file->id();
    $item->entity_id = $entity->id();
    $item->entity_type = $entity->getEntityTypeId();
    $item->extract_attempts = 1;
    $queue->createItem($item);

    $this->logger->log(LogLevel::INFO, 'File added to the queue for text extraction @file_id for @entity_type @entity_id.', [
      '@file_id' => $file->id(),
      '@entity_id' => $entity->id(),
      '@entity_type' => $entity->getEntityTypeId(),
    ]);
    return TRUE;
  }

355
356
357
  /**
   * Trims a list of file IDs to the configured per-field maximum.
   *
   * @param array $all_fids
   *   Array of fids.
   *
   * @return array
   *   The first "number_indexed" fids, or all of them when that setting is
   *   missing or 0.
   */
  public function limitToAllowedNumber(array $all_fids) {
    $limit = isset($this->configuration['number_indexed']) ? $this->configuration['number_indexed'] : 0;

    // A limit of 0 means "no restriction".
    if ($limit == 0) {
      return $all_fids;
    }

    $exceeds_limit = count($all_fids) > $limit;
    return $exceeds_limit ? array_slice($all_fids, 0, $limit) : $all_fids;
  }

381
382
383
384
  /**
   * Limit the indexed text to first N bytes.
   *
   * @param string $extracted_text
   *   The whole extracted text.
   *
   * @return string
   *   The first N bytes of the extracted text that will be indexed and cached,
   *   or the whole text when the limit is 0.
   */
  public function limitBytes($extracted_text) {
    // Without an explicit setting, fall back to 1 MB: a sensible amount of
    // text to extract and cache in the database for most cases.
    $configured_limit = isset($this->configuration['number_first_bytes']) ? $this->configuration['number_first_bytes'] : '1 MB';
    $bytes = Bytes::toInt($configured_limit);

    // A limit of 0 means the text is returned untouched.
    return $bytes == 0 ? $extracted_text : mb_strcut($extracted_text, 0, $bytes);
  }

408
409
410
  /**
   * Check if the file is allowed to be indexed.
   *
   * Settings that have never been saved fall back to the defaults offered by
   * the configuration form: no maximum file size and private files excluded.
   *
   * @param object $file
   *   A file object.
   * @param \Drupal\search_api\Item\ItemInterface $item
   *   The item the file was referenced in.
   * @param string|null $field_name
   *   The name of the field the file was referenced in, if applicable.
   *
   * @return bool
   *   TRUE if the file passes every check, FALSE otherwise.
   */
  public function isFileIndexable($file, ItemInterface $item, $field_name = NULL) {
    // The processor may run before its form was ever submitted, in which case
    // these keys are absent from the configuration.
    $settings = $this->configuration + [
      'excluded_mimes' => NULL,
      'max_filesize' => '0',
      'excluded_private' => TRUE,
    ];

    // File should exist in disc.
    if (!file_exists($file->getFileUri())) {
      return FALSE;
    }

    // File should have a mime type that is allowed.
    $all_excluded_mimes = $this->extractFileValidator->getExcludedMimes(NULL, $settings['excluded_mimes']);
    if (in_array($file->getMimeType(), $all_excluded_mimes)) {
      return FALSE;
    }

    // File permanent.
    if (!$file->isPermanent()) {
      return FALSE;
    }

    // File shouldn't exceed configured file size.
    if (!$this->extractFileValidator->isFileSizeAllowed($file, $settings['max_filesize'])) {
      return FALSE;
    }

    // Whether a private file can be indexed or not.
    if (!$this->extractFileValidator->isPrivateFileAllowed($file, $settings['excluded_private'])) {
      return FALSE;
    }

    // Finally let other modules veto: a single strict FALSE rejects the file.
    $result = $this->moduleHandler->invokeAll(
      'search_api_attachments_indexable', [$file, $item, $field_name]
    );
    return !in_array(FALSE, $result, TRUE);
  }

457
  /**
   * Get the file fields of indexed bundles and an entity file general item.
   *
   * @return array
   *   An array of file field with field name as key and label as value and
   *   an element for generic file entity item.
   */
  protected function getFileFieldsAndFileEntityItems() {
    $file_elements = [];

    // Retrieve file fields of indexed bundles.
    foreach ($this->getIndex()->getDatasources() as $datasource) {
      if ($datasource->getPluginId() == 'entity:file') {
        $file_elements[static::SAA_FILE_ENTITY] = $this->t('File entity');
      }
      foreach ($datasource->getPropertyDefinitions() as $property) {
        if (!$property instanceof FieldDefinitionInterface) {
          continue;
        }
        $type = $property->getType();
        if ($type == 'file' || ($type == 'entity_reference' && $this->referencesFileBackedMedia($property))) {
          $file_elements[$property->getName()] = $property->getLabel();
        }
      }
    }
    return $file_elements;
  }

  /**
   * Checks whether a reference field targets media whose source is a file.
   *
   * @param \Drupal\Core\Field\FieldDefinitionInterface $property
   *   The definition of an entity reference field.
   *
   * @return bool
   *   TRUE if the field targets media and at least one of its allowed media
   *   bundles uses a source field of type "file".
   */
  private function referencesFileBackedMedia(FieldDefinitionInterface $property) {
    if ($property->getSetting('target_type') !== 'media') {
      return FALSE;
    }
    $settings = $property->getItemDefinition()->getSettings();
    if (!isset($settings['handler_settings']['target_bundles'])) {
      return FALSE;
    }

    // For each media bundle allowed, check if the source field is a file
    // field.
    foreach ($settings['handler_settings']['target_bundles'] as $bundle_name) {
      $media_type = $this->entityTypeManager->getStorage('media_type')->load($bundle_name);
      if (empty($media_type)) {
        continue;
      }
      $bundle_configuration = $media_type->toArray();
      if (!isset($bundle_configuration['source_configuration']['source_field'])) {
        continue;
      }
      $source_field = $bundle_configuration['source_configuration']['source_field'];
      $field_storage = $this->entityTypeManager->getStorage('field_storage_config')->load(sprintf('media.%s', $source_field));
      // The source field storage may have been deleted; skip such bundles
      // instead of calling a method on NULL.
      if (empty($field_storage)) {
        continue;
      }
      $field_config = $field_storage->toArray();
      if (isset($field_config['type']) && $field_config['type'] === 'file') {
        return TRUE;
      }
    }
    return FALSE;
  }
503

504
505
506
507
508
509
510
511
  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
    $settings = $this->configuration;
    // Returns the saved value of a setting, or the fallback if it is not set.
    $saved_or = function ($key, $fallback) use ($settings) {
      return isset($settings[$key]) ? $settings[$key] : $fallback;
    };

    $form['excluded_extensions'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Excluded file extensions'),
      '#default_value' => $saved_or('excluded_extensions', ExtractFileValidator::DEFAULT_EXCLUDED_EXTENSIONS),
      '#size' => 80,
      '#maxlength' => 255,
      '#description' => $this->t('File extensions that are excluded from indexing. Separate extensions with a space and do not include the leading dot.<br />Example: "aif art avi bmp gif ico mov oga ogv png psd ra ram rgb flv"<br />Extensions are internally mapped to a MIME type, so it is not necessary to put variations that map to the same type (e.g. tif is sufficient for tif and tiff)'),
    ];

    $form['number_indexed'] = [
      '#type' => 'number',
      '#title' => $this->t('Number of files indexed per file field'),
      '#default_value' => $saved_or('number_indexed', '0'),
      '#size' => 5,
      '#min' => 0,
      '#max' => 999999,
      '#description' => $this->t('The number of files to index per file field.<br />The order of indexation is the weight in the widget.<br /> 0 for no restriction.'),
    ];

    $form['number_first_bytes'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Limit size of the extracted string before indexing.'),
      '#default_value' => $saved_or('number_first_bytes', '1 MB'),
      '#size' => 5,
      '#min' => 0,
      '#max' => 99999,
      '#description' => $this->t('Enter a value like "1000", "10 KB", "10 MB" or "10 GB" in order to restrict the size of the content after extraction.<br /> "0" to index the full extracted content without bytes limitation.'),
    ];

    $form['max_filesize'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Maximum upload size'),
      '#default_value' => $saved_or('max_filesize', '0'),
      '#description' => $this->t('Enter a value like "10 KB", "10 MB" or "10 GB" in order to restrict the max file size of files that should be indexed.<br /> Enter "0" for no limit restriction.'),
      '#size' => 10,
    ];

    $form['excluded_private'] = [
      '#type' => 'checkbox',
      '#title' => $this->t('Exclude private files'),
      '#default_value' => $saved_or('excluded_private', TRUE),
      '#description' => $this->t('Check this box if you want to exclude private files from being indexed.'),
    ];

    return $form;
  }

556
  /**
   * Form validation handler.
   *
   * Flags the two size settings when validateSize() rejects their trimmed
   * value.
   *
   * @param array $form
   *   An associative array containing the structure of the plugin form as built
   *   by static::buildConfigurationForm().
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The current state of the complete form.
   *
   * @see \Drupal\Core\Plugin\PluginFormInterface::validateConfigurationForm()
   */
  public function validateConfigurationForm(array &$form, FormStateInterface $form_state) {
    // Validate 'number_first_bytes'.
    $size_limit = trim($form_state->getValue('number_first_bytes'));
    if ($this->validateSize($size_limit)) {
      $form_state->setError($form['number_first_bytes'], $this->t('The size limit option must contain a valid value. You may either enter "0" (for no restriction) or a string like "10 KB", "10 MB" or "10 GB".'));
    }

    // Validate 'max_filesize'.
    $filesize_limit = trim($form_state->getValue('max_filesize'));
    if ($this->validateSize($filesize_limit)) {
      $form_state->setError($form['max_filesize'], $this->t('The max filesize option must contain a valid value. You may either enter "0" (for no restriction) or a string like "10 KB", "10 MB" or "10 GB".'));
    }
  }
582

583
584
  /**
   * Helper method to validate the format of a size setting.
   *
   * Accepted values are a plain non-negative integer number of bytes (such as
   * "0" or "1000"), or an integer followed by a single space and one of the
   * units KB, MB or GB (such as "10 MB").
   *
   * @param string $bytes
   *   The size as entered by the user.
   *
   * @return bool
   *   TRUE if $bytes is NOT a valid size (i.e. there is an error), FALSE if it
   *   is valid.
   */
  public function validateSize($bytes) {
    // The D modifier keeps "$" from matching before a trailing newline.
    $is_valid = preg_match('/^\d+(?: (?:KB|MB|GB))?$/D', (string) $bytes) === 1;
    return !$is_valid;
  }

613
  /**
   * Form submission handler.
   *
   * Stores the submitted values together with the MIME types derived from the
   * excluded extensions.
   *
   * @param array $form
   *   An associative array containing the structure of the plugin form as built
   *   by static::buildConfigurationForm().
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The current state of the complete form.
   *
   * @see \Drupal\Core\Plugin\PluginFormInterface::submitConfigurationForm()
   */
  public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
    // Extensions are entered as a single space-separated string.
    $extensions = explode(' ', $form_state->getValue('excluded_extensions'));
    $mimes = $this->extractFileValidator->getExcludedMimes($extensions);

    $values = $form_state->getValues();
    $values += ['excluded_mimes' => implode(' ', $mimes)];
    $this->setConfiguration($values);
  }

632
}