project
entity_to_text

Repository

$ composer require drupal/entity_to_text
/** @var string $field_body_content */
$field_body_content = \Drupal::service('entity_to_text.extractor.node_to_text')->fromFieldtoText('body', $node);
/** @var string $field_foo_content */
$field_foo_content = \Drupal::service('entity_to_text.extractor.node_to_text')->fromFieldtoText('field_foo', $node);
/** @var array[] $bodies */
$bodies = \Drupal::service('entity_to_text_paragraphs.extractor.paragraphs_to_text')->fromParagraphToText($node->field_paragraphs);
/**
 * Apache Tika connection.
 */
$settings['entity_to_text_tika.connection']['host'] = 'tika';
$settings['entity_to_text_tika.connection']['port'] = '9998';
/** @var \Drupal\file\Entity\File $file */
$file = $file_item->entity;
$body = \Drupal::service('entity_to_text_tika.extractor.file_to_text')->fromFileToText($file, 'eng+fra');
// Call this at least once somewhere in the code (e.g. in module.install) to prepare the storage.
\Drupal::service('entity_to_text_tika.storage.local_file')->prepareStorage();

// Load the already OCR'ed file if possible to avoid unnecessary calls to Tika.
$body = \Drupal::service('entity_to_text_tika.storage.local_file')->load($file, 'eng+fra');

if (!$body) {
  // When no OCR'ed file is available, run Tika over the source file and store the result for the next run.
  $body = \Drupal::service('entity_to_text_tika.extractor.file_to_text')->fromFileToText($file, 'eng+fra');
  // Save the OCR'ed file for the next run.
  \Drupal::service('entity_to_text_tika.storage.local_file')->save($file, $body, 'eng+fra');
}
# Warm up all files that do not already have an associated .ocr file.
drush e2t:t:w
# Warm up all files, even if they have already been processed.
drush e2t:t:w --force
# Warm up the file with FID 2.
drush e2t:t:w --fid=2