Skip to content
Snippets Groups Projects
Commit 9b6c58b8 authored by Florent Torregrosa's avatar Florent Torregrosa Committed by Florent Torregrosa
Browse files

Issue #3130629 by Grimreaper: Library dependency system. Use symfony/process...

Issue #3130629 by Grimreaper: Library dependency system. Use symfony/process instead of PHP shell commands. The test file name now has spaces and UTF-8 characters. Composer 2 is now required.
parent c2b62ce9
No related branches found
Tags 4.0.0
No related merge requests found
......@@ -34,8 +34,8 @@ More information on the module origins on: https://www.drupal.org/node/3126845
REQUIREMENTS
------------
Each extractor plugin can require different modules. If the requirements are not
satisfied, the plugin doesn't show up in the settings.
Each extractor plugin can require different modules or libraries. If the
requirements are not satisfied, the plugin doesn't show up in the settings.
Each extractor plugin can require a different binary on your server. When
configuring the extraction, a test is run to check that the extraction works.
......@@ -46,7 +46,7 @@ extractor plugins.
INSTALLATION
------------
* Install and enable this module like any other Drupal 8 module.
Composer 2.x is required to install this module.
CONFIGURATION
......
......@@ -3,7 +3,13 @@
"type": "drupal-module",
"description": "Extract file entity content.",
"license": "GPL-2.0-or-later",
"require": {
"composer-runtime-api": "^2.0"
},
"require-dev": {
"symfony/process": "^3.0 || ^4.0 || ^5.0"
},
"suggest": {
"symfony/process": "To be able to use some provided extraction plugins."
}
}
File added
File deleted
......@@ -43,4 +43,11 @@ class FileExtractorExtractor extends Plugin {
*/
public $dependencies;
/**
* The names of the Composer packages that are required for this plugin.
*
* @var array
*/
public $packageDependencies;
}
......@@ -224,40 +224,4 @@ abstract class ExtractorPluginBase extends PluginBase implements ExtractorPlugin
return TRUE;
}
/**
 * Helper function to run a shell command and return its output.
 *
 * @param string $command
 *   The command. This will be passed to escapeshellcmd().
 * @param array $command_arguments
 *   The command arguments. They are concatenated as-is, separated by spaces,
 *   so the caller has to pass them through escapeshellarg() if needed.
 * @param string $command_prefix
 *   A prefix to the command. Will not go through escapeshellcmd().
 *   You must add a space at the end because there is no space between
 *   $command_prefix and $command when concatenating.
 *
 * @return string|false|null
 *   The shell_exec() return value.
 */
protected function execExtraction(string $command, array $command_arguments = [], string $command_prefix = '') {
  // Multibyte UTF-8 characters can be stripped by the shell escaping
  // functions under the default C locale, so temporarily switch to a UTF-8
  // locale while the command line is built so that the paths remain valid.
  $backup_locale = setlocale(LC_CTYPE, '0');
  setlocale(LC_CTYPE, 'en_US.UTF-8');
  $cmd = $command_prefix . escapeshellcmd($command) . ' ' . implode(' ', $command_arguments);
  // Restore the locale, but only if the previous one could be read.
  if ($backup_locale !== FALSE) {
    setlocale(LC_CTYPE, $backup_locale);
  }

  // Support UTF-8 commands.
  // @see http://www.php.net/manual/en/function.shell-exec.php#85095
  // The variable has to be set in the environment inherited by the shell
  // that runs the command: running "LANG=..." through its own shell_exec()
  // call only affects that short-lived shell and nothing else.
  $backup_lang = getenv('LANG');
  putenv('LANG=en_US.utf-8');
  try {
    return shell_exec($cmd);
  }
  finally {
    // Put the environment back so later code in this request is unaffected.
    putenv($backup_lang === FALSE ? 'LANG' : 'LANG=' . $backup_lang);
  }
}
}
......@@ -4,6 +4,7 @@ declare(strict_types = 1);
namespace Drupal\file_extractor\Extractor;
use Composer\InstalledVersions;
use Drupal\Core\Cache\CacheBackendInterface;
use Drupal\Core\Extension\ModuleHandlerInterface;
use Drupal\Core\Plugin\DefaultPluginManager;
......@@ -44,6 +45,16 @@ class ExtractorPluginManager extends DefaultPluginManager {
}
}
}
// Check Composer packages dependencies.
if (!empty($definition_info['packageDependencies'])) {
foreach ($definition_info['packageDependencies'] as $package_dependency) {
if (!InstalledVersions::isInstalled($package_dependency)) {
unset($definitions[$definition_key]);
continue;
}
}
}
}
parent::alterDefinitions($definitions);
......
......@@ -8,6 +8,7 @@ use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Plugin\PluginFormInterface;
use Drupal\file\FileInterface;
use Drupal\file_extractor\Extractor\ExtractorPluginBase;
use Symfony\Component\Process\Process;
/**
* Provides docconv extractor.
......@@ -16,6 +17,7 @@ use Drupal\file_extractor\Extractor\ExtractorPluginBase;
* id = "docconv_extractor",
* label = @Translation("Docconv Extractor"),
* description = @Translation("Adds Docconv extractor support."),
* packageDependencies = {"symfony/process"},
* )
*/
class DocconvExtractor extends ExtractorPluginBase implements PluginFormInterface {
......@@ -71,15 +73,19 @@ class DocconvExtractor extends ExtractorPluginBase implements PluginFormInterfac
return '';
}
$output = $this->execExtraction($docconv_path, [
$extraction_process = new Process([
$docconv_path,
'-input',
escapeshellcmd($file_path),
$file_path,
]);
if (is_null($output)) {
$this->logger->error('An error occurred during the extraction of the file @file_path with the binary @binary_path.', $log_variables);
$extraction_process->run();
if (!$extraction_process->isSuccessful()) {
$log_variables['@error_message'] = $extraction_process->getErrorOutput();
$this->logger->error('An error occurred during the extraction of the file @file_path with the binary @binary_path. The error was: @error_message.', $log_variables);
return '';
}
return $output;
return $extraction_process->getOutput();
}
}
......@@ -8,6 +8,7 @@ use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Plugin\PluginFormInterface;
use Drupal\file\FileInterface;
use Drupal\file_extractor\Extractor\ExtractorPluginBase;
use Symfony\Component\Process\Process;
/**
* Provides pdftotext extractor.
......@@ -16,6 +17,7 @@ use Drupal\file_extractor\Extractor\ExtractorPluginBase;
* id = "pdftotext_extractor",
* label = @Translation("Pdftotext Extractor"),
* description = @Translation("Adds Pdftotext extractor support."),
* packageDependencies = {"symfony/process"},
* )
*/
class PdftotextExtractor extends ExtractorPluginBase implements PluginFormInterface {
......@@ -76,17 +78,21 @@ class PdftotextExtractor extends ExtractorPluginBase implements PluginFormInterf
return '';
}
$output = $this->execExtraction($pdftotext_path, [
escapeshellarg($file_path),
$extraction_process = new Process([
$pdftotext_path,
$file_path,
// Pdftotext description states that '-' as text-file will send text to
// stdout.
'-',
]);
if (is_null($output)) {
$this->logger->error('An error occurred during the extraction of the file @file_path with the binary @binary_path.', $log_variables);
$extraction_process->run();
if (!$extraction_process->isSuccessful()) {
$log_variables['@error_message'] = $extraction_process->getErrorOutput();
$this->logger->error('An error occurred during the extraction of the file @file_path with the binary @binary_path. The error was: @error_message.', $log_variables);
return '';
}
return $output;
return $extraction_process->getOutput();
}
}
......@@ -8,6 +8,7 @@ use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Plugin\PluginFormInterface;
use Drupal\file\FileInterface;
use Drupal\file_extractor\Extractor\ExtractorPluginBase;
use Symfony\Component\Process\Process;
/**
* Provides Python pdf2text extractor.
......@@ -16,6 +17,7 @@ use Drupal\file_extractor\Extractor\ExtractorPluginBase;
* id = "python_pdf2txt_extractor",
* label = @Translation("Python Pdf2txt Extractor"),
* description = @Translation("Adds Python Pdf2txt extractor support."),
* packageDependencies = {"symfony/process"},
* )
*/
class PythonPdf2txtExtractor extends ExtractorPluginBase implements PluginFormInterface {
......@@ -98,16 +100,22 @@ class PythonPdf2txtExtractor extends ExtractorPluginBase implements PluginFormIn
return '';
}
$output = $this->execExtraction($python_path, [
escapeshellarg($python_pdf2txt_script),
'-C -t text',
escapeshellarg($file_path),
$extraction_process = new Process([
$python_path,
$python_pdf2txt_script,
'-C',
'-t',
'text',
$file_path,
]);
if (is_null($output)) {
$this->logger->error('An error occurred during the extraction of the file @file_path with the binary @binary_path.', $log_variables);
$extraction_process->run();
if (!$extraction_process->isSuccessful()) {
$log_variables['@error_message'] = $extraction_process->getErrorOutput();
$this->logger->error('An error occurred during the extraction of the file @file_path with the binary @binary_path. The error was: @error_message.', $log_variables);
return '';
}
return $output;
return $extraction_process->getOutput();
}
}
......@@ -8,6 +8,7 @@ use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Plugin\PluginFormInterface;
use Drupal\file\FileInterface;
use Drupal\file_extractor\Extractor\ExtractorPluginBase;
use Symfony\Component\Process\Process;
/**
* Provides tika extractor.
......@@ -16,6 +17,7 @@ use Drupal\file_extractor\Extractor\ExtractorPluginBase;
* id = "tika_extractor",
* label = @Translation("Tika Extractor"),
* description = @Translation("Adds Tika extractor support."),
* packageDependencies = {"symfony/process"},
* )
*/
class TikaExtractor extends ExtractorPluginBase implements PluginFormInterface {
......@@ -69,13 +71,12 @@ class TikaExtractor extends ExtractorPluginBase implements PluginFormInterface {
$java_path = $form_state->getValue('java_path');
$tika_path = $form_state->getValue('tika_path');
$tika_config_path = $form_state->getValue('tika_config_path');
$output = [];
$return_code = NULL;
// Check Java path.
exec($java_path, $output, $return_code);
// $return_code is 127 if it fails, 1 otherwise.
if ($return_code != 1) {
$java_process = new Process([$java_path]);
$java_process->run();
// The exit code is 127 if it fails, 1 otherwise.
if ($java_process->getExitCode() != 1) {
$form_state->setError($form['java_path'], $this->t('Invalid path or filename %path for Java binary.', ['%path' => $java_path]));
return;
}
......@@ -86,10 +87,16 @@ class TikaExtractor extends ExtractorPluginBase implements PluginFormInterface {
}
// Check return code.
else {
$cmd = $java_path . ' -jar ' . escapeshellarg($tika_path) . ' -V';
exec($cmd, $output, $return_code);
// $return_code is 1 if it fails, 0 otherwise.
if ($return_code) {
$tika_process = new Process([
$java_path,
'-jar',
$tika_path,
'-V',
]);
$tika_process->run();
// The exit code is 1 if it fails, 0 otherwise.
if ($tika_process->getExitCode()) {
$form_state->setError($form['tika_path'], $this->t('Tika could not be reached and executed.'));
}
}
......@@ -126,33 +133,39 @@ class TikaExtractor extends ExtractorPluginBase implements PluginFormInterface {
return '';
}
$command_prefix = '';
$process_arguments = [];
$extension_dir = ini_get('extension_dir') ?: '';
if (strpos($extension_dir, 'MAMP/')) {
$command_prefix = 'export DYLD_LIBRARY_PATH=""; ';
$process_arguments[] = 'export DYLD_LIBRARY_PATH="";';
}
$command_arguments = [];
$process_arguments[] = $java_path;
// Force running the Tika jar headless.
$command_arguments[] = '-Djava.awt.headless=true';
$process_arguments[] = '-Djava.awt.headless=true';
if ($file->getMimeType() != 'audio/mpeg') {
$command_arguments[] = '-Dfile.encoding=UTF8 -cp';
$command_arguments[] = escapeshellarg($tika_path);
$process_arguments[] = '-Dfile.encoding=UTF8';
$process_arguments[] = '-cp';
$process_arguments[] = $tika_path;
}
$command_arguments[] = '-jar';
$command_arguments[] = escapeshellarg($tika_path);
$process_arguments[] = '-jar';
$process_arguments[] = $tika_path;
if (!empty($tika_config_path)) {
$command_arguments[] = '--config=' . escapeshellarg($tika_config_path);
$process_arguments[] = '--config=' . $tika_config_path;
}
$command_arguments[] = '-t';
$command_arguments[] = escapeshellarg($file_path);
$process_arguments[] = '-t';
$process_arguments[] = $file_path;
$extraction_process = new Process($process_arguments);
$extraction_process->run();
$output = $this->execExtraction($java_path, $command_arguments, $command_prefix);
if (is_null($output)) {
$this->logger->error('An error occurred during the extraction of the file @file_path with the binary @binary_path.', $log_variables);
if (!$extraction_process->isSuccessful()) {
$log_variables['@error_message'] = $extraction_process->getErrorOutput();
$this->logger->error('An error occurred during the extraction of the file @file_path with the binary @binary_path. The error was: @error_message.', $log_variables);
return '';
}
return $output;
return $extraction_process->getOutput();
}
}
......@@ -14,7 +14,7 @@ interface TestFileHelperInterface {
/**
* Name of the file used for testing.
*/
const TEST_FILENAME = 'file_extractor_test_extraction.pdf';
const TEST_FILENAME = 'file_extractor test_extraction $.pdf';
/**
* URI of the file used for testing.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment