Commit e143d33f authored by amontero's avatar amontero Committed by Alejandro Garza
Browse files

Issue #2427473 by amontero: Allow customization of remote server Tika extraction servlet

parent 43a8f969
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
Apache Solr Attachments 7.x-1.x-dev, xxxx-xx-xx
------------------------------

Apache Solr Attachments 7.x-1.5, 2022-04-28
------------------------------
#2427473 by amontero: Allow customization of remote server Tika extraction servlet

Apache Solr Attachments 7.x-1.4, 2015-02-25
------------------------------
#2195095 by thtas, David_Rothstein: Error when trying to use a file field on entities other than nodes
+14 −0
Original line number Diff line number Diff line
@@ -69,6 +69,20 @@ function apachesolr_attachments_settings($form, &$form_state, $env_id) {
    '#description' => t("The name of the tika CLI application jar file, e.g. tika-app-1.1.jar."),
    '#default_value' => variable_get('apachesolr_attachments_tika_jar', 'tika-app-1.1.jar'),
  );
  $form['apachesolr_attachments_extracting_servlet_path'] = array(
    '#type' => 'textfield',
    '#title' => t('Tika extracting servlet path'),
    '#size' => 100,
    '#description' => t('URL path of Apache Solr remote server extraction servlet, e.g. extract/tika.'),
    '#default_value' => variable_get('apachesolr_attachments_extracting_servlet_path', DEFAULT_EXTRACTING_SERVLET),
  );
  // Extra request parameters for the remote extraction servlet. The stored
  // string is split on "&" and then on "=" when the extraction request is
  // built, so the help text must describe that format.
  $form['apachesolr_attachments_extracting_servlet_params'] = array(
    '#type' => 'textfield',
    '#title' => t('Tika extracting servlet parameters'),
    '#size' => 100,
    '#description' => t('Ampersand-separated (&amp;) name=value parameter pairs to send to remote server extraction servlet, e.g. extractOnly=true&amp;lowernames=true.'),
    '#default_value' => variable_get('apachesolr_attachments_extracting_servlet_params'),
  );

  $form = system_settings_form($form);
  $form['#validate'][] = 'apachesolr_attachments_settings_validate';
+10 −1
Original line number Diff line number Diff line
@@ -162,6 +162,14 @@ function apachesolr_attachments_extract_using_solr($filepath) {
    'resource.name' => $filename,
    'extractFormat' => 'text', // Matches the -t command for the tika CLI app.
  );
  // Merge the administrator-configured extra servlet parameters into the
  // request. The setting is a query-string style list: name=value pairs joined
  // by "&".
  $extra_params = variable_get('apachesolr_attachments_extracting_servlet_params');
  if (!empty($extra_params)) {
    foreach (explode('&', $extra_params) as $pair) {
      // Split on the first "=" only, so values that contain "=" stay intact.
      $extra_param = explode('=', $pair, 2);
      $name = trim($extra_param[0]);
      // Skip empty segments and pairs without a value rather than reading an
      // undefined offset and sending a NULL parameter.
      if ($name === '' || !isset($extra_param[1])) {
        continue;
      }
      // Array union keeps keys already present in $params, so configured
      // extras never override the parameters set above.
      $params += array($name => trim($extra_param[1]));
    }
  }

  // Construct a multi-part form-data POST body in $data.
  $boundary = '--' . hash('sha256', uniqid(REQUEST_TIME));
  $data = "--{$boundary}\r\n";
@@ -176,7 +184,8 @@ function apachesolr_attachments_extract_using_solr($filepath) {
    'headers' => $headers,
    'data' => $data,
  );
  $response = $solr->makeServletRequest(EXTRACTING_SERVLET, $params, $options);
  $servlet = variable_get('apachesolr_attachments_extracting_servlet_path', DEFAULT_EXTRACTING_SERVLET);
  $response = $solr->makeServletRequest($servlet, $params, $options);
  return array($response->extracted, $response->extracted_metadata);
}

+1 −1
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@
 * Provides a file attachment search implementation for use with the Apache Solr module
 */

define('EXTRACTING_SERVLET', 'extract/tika');
define('DEFAULT_EXTRACTING_SERVLET', 'extract/tika');
/**
 * Implements hook_menu().
 */