CSV.php 9.27 KB
Newer Older
1 2 3 4
<?php

namespace Drupal\migrate_source_csv\Plugin\migrate\source;

5
use Drupal\Component\Plugin\ConfigurableInterface;
6
use Drupal\Component\Utility\NestedArray;
7
use Drupal\migrate\Plugin\MigrationInterface;
8 9
use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
use League\Csv\Reader;
10 11

/**
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
 * Source for CSV files.
 *
 * Available configuration options:
 * - path: Path to the CSV file. File streams are supported.
 * - ids: Array of column names that uniquely identify each record.
 * - header_offset: (optional) The record to be used as the CSV header and
 *   thereby each record's field name. Defaults to 0 because records are
 *   zero indexed. Can be set to null to indicate no header record.
 * - fields: (optional) nested array of names and labels to use instead of a
 *   header record. Will overwrite values provided by header record. If used,
 *   name is required. If no label is provided, name is used instead for the
 *   field description.
 * - delimiter: (optional) The field delimiter (one character only). Defaults to
 *   a comma (,).
 * - enclosure: (optional) The field enclosure character (one character only).
 *   Defaults to double quote marks.
 * - escape: (optional) The field escape character (one character only).
 *   Defaults to a backslash (\).
30 31 32 33
 * - create_record_number: (optional) Boolean value specifying whether to create
 *   an incremented value for each record in the file. Defaults to FALSE.
 * - record_number_field: (optional) The name of a field that holds an
 *   incremented value for each record in the file. Defaults to record_number.
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
 *
 * @codingStandardsIgnoreStart
 *
 * Example with minimal options:
 * @code
 * source:
 *   plugin: csv
 *   path: /tmp/countries.csv
 *   ids: [id]
 *
 * # countries.csv
 * id,country
 * 1,Nicaragua
 * 2,Spain
 * 3,United States
 * @endcode
 *
 * In the example above, the migration source uses a single-column ID taken
 * from the 'id' column of the CSV file.
53
 *
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
 * Example with most options configured:
 * @code
 * source:
 *   plugin: csv
 *   path: /tmp/countries.csv
 *   ids: [id]
 *   delimiter: '|'
 *   enclosure: "'"
 *   escape: '`'
 *   header_offset: null
 *   fields:
 *     -
 *       name: id
 *       label: ID
 *     -
 *       name: country
 *       label: Country
 *
 * # countries.csv
 * 'really long string that makes this unique'|'United States'
 * 'even longer really long string that makes this unique'|'Nicaragua'
 * 'even more longer really long string that makes this unique'|'Spain'
 * 'escaped data'|'one`'s country'
 * @endcode
 *
 * In the example above, we override the default control characters for
 * delimiter, enclosure and escape. We also set a null header offset to
 * indicate that the file has no header record.
 *
 * @codingStandardsIgnoreEnd
 *
 * @see http://php.net/manual/en/splfileobject.setcsvcontrol.php
85 86
 *
 * @MigrateSource(
87 88
 *   id = "csv",
 *   source_module = "migrate_source_csv"
89 90
 * )
 */
91
class CSV extends SourcePluginBase implements ConfigurableInterface {

  /**
   * {@inheritdoc}
   *
   * Merges the given configuration over the defaults and validates the result.
   *
   * @throws \InvalidArgumentException
   *   If a required option is missing or an option holds an invalid value.
   * @throws \Drupal\migrate\MigrateException
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration) {
    parent::__construct($configuration, $plugin_id, $plugin_definition, $migration);
    $this->setConfiguration($configuration);

    // Path is required.
    if (empty($this->configuration['path'])) {
      throw new \InvalidArgumentException('You must declare the "path" to the source CSV file in your source settings.');
    }
    // IDs are required.
    if (empty($this->configuration['ids']) || !is_array($this->configuration['ids'])) {
      throw new \InvalidArgumentException('You must declare "ids" as a unique array of fields in your source settings.');
    }
    // IDs must be an array of strings. Filtering out non-strings and
    // duplicates must leave the array untouched for it to be valid.
    if ($this->configuration['ids'] !== array_unique(array_filter($this->configuration['ids'], 'is_string'))) {
      throw new \InvalidArgumentException('The ids must a flat array with unique string values.');
    }
    // CSV character control characters must be exactly 1 character.
    foreach (['delimiter', 'enclosure', 'escape'] as $character) {
      $value = $this->configuration[$character];
      // Non-scalar values (for example a list or NULL coming from YAML) are
      // rejected here instead of being handed to strlen(), which would raise
      // a deprecation or a TypeError rather than the expected exception.
      if (!is_scalar($value) || 1 !== strlen((string) $value)) {
        $given = (is_scalar($value) || NULL === $value) ? (string) $value : gettype($value);
        throw new \InvalidArgumentException(sprintf('%s must be a single character; %s given', $character, $given));
      }
    }
    // The configuration "header_offset" must be null or an integer.
    if (!(NULL === $this->configuration['header_offset'] || is_int($this->configuration['header_offset']))) {
      throw new \InvalidArgumentException('The configuration "header_offset" must be null or an integer.');
    }
    // The configuration "header_offset" must be greater or equal to 0.
    if (NULL !== $this->configuration['header_offset'] && 0 > $this->configuration['header_offset']) {
      throw new \InvalidArgumentException('The configuration "header_offset" must be greater or equal to 0.');
    }
    // If set, all fields must have at least a defined "name" property.
    if ($this->configuration['fields']) {
      foreach ($this->configuration['fields'] as $delta => $field) {
        if (!isset($field['name'])) {
          throw new \InvalidArgumentException(sprintf('The "name" configuration for "fields" in index position %s is not defined.', $delta));
        }
      }
    }
    // If "create_record_number" is specified, "record_number_field" must be a
    // non-empty string.
    if ($this->configuration['create_record_number'] && (!is_scalar($this->configuration['record_number_field']) || (empty($this->configuration['record_number_field'])))) {
      throw new \InvalidArgumentException('The configuration "record_number_field" must be a non-empty string.');
    }
  }

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    return [
      'path' => '',
      'ids' => [],
      'header_offset' => 0,
      'fields' => [],
      'delimiter' => ",",
      'enclosure' => "\"",
      'escape' => "\\",
      'create_record_number' => FALSE,
      'record_number_field' => 'record_number',
    ];
  }

  /**
   * {@inheritdoc}
   */
  public function getConfiguration() {
    return $this->configuration;
  }

  /**
   * {@inheritdoc}
   */
  public function setConfiguration(array $configuration) {
    // We must preserve integer keys for column_name mapping.
    $this->configuration = NestedArray::mergeDeepArray([$this->defaultConfiguration(), $configuration], TRUE);
  }

  /**
   * Return a string representing the source file path.
   *
   * @return string
   *   The file path.
   */
  public function __toString() {
    return $this->configuration['path'];
  }

  /**
   * {@inheritdoc}
   *
   * @throws \Drupal\migrate\MigrateException
   * @throws \League\Csv\Exception
   */
  public function initializeIterator() {
    // Build the reader once so the source file is only opened a single time.
    $reader = $this->getReader();
    $header = $reader->getHeader();
    if ($this->configuration['fields']) {
      // Configured fields replace the header record. Only the field names are
      // needed, which are the keys of fields(). Flipping the name => label map
      // instead would silently drop columns whenever two fields share a label.
      $header = array_keys($this->fields());
    }
    return $this->getGenerator($reader->getRecords($header));
  }

  /**
   * {@inheritdoc}
   */
  public function getIds() {
    $ids = [];
    // Every configured ID column is exposed as a string key.
    foreach ($this->configuration['ids'] as $value) {
      $ids[$value]['type'] = 'string';
    }
    return $ids;
  }

  /**
   * {@inheritdoc}
   */
  public function fields() {
    // If fields are not defined, use the header record.
    if (empty($this->configuration['fields'])) {
      $header = $this->getReader()->getHeader();
      return array_combine($header, $header);
    }
    $fields = [];
    foreach ($this->configuration['fields'] as $field) {
      // The label is optional; the name doubles as the description without it.
      $fields[$field['name']] = $field['label'] ?? $field['name'];
    }
    return $fields;
  }

  /**
   * Get the generator.
   *
   * Yields every record and, when "create_record_number" is enabled, adds an
   * incrementing counter to it under the "record_number_field" key. The
   * counter is seeded with "header_offset" (0 when it is NULL), so the first
   * yielded record is numbered header_offset + 1.
   *
   * @param \Iterator $records
   *   The CSV records.
   *
   * @codingStandardsIgnoreStart
   *
   * @return \Generator
   *   The records generator.
   *
   * @codingStandardsIgnoreEnd
   */
  protected function getGenerator(\Iterator $records) {
    $record_num = $this->configuration['header_offset'] ?? 0;
    foreach ($records as $record) {
      if ($this->configuration['create_record_number']) {
        $record[$this->configuration['record_number_field']] = ++$record_num;
      }
      yield $record;
    }
  }

  /**
   * Get the CSV reader.
   *
   * Creates a new reader on every call and applies the configured delimiter,
   * enclosure, escape character and header offset to it.
   *
   * @return \League\Csv\Reader
   *   The reader.
   *
   * @throws \Drupal\migrate\MigrateException
   * @throws \League\Csv\Exception
   */
  protected function getReader() {
    $reader = $this->createReader();
    $reader->setDelimiter($this->configuration['delimiter']);
    $reader->setEnclosure($this->configuration['enclosure']);
    $reader->setEscape($this->configuration['escape']);
    $reader->setHeaderOffset($this->configuration['header_offset']);
    return $reader;
  }

  /**
   * Construct a new CSV reader.
   *
   * @return \League\Csv\Reader
   *   The reader.
   *
   * @throws \RuntimeException
   *   If the configured path does not exist or cannot be opened for reading.
   */
  protected function createReader() {
    if (!file_exists($this->configuration['path'])) {
      throw new \RuntimeException(sprintf('File "%s" was not found.', $this->configuration['path']));
    }
    $csv = fopen($this->configuration['path'], 'r');
    if (!$csv) {
      throw new \RuntimeException(sprintf('File "%s" could not be opened.', $this->configuration['path']));
    }
    return Reader::createFromStream($csv);
  }

}