CSV.php 8.3 KB
Newer Older
1 2 3 4
<?php

namespace Drupal\migrate_source_csv\Plugin\migrate\source;

5
use Drupal\Component\Plugin\ConfigurableInterface;
6
use Drupal\Component\Utility\NestedArray;
7
use Drupal\migrate\Plugin\MigrationInterface;
8 9
use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
use League\Csv\Reader;
10 11

/**
 * Source for CSV files.
 *
 * Available configuration options:
 * - path: Path to the CSV file. File streams are supported.
 * - ids: Array of column names that uniquely identify each record.
 * - header_offset: (optional) The record to be used as the CSV header, and
 *   thereby as each record's field names. Defaults to 0 because records are
 *   zero indexed. Can be set to null to indicate no header record.
 * - fields: (optional) nested array of names and labels to use instead of a
 *   header record. Will overwrite values provided by header record. If used,
 *   name is required. If no label is provided, name is used instead for the
 *   field description.
 * - delimiter: (optional) The field delimiter (one character only). Defaults to
 *   a comma (,).
 * - enclosure: (optional) The field enclosure character (one character only).
 *   Defaults to double quote marks.
 * - escape: (optional) The field escape character (one character only).
 *   Defaults to a backslash (\).
 *
 * @codingStandardsIgnoreStart
 *
 * Example with minimal options:
 * @code
 * source:
 *   plugin: csv
 *   path: /tmp/countries.csv
 *   ids: [id]
 *
 * # countries.csv
 * id,country
 * 1,Nicaragua
 * 2,Spain
 * 3,United States
 * @endcode
 *
 * In this example above, the migration source will use a single-column id using the
 * value from the 'id' column of the CSV file.
 *
 * Example with most options configured:
 * @code
 * source:
 *   plugin: csv
 *   path: /tmp/countries.csv
 *   ids: [id]
 *   delimiter: '|'
 *   enclosure: "'"
 *   escape: '`'
 *   header_offset: null
 *   fields:
 *     -
 *       name: id
 *       label: ID
 *     -
 *       name: country
 *       label: Country
 *
 * # countries.csv
 * 'really long string that makes this unique'|'United States'
 * 'even longer really long string that makes this unique'|'Nicaragua'
 * 'even more longer really long string that makes this unique'|'Spain'
 * 'escaped data'|'one`'s country'
 * @endcode
 *
 * In this example above, we override the default character controls for delimiter,
 * enclosure and escape. We also set a null header offset to indicate no header.
 *
 * @codingStandardsIgnoreEnd
 *
 * @see http://php.net/manual/en/splfileobject.setcsvcontrol.php
 *
 * @MigrateSource(
 *   id = "csv",
 *   source_module = "migrate_source_csv"
 * )
 */
class CSV extends SourcePluginBase implements ConfigurableInterface {

  /**
   * {@inheritdoc}
   *
   * Merges the given configuration over the defaults and validates it.
   *
   * @throws \InvalidArgumentException
   *   If "path" or "ids" is missing, "ids" is not a flat array of unique
   *   strings, a CSV control character is not exactly one character long,
   *   "header_offset" is neither null nor a non-negative integer, or a
   *   configured field has no "name".
   * @throws \Drupal\migrate\MigrateException
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration) {
    parent::__construct($configuration, $plugin_id, $plugin_definition, $migration);
    $this->setConfiguration($configuration);

    // Path is required.
    if (empty($this->configuration['path'])) {
      throw new \InvalidArgumentException('You must declare the "path" to the source CSV file in your source settings.');
    }
    // IDs are required.
    if (empty($this->configuration['ids']) || !is_array($this->configuration['ids'])) {
      throw new \InvalidArgumentException('You must declare "ids" as a unique array of fields in your source settings.');
    }
    // IDs must be an array of strings. Filtering out non-strings and
    // de-duplicating must leave the array untouched, otherwise it contained a
    // non-string or a repeated value.
    if ($this->configuration['ids'] !== array_unique(array_filter($this->configuration['ids'], 'is_string'))) {
      throw new \InvalidArgumentException('The ids must a flat array with unique string values.');
    }
    // CSV character control characters must be exactly 1 character.
    foreach (['delimiter', 'enclosure', 'escape'] as $character) {
      if (1 !== strlen($this->configuration[$character])) {
        throw new \InvalidArgumentException(sprintf('%s must be a single character; %s given', $character, $this->configuration[$character]));
      }
    }
    // The configuration "header_offset" must be null or an integer.
    if (!(NULL === $this->configuration['header_offset'] || is_int($this->configuration['header_offset']))) {
      throw new \InvalidArgumentException('The configuration "header_offset" must be null or an integer.');
    }
    // The configuration "header_offset" must be greater or equal to 0.
    if (NULL !== $this->configuration['header_offset'] && 0 > $this->configuration['header_offset']) {
      throw new \InvalidArgumentException('The configuration "header_offset" must be greater or equal to 0.');
    }
    // If set, all fields must have a least a defined "name" property.
    if ($this->configuration['fields']) {
      foreach ($this->configuration['fields'] as $delta => $field) {
        if (!isset($field['name'])) {
          throw new \InvalidArgumentException(sprintf('The "name" configuration for "fields" in index position %s is not defined.', $delta));
        }
      }
    }
  }

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    return [
      'path' => '',
      'ids' => [],
      'header_offset' => 0,
      'fields' => [],
      'delimiter' => ',',
      'enclosure' => '"',
      'escape' => '\\',
    ];
  }

  /**
   * {@inheritdoc}
   */
  public function getConfiguration() {
    return $this->configuration;
  }

  /**
   * {@inheritdoc}
   */
  public function setConfiguration(array $configuration) {
    // We must preserve integer keys for column_name mapping.
    $this->configuration = NestedArray::mergeDeepArray([$this->defaultConfiguration(), $configuration], TRUE);
  }

  /**
   * Return a string representing the source file path.
   *
   * @return string
   *   The file path.
   */
  public function __toString() {
    // __toString() must return a string; the constructor only checks that the
    // path is non-empty, so cast defensively.
    return (string) $this->configuration['path'];
  }

  /**
   * {@inheritdoc}
   *
   * Yields the CSV records keyed by the header record, or by the configured
   * field names when "fields" is set.
   *
   * @throws \Drupal\migrate\MigrateException
   * @throws \League\Csv\Exception
   */
  public function initializeIterator() {
    // Build a single reader and reuse it: every getReader() call opens the
    // file again through createReader().
    $reader = $this->getReader();
    // The header record is read unconditionally, as before, so errors raised
    // while reading it still surface when configured fields replace it below.
    $header = $reader->getHeader();
    if ($this->configuration['fields']) {
      // Configured fields win over the header record. Only the field names
      // are needed here. array_keys() keeps every column even when two fields
      // share a label (array_flip() would collapse them), and strval()
      // restores numeric names that PHP stored as integer array keys.
      $header = array_map('strval', array_keys($this->fields()));
    }
    return $this->getGenerator($reader->getRecords($header));
  }

  /**
   * {@inheritdoc}
   */
  public function getIds() {
    // Every configured id column is described as a string.
    return array_fill_keys($this->configuration['ids'], ['type' => 'string']);
  }

  /**
   * {@inheritdoc}
   *
   * Returns the configured fields as name => label, falling back to the name
   * when a field has no label. Without configured fields, the CSV header
   * record is used for both names and labels.
   */
  public function fields() {
    $configured = $this->configuration['fields'];
    // If fields are not defined, use the header record.
    if (empty($configured)) {
      $header = $this->getReader()->getHeader();
      return array_combine($header, $header);
    }
    $fields = [];
    foreach ($configured as $field) {
      $fields[$field['name']] = isset($field['label']) ? $field['label'] : $field['name'];
    }
    return $fields;
  }

  /**
   * Get the generator.
   *
   * @param \Iterator $records
   *   The CSV records.
   *
   * @codingStandardsIgnoreStart
   *
   * @return \Generator
   *   The records generator.
   *
   * @codingStandardsIgnoreEnd
   */
  protected function getGenerator(\Iterator $records) {
    // Re-yield each record without its original key, so the generator's keys
    // are sequential integers rather than the keys of $records.
    foreach ($records as $record) {
      yield $record;
    }
  }

  /**
   * Get the CSV reader.
   *
   * Creates a new reader on every call and applies the configured delimiter,
   * enclosure, escape character and header offset to it.
   *
   * @return \League\Csv\Reader
   *   The reader.
   *
   * @throws \Drupal\migrate\MigrateException
   * @throws \League\Csv\Exception
   */
  protected function getReader() {
    $reader = $this->createReader();
    $reader->setDelimiter($this->configuration['delimiter']);
    $reader->setEnclosure($this->configuration['enclosure']);
    $reader->setEscape($this->configuration['escape']);
    $reader->setHeaderOffset($this->configuration['header_offset']);
    return $reader;
  }

  /**
   * Construct a new CSV reader.
   *
   * Opens the configured path read-only and wraps the resulting stream.
   *
   * @return \League\Csv\Reader
   *   The reader.
   *
   * @throws \RuntimeException
   *   If the file does not exist or cannot be opened.
   */
  protected function createReader() {
    $path = $this->configuration['path'];
    if (!file_exists($path)) {
      throw new \RuntimeException(sprintf('File "%s" was not found.', $path));
    }
    $csv = fopen($path, 'r');
    if (!$csv) {
      throw new \RuntimeException(sprintf('File "%s" could not be opened.', $path));
    }
    return Reader::createFromStream($csv);
  }

}