CSV.php 9.27 KB
Newer Older
1
2
3
4
<?php

namespace Drupal\migrate_source_csv\Plugin\migrate\source;

5
use Drupal\Component\Plugin\ConfigurableInterface;
6
use Drupal\Component\Utility\NestedArray;
7
use Drupal\migrate\Plugin\MigrationInterface;
8
9
use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
use League\Csv\Reader;
10
11

/**
 * Source for CSV files.
 *
 * Available configuration options:
 * - path: Path to the CSV file. File streams are supported.
 * - ids: Array of column names that uniquely identify each record.
 * - header_offset: (optional) The record to be used as the CSV header and
 *   thereby each record's field name. Defaults to 0 because records are
 *   zero indexed. Can be set to null to indicate no header record.
 * - fields: (optional) nested array of names and labels to use instead of a
 *   header record. Will overwrite values provided by header record. If used,
 *   name is required. If no label is provided, name is used instead for the
 *   field description.
 * - delimiter: (optional) The field delimiter (one character only). Defaults to
 *   a comma (,).
 * - enclosure: (optional) The field enclosure character (one character only).
 *   Defaults to double quote marks.
 * - escape: (optional) The field escape character (one character only).
 *   Defaults to a backslash (\).
 * - create_record_number: (optional) Boolean value specifying whether to create
 *   an incremented value for each record in the file. Defaults to FALSE.
 * - record_number_field: (optional) The name of a field that holds an
 *   incremented value for each record in the file. Defaults to record_number.
 *
 * @codingStandardsIgnoreStart
 *
 * Example with minimal options:
 * @code
 * source:
 *   plugin: csv
 *   path: /tmp/countries.csv
 *   ids: [id]
 *
 * # countries.csv
 * id,country
 * 1,Nicaragua
 * 2,Spain
 * 3,United States
 * @endcode
 *
 * In the example above, the migration source uses a single-column id taken
 * from the 'id' column of the CSV file.
 *
 * Example with most options configured:
 * @code
 * source:
 *   plugin: csv
 *   path: /tmp/countries.csv
 *   ids: [id]
 *   delimiter: '|'
 *   enclosure: "'"
 *   escape: '`'
 *   header_offset: null
 *   fields:
 *     -
 *       name: id
 *       label: ID
 *     -
 *       name: country
 *       label: Country
 *
 * # countries.csv
 * 'really long string that makes this unique'|'United States'
 * 'even longer really long string that makes this unique'|'Nicaragua'
 * 'even more longer really long string that makes this unique'|'Spain'
 * 'escaped data'|'one`'s country'
 * @endcode
 *
 * In the example above, we override the default control characters for the
 * delimiter, enclosure and escape. We also set a null header offset to
 * indicate that the file has no header record.
 *
 * @codingStandardsIgnoreEnd
 *
 * @see http://php.net/manual/en/splfileobject.setcsvcontrol.php
 *
 * @MigrateSource(
 *   id = "csv",
 *   source_module = "migrate_source_csv"
 * )
 */
class CSV extends SourcePluginBase implements ConfigurableInterface {

  /**
   * Constructs the CSV source plugin and validates its configuration.
   *
   * The supplied configuration is merged with defaultConfiguration() through
   * setConfiguration() before any of the checks below run.
   *
   * @param array $configuration
   *   The source plugin configuration.
   * @param string $plugin_id
   *   The plugin ID.
   * @param mixed $plugin_definition
   *   The plugin definition.
   * @param \Drupal\migrate\Plugin\MigrationInterface $migration
   *   The migration this source plugin is attached to.
   *
   * @throws \InvalidArgumentException
   *   Thrown when a configuration value fails one of the validation rules.
   * @throws \Drupal\migrate\MigrateException
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration) {
    parent::__construct($configuration, $plugin_id, $plugin_definition, $migration);
    $this->setConfiguration($configuration);

    // Path is required.
    if (empty($this->configuration['path'])) {
      throw new \InvalidArgumentException('You must declare the "path" to the source CSV file in your source settings.');
    }
    // IDs are required.
    if (empty($this->configuration['ids']) || !is_array($this->configuration['ids'])) {
      throw new \InvalidArgumentException('You must declare "ids" as a unique array of fields in your source settings.');
    }
    // IDs must be an array of strings. Filtering out non-strings and
    // duplicates must leave the array untouched, otherwise it is invalid.
    if ($this->configuration['ids'] !== array_unique(array_filter($this->configuration['ids'], 'is_string'))) {
      throw new \InvalidArgumentException('The ids must a flat array with unique string values.');
    }
    // CSV character control characters must be exactly 1 character.
    foreach (['delimiter', 'enclosure', 'escape'] as $character) {
      $value = $this->configuration[$character];
      // Reject non-scalar values (for example a YAML list) before calling
      // strlen(), which would otherwise fail with a type error or warning
      // instead of the exception documented for this constructor.
      if (!is_scalar($value) || 1 !== strlen((string) $value)) {
        $given = (is_array($value) || is_object($value)) ? gettype($value) : $value;
        throw new \InvalidArgumentException(sprintf('%s must be a single character; %s given', $character, $given));
      }
    }
    // The configuration "header_offset" must be null or an integer.
    if (!(NULL === $this->configuration['header_offset'] || is_int($this->configuration['header_offset']))) {
      throw new \InvalidArgumentException('The configuration "header_offset" must be null or an integer.');
    }
    // The configuration "header_offset" must be greater or equal to 0.
    if (NULL !== $this->configuration['header_offset'] && 0 > $this->configuration['header_offset']) {
      throw new \InvalidArgumentException('The configuration "header_offset" must be greater or equal to 0.');
    }
    // If set, all fields must have a least a defined "name" property.
    if ($this->configuration['fields']) {
      foreach ($this->configuration['fields'] as $delta => $field) {
        if (!isset($field['name'])) {
          throw new \InvalidArgumentException(sprintf('The "name" configuration for "fields" in index position %s is not defined.', $delta));
        }
      }
    }
    // If "create_record_number" is specified, "record_number_field" must be a
    // non-empty string.
    if ($this->configuration['create_record_number'] && (!is_scalar($this->configuration['record_number_field']) || (empty($this->configuration['record_number_field'])))) {
      throw new \InvalidArgumentException('The configuration "record_number_field" must be a non-empty string.');
    }
  }

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    return [
      'path' => '',
      'ids' => [],
      'header_offset' => 0,
      'fields' => [],
      'delimiter' => ",",
      'enclosure' => "\"",
      'escape' => "\\",
      'create_record_number' => FALSE,
      'record_number_field' => 'record_number',
    ];
  }

  /**
   * {@inheritdoc}
   */
  public function getConfiguration() {
    return $this->configuration;
  }

  /**
   * {@inheritdoc}
   */
  public function setConfiguration(array $configuration) {
    // We must preserve integer keys for column_name mapping.
    $this->configuration = NestedArray::mergeDeepArray([$this->defaultConfiguration(), $configuration], TRUE);
  }

  /**
   * Return a string representing the source file path.
   *
   * @return string
   *   The configured "path" value.
   */
  public function __toString() {
    return $this->configuration['path'];
  }

  /**
   * {@inheritdoc}
   *
   * @throws \Drupal\migrate\MigrateException
   * @throws \League\Csv\Exception
   */
  public function initializeIterator() {
    // Use a single reader for both the header and the records so the source
    // file is only opened once per iteration.
    $reader = $this->getReader();
    $header = $reader->getHeader();
    if ($this->configuration['fields']) {
      // Configured fields replace the header record. Only the field names are
      // needed, in their configured order; taking the keys (rather than
      // flipping name => label) keeps every column even when two fields share
      // a label and keeps the header indexed by column offset.
      $header = array_keys($this->fields());
    }
    return $this->getGenerator($reader->getRecords($header));
  }

  /**
   * {@inheritdoc}
   */
  public function getIds() {
    // Every configured id column is declared as a string key.
    return array_fill_keys($this->configuration['ids'], ['type' => 'string']);
  }

  /**
   * {@inheritdoc}
   */
  public function fields() {
    // If fields are not defined, use the header record: each column name
    // doubles as its own description.
    if (empty($this->configuration['fields'])) {
      $header = $this->getReader()->getHeader();
      return array_combine($header, $header);
    }

    // Otherwise map each configured name to its label, falling back to the
    // name when no label is provided.
    $fields = [];
    foreach ($this->configuration['fields'] as $field) {
      $fields[$field['name']] = $field['label'] ?? $field['name'];
    }
    return $fields;
  }

  /**
   * Get the generator.
   *
   * Yields each record unchanged unless "create_record_number" is enabled, in
   * which case an incrementing counter is stored on the record under the
   * "record_number_field" key. The counter starts from "header_offset" (or 0
   * when there is no header) and is incremented before each assignment.
   *
   * @param \Iterator $records
   *   The CSV records.
   *
   * @codingStandardsIgnoreStart
   *
   * @return \Generator
   *   The records generator.
   *
   * @codingStandardsIgnoreEnd
   */
  protected function getGenerator(\Iterator $records) {
    $record_num = $this->configuration['header_offset'] ?? 0;
    foreach ($records as $record) {
      if ($this->configuration['create_record_number']) {
        $record[$this->configuration['record_number_field']] = ++$record_num;
      }
      yield $record;
    }
  }

  /**
   * Get the CSV reader.
   *
   * Creates a new reader on every call and applies the configured delimiter,
   * enclosure, escape character and header offset to it.
   *
   * @return \League\Csv\Reader
   *   The reader.
   *
   * @throws \Drupal\migrate\MigrateException
   * @throws \League\Csv\Exception
   */
  protected function getReader() {
    $reader = $this->createReader();
    $reader->setDelimiter($this->configuration['delimiter']);
    $reader->setEnclosure($this->configuration['enclosure']);
    $reader->setEscape($this->configuration['escape']);
    $reader->setHeaderOffset($this->configuration['header_offset']);
    return $reader;
  }

  /**
   * Construct a new CSV reader.
   *
   * @return \League\Csv\Reader
   *   The reader.
   *
   * @throws \RuntimeException
   *   Thrown when the configured path does not exist or cannot be opened for
   *   reading.
   */
  protected function createReader() {
    if (!file_exists($this->configuration['path'])) {
      throw new \RuntimeException(sprintf('File "%s" was not found.', $this->configuration['path']));
    }
    $csv = fopen($this->configuration['path'], 'r');
    if (!$csv) {
      throw new \RuntimeException(sprintf('File "%s" could not be opened.', $this->configuration['path']));
    }
    return Reader::createFromStream($csv);
  }

}