Commit 2036524e authored by heddn's avatar heddn Committed by heddn

Issue #3042160 by heddn, gaddamsr: Utilize league/csv for CSV reader

parent f9f7417a
......@@ -19,7 +19,8 @@
"minimum-stability": "dev",
"prefer-stable": true,
"require": {
"drupal/core": "^8.1"
"drupal/core": "^8.1",
"league/csv": "^8.2|^9.1"
},
"require-dev": {
"mikey179/vfsStream": "^1",
......
<?php
namespace Drupal\migrate_source_csv;
/**
* Defines a CSV file object.
*
* @package Drupal\migrate_source_csv.
*
* Extends SPLFileObject to:
* - assume CSV format
* - skip header rows on rewind()
* - address columns by header row name instead of index.
*/
class CSVFileObject extends \SplFileObject {

  /**
   * The number of rows in the CSV file before the data starts.
   *
   * @var int
   */
  protected $headerRowCount = 0;

  /**
   * The human-readable column headers, keyed by column index in the CSV.
   *
   * Each entry is an array whose first key is the name under which current()
   * exposes that column.
   *
   * @var array
   */
  protected $columnNames = [];

  /**
   * Constructs a CSVFileObject.
   *
   * The parameters mirror \SplFileObject::__construct().
   *
   * @param string $file_name
   *   The file to read.
   * @param string $open_mode
   *   (optional) The mode in which to open the file. Defaults to 'r'.
   * @param bool $use_include_path
   *   (optional) Whether to search the include path. Defaults to FALSE.
   * @param resource|null $context
   *   (optional) A stream context. Defaults to NULL.
   */
  public function __construct($file_name, $open_mode = 'r', $use_include_path = FALSE, $context = NULL) {
    // SplFileObject doesn't like a NULL context being passed to it, so the
    // context is only forwarded when one was actually supplied. Calling the
    // parent directly also avoids the ['parent', '__construct'] callable,
    // which is deprecated as of PHP 8.2.
    if ($context === NULL) {
      parent::__construct($file_name, $open_mode, $use_include_path);
    }
    else {
      parent::__construct($file_name, $open_mode, $use_include_path, $context);
    }
  }

  /**
   * {@inheritdoc}
   */
  #[\ReturnTypeWillChange]
  public function rewind() {
    // Position the file on the first row after the header rows.
    $this->seek($this->getHeaderRowCount());
  }

  /**
   * {@inheritdoc}
   */
  #[\ReturnTypeWillChange]
  public function current() {
    $row = parent::current();
    // Rows are only arrays when the READ_CSV flag is set; anything else (and
    // any row read while no column names are defined) is returned untouched.
    if (!is_array($row) || empty($row) || empty($this->columnNames)) {
      return $row;
    }

    // Build the keyed row in a fresh array rather than renaming keys in
    // place: an in-place rename loses data whenever a column name is equal
    // to the numeric index of the same or a later column.
    $named_row = [];
    foreach ($this->columnNames as $index => $column) {
      // Only columns listed in the column names are kept; a column that is
      // missing from this row is exposed as NULL.
      $named_row[key($column)] = isset($row[$index]) ? $row[$index] : NULL;
    }
    return $named_row;
  }

  /**
   * Return a count of all available source records.
   *
   * The count is obtained by iterating over this object, so the file position
   * is moved by calling this method.
   *
   * @return int
   *   The number of rows found by iterating.
   */
  public function count() {
    return iterator_count($this);
  }

  /**
   * Number of header rows.
   *
   * @return int
   *   Get the number of header rows, zero if no header row.
   */
  public function getHeaderRowCount() {
    return $this->headerRowCount;
  }

  /**
   * Number of header rows.
   *
   * @param int $header_row_count
   *   Set the number of header rows, zero if no header row.
   */
  public function setHeaderRowCount($header_row_count) {
    $this->headerRowCount = $header_row_count;
  }

  /**
   * CSV column names.
   *
   * @return array
   *   Get CSV column names.
   */
  public function getColumnNames() {
    return $this->columnNames;
  }

  /**
   * CSV column names.
   *
   * @param array $column_names
   *   Set CSV column names.
   */
  public function setColumnNames(array $column_names) {
    $this->columnNames = $column_names;
  }

}
......@@ -2,74 +2,164 @@
namespace Drupal\migrate_source_csv\Plugin\migrate\source;
use Drupal\Component\Plugin\ConfigurablePluginInterface;
use Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException;
use Drupal\Component\Plugin\ConfigurableInterface;
use Drupal\Component\Utility\NestedArray;
use Drupal\migrate\MigrateException;
use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
use Drupal\migrate\Plugin\MigrationInterface;
use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
use League\Csv\Reader;
/**
* Source for CSV.
* Source for CSV files.
*
* Available configuration options:
* - path: Path to the CSV file. File streams are supported.
* - ids: Array of column names that uniquely identify each record.
* - header_offset: (optional) The record to be used as the CSV header and
* thereby each record's field name. Defaults to 0 because records are
* zero indexed. Can be set to null to indicate no header record.
* - fields: (optional) nested array of names and labels to use instead of a
* header record. Will overwrite values provided by header record. If used,
* name is required. If no label is provided, name is used instead for the
* field description.
* - delimiter: (optional) The field delimiter (one character only). Defaults to
* a comma (,).
* - enclosure: (optional) The field enclosure character (one character only).
* Defaults to double quote marks.
* - escape: (optional) The field escape character (one character only).
* Defaults to a backslash (\).
*
* @codingStandardsIgnoreStart
*
* Example with minimal options:
* @code
* source:
* plugin: csv
* path: /tmp/countries.csv
* ids: [id]
*
* # countries.csv
* id,country
* 1,Nicaragua
* 2,Spain
* 3,United States
* @endcode
*
* In the example above, the migration source uses a single-column ID taken
* from the 'id' column of the CSV file.
*
* If the CSV file contains non-ASCII characters, make sure it includes a
* UTF BOM (Byte Order Marker) so they are interpreted correctly.
* Example with most options configured:
* @code
* source:
* plugin: csv
* path: /tmp/countries.csv
* ids: [id]
* delimiter: '|'
* enclosure: "'"
* escape: '`'
* header_offset: null
* fields:
* -
* name: id
* label: ID
* -
* name: country
* label: Country
*
* # countries.csv
* 'really long string that makes this unique'|'United States'
* 'even longer really long string that makes this unique'|'Nicaragua'
* 'even more longer really long string that makes this unique'|'Spain'
* 'escaped data'|'one`'s country'
* @endcode
*
* In the example above, we override the default control characters for the
* delimiter, enclosure and escape. We also set a null header offset to indicate
* that there is no header record.
*
* @codingStandardsIgnoreEnd
*
* @see http://php.net/manual/en/splfileobject.setcsvcontrol.php
*
* @MigrateSource(
* id = "csv"
* id = "csv",
* source_module = "migrate_source_csv"
* )
*/
class CSV extends SourcePluginBase implements ConfigurablePluginInterface {
class CSV extends SourcePluginBase implements ConfigurableInterface {
/**
* List of available source fields.
*
* Keys are the field machine names as used in field mappings, values are
* descriptions.
* {@inheritdoc}
*
* @var array
* @throws \InvalidArgumentException
* @throws \Drupal\migrate\MigrateException
*/
protected $fields = [];
public function __construct(array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration) {
parent::__construct($configuration, $plugin_id, $plugin_definition, $migration);
$this->setConfiguration($configuration);
/**
* List of key fields, as indexes.
*
* @var array
*/
protected $keys = [];
// Path is required.
if (empty($this->configuration['path'])) {
throw new \InvalidArgumentException('You must declare the "path" to the source CSV file in your source settings.');
}
// IDs are required.
if (empty($this->configuration['ids']) || !is_array($this->configuration['ids'])) {
throw new \InvalidArgumentException('You must declare "ids" as a unique array of fields in your source settings.');
}
// IDs must be an array of strings.
if ($this->configuration['ids'] !== array_unique(array_filter($this->configuration['ids'], 'is_string'))) {
throw new \InvalidArgumentException('The ids must a flat array with unique string values.');
}
// CSV character control characters must be exactly 1 character.
foreach (['delimiter', 'enclosure', 'escape'] as $character) {
if (1 !== strlen($this->configuration[$character])) {
throw new \InvalidArgumentException(sprintf('%s must be a single character; %s given', $character, $this->configuration[$character]));
}
}
// The configuration "header_offset" must be null or an integer.
if (!(NULL === $this->configuration['header_offset'] || is_int($this->configuration['header_offset']))) {
throw new \InvalidArgumentException('The configuration "header_offset" must be null or an integer.');
}
// The configuration "header_offset" must be greater or equal to 0.
if (NULL !== $this->configuration['header_offset'] && 0 > $this->configuration['header_offset']) {
throw new \InvalidArgumentException('The configuration "header_offset" must be greater or equal to 0.');
}
// If set, all fields must have at least a defined "name" property.
if ($this->configuration['fields']) {
foreach ($this->configuration['fields'] as $delta => $field) {
if (!isset($field['name'])) {
throw new \InvalidArgumentException(sprintf('The "name" configuration for "fields" in index position %s is not defined.', $delta));
}
}
}
}
/**
* The file class to read the file.
*
* @var string
* {@inheritdoc}
*/
protected $fileClass = '';
public function defaultConfiguration() {
  // Baseline value for every option this source plugin reads from its
  // configuration.
  $defaults = [];
  $defaults['path'] = '';
  $defaults['ids'] = [];
  $defaults['header_offset'] = 0;
  $defaults['fields'] = [];
  $defaults['delimiter'] = ",";
  $defaults['enclosure'] = "\"";
  $defaults['escape'] = "\\";
  return $defaults;
}
/**
* The file object that reads the CSV file.
*
* @var \SplFileObject
* {@inheritdoc}
*/
protected $file = NULL;
public function getConfiguration() {
  // Hand back the configuration array exactly as it is stored on the plugin.
  $configuration = $this->configuration;
  return $configuration;
}
/**
* {@inheritdoc}
*/
public function __construct(array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration) {
parent::__construct($configuration, $plugin_id, $plugin_definition, $migration);
$this->setConfiguration($configuration);
// Path is required.
if (empty($this->getConfiguration()['path'])) {
throw new MigrateException('You must declare the "path" to the source CSV file in your source settings.');
}
// Key field(s) are required.
if (empty($this->getConfiguration()['keys'])) {
throw new MigrateException('You must declare "keys" as a unique array of fields in your source settings.');
}
$this->fileClass = $this->getConfiguration()['file_class'];
public function setConfiguration(array $configuration) {
  // Layer the supplied values over the defaults. Integer keys must be
  // preserved for column_name mapping, hence the TRUE flag.
  $layers = [$this->defaultConfiguration(), $configuration];
  $this->configuration = NestedArray::mergeDeepArray($layers, TRUE);
}
/**
......@@ -79,57 +169,23 @@ class CSV extends SourcePluginBase implements ConfigurablePluginInterface {
* The file path.
*/
public function __toString() {
return $this->getConfiguration()['path'];
return $this->configuration['path'];
}
/**
* {@inheritdoc}
*/
public function initializeIterator() {
if (!file_exists($this->getConfiguration()['path'])) {
throw new InvalidPluginDefinitionException($this->getPluginId(), sprintf('File path (%s) does not exist.', $this->getConfiguration()['path']));
}
// File handler using header-rows-respecting extension of SPLFileObject.
$this->file = new $this->fileClass($this->getConfiguration()['path']);
return $this->setupFile();
}
/**
* Setup the file.
*
* @return \SplFileObject
* Returns the file object.
* @throws \Drupal\migrate\MigrateException
* @throws \League\Csv\Exception
*/
protected function setupFile() {
// Set basics of CSV behavior based on configuration.
$delimiter = $this->getConfiguration()['delimiter'];
$enclosure = $this->getConfiguration()['enclosure'];
$escape = $this->getConfiguration()['escape'];
$this->file->setCsvControl($delimiter, $enclosure, $escape);
$this->file->setFlags($this->getConfiguration()['file_flags']);
// Figure out what CSV column(s) to use. Use either the header row(s) or
// explicitly provided column name(s).
if ($this->getConfiguration()['header_row_count']) {
$this->file->setHeaderRowCount($this->getConfiguration()['header_row_count']);
// Find the last header line.
$this->file->rewind();
$this->file->seek($this->file->getHeaderRowCount() - 1);
$row = $this->file->current();
foreach ($row as $header) {
$header = trim($header);
$column_names[] = [$header => $header];
}
$this->file->setColumnNames($column_names);
}
// An explicit list of column name(s) will override any header row(s).
if ($this->getConfiguration()['column_names']) {
$this->file->setColumnNames($this->getConfiguration()['column_names']);
public function initializeIterator() {
$header = $this->getReader()->getHeader();
if ($this->configuration['fields']) {
// If there is no header record, we need to flip description and name so
// the name becomes the header record.
$header = array_flip($this->fields());
}
return $this->file;
return $this->getGenerator($this->getReader()->getRecords($header));
}
/**
......@@ -137,13 +193,8 @@ class CSV extends SourcePluginBase implements ConfigurablePluginInterface {
*/
public function getIds() {
$ids = [];
foreach ($this->getConfiguration()['keys'] as $delta => $value) {
if (is_array($value)) {
$ids[$delta] = $value;
}
else {
$ids[$value]['type'] = 'string';
}
foreach ($this->configuration['ids'] as $value) {
$ids[$value]['type'] = 'string';
}
return $ids;
}
......@@ -152,69 +203,63 @@ class CSV extends SourcePluginBase implements ConfigurablePluginInterface {
* {@inheritdoc}
*/
public function fields() {
$fields = [];
if (!$this->file) {
$this->initializeIterator();
// If fields are not defined, use the header record.
if (empty($this->configuration['fields'])) {
$header = $this->getReader()->getHeader();
return array_combine($header, $header);
}
foreach ($this->file->getColumnNames() as $column) {
$fields[key($column)] = reset($column);
$fields = [];
foreach ($this->configuration['fields'] as $field) {
$fields[$field['name']] = isset($field['label']) ? $field['label'] : $field['name'];
}
// Any caller-specified fields with the same names as extracted fields will
// override them; any others will be added.
$fields = $this->getConfiguration()['fields'] + $fields;
return $fields;
}
/**
* {@inheritdoc}
*/
public function getConfiguration() {
return $this->configuration;
}
/**
* Gets the file object.
* Get the generator.
*
* @return \SplFileObject
* The file object.
*/
public function getFile() {
return $this->file;
}
/**
* {@inheritdoc}
* @param \Iterator $records
* The CSV records.
*
* @codingStandardsIgnoreStart
*
* @return \Generator
* The records generator.
*
* @codingStandardsIgnoreEnd
*/
public function setConfiguration(array $configuration) {
// We must preserve integer keys for column_name mapping.
$this->configuration = NestedArray::mergeDeepArray([$this->defaultConfiguration(), $configuration], TRUE);
protected function getGenerator(\Iterator $records) {
  // Walk the iterator by hand and re-emit every record, one at a time.
  for ($records->rewind(); $records->valid(); $records->next()) {
    yield $records->current();
  }
}
/**
* {@inheritdoc}
* Get the CSV reader.
*
* @return \League\Csv\Reader
* The reader.
*
* @throws \Drupal\migrate\MigrateException
* @throws \League\Csv\Exception
*/
public function defaultConfiguration() {
return [
'fields' => [],
'keys' => [],
'column_names' => [],
'header_row_count' => 0,
'file_flags' => \SplFileObject::READ_CSV | \SplFileObject::READ_AHEAD | \SplFileObject::DROP_NEW_LINE | \SplFileObject::SKIP_EMPTY,
'delimiter' => ',',
'enclosure' => '"',
'escape' => '\\',
'path' => '',
'file_class' => 'Drupal\migrate_source_csv\CSVFileObject',
];
protected function getReader() {
  // Start from a newly created reader, then apply the configured CSV
  // control characters and header offset to it.
  $csv = $this->createReader();
  $settings = $this->configuration;
  $csv->setDelimiter($settings['delimiter']);
  $csv->setEnclosure($settings['enclosure']);
  $csv->setEscape($settings['escape']);
  $csv->setHeaderOffset($settings['header_offset']);
  return $csv;
}
/**
* {@inheritdoc}
* Construct a new CSV reader.
*
* @return \League\Csv\Reader
* The reader.
*/
public function calculateDependencies() {
return [];
protected function createReader() {
  $path = $this->configuration['path'];
  // The source is only ever read, so open it read-only: 'r+' needlessly
  // requires write access and fails on read-only files and on stream
  // wrappers that cannot be opened for writing.
  $stream = fopen($path, 'r');
  // fopen() returns FALSE on failure; stop here with a clear message rather
  // than handing a non-resource to the reader.
  if ($stream === FALSE) {
    throw new \RuntimeException(sprintf('File "%s" could not be opened.', $path));
  }
  return Reader::createFromStream($stream);
}
}
type: module
name: Source plugin using yield
description: 'Provides a source plugin using yield'
package: Migration
type: module
package: Testing
core: 8.x
dependencies:
- drupal:migrate (>=8.1)
- migrate
<?php
namespace Drupal\source_plugin_yield_test\Plugin\migrate\source;
namespace Drupal\csv_source_yield_test\Plugin\migrate\source;
use Drupal\migrate_source_csv\Plugin\migrate\source\CSV;
use League\Csv\Reader;
/**
* Yields each image and sku.
......@@ -17,25 +18,24 @@ class YieldRows extends CSV {
* {@inheritdoc}
*/
public function initializeIterator() {
$file = parent::initializeIterator();
return $this->getYield($file);
return $this->getYield(parent::initializeIterator());
}
/**
* Prepare a test row using yield.
*
* @param \SplFileObject $file
* The source CSV file object.
* @param \League\Csv\Reader $reader
* The CSV reader.
*
* @codingStandardsIgnoreStart
*
* @return \Generator
* A new row with only the id value.
* A generator with only the id value.
*
* @codingStandardsIgnoreEnd
*/
public function getYield(\SplFileObject $file) {
foreach ($file as $row_num => $row) {
public function getYield(Reader $reader) {
foreach ($reader as $row_num => $row) {
$new_row = [];
$new_row['id'] = $row['id'];
yield($new_row);
......
id,first_name,last_name,email,country,ip_address,date_of_birth
1,Justin,Dean,jdean0@example.com,Indonesia,60.242.130.40,01/05/1955
2,Joan,Jordan,jjordan1@example.com,Thailand,137.230.209.171,10/14/1958
3,William,Ray,wray2@example.com,Germany,4.75.251.71,08/13/1962
4,Jack,Collins,jcollins3@example.com,Indonesia,118.241.243.64,08/16/1977
5,Jean,Moreno,jmoreno4@example.com,Portugal,12.24.215.20,10/18/1940
6,Dennis,Mitchell,dmitchell5@example.com,Mexico,185.24.131.116,08/21/1999
7,Harry,West,hwest6@example.com,Uzbekistan,101.74.110.171,10/05/1987
8,Rebecca,Hunt,rhunt7@example.com,France,253.107.6.23,04/21/1922
9,Rose,Rogers,rrogers8@example.com,China,21.2.126.228,08/14/2005
10,Juan,Walker,jwalker9@example.com,Angola,192.118.77.225,03/09/1958
11,Lois,Price,lpricea@example.com,Greece,231.185.100.19,04/08/1944
12,Patricia,Bell,pbellb@example.com,Sweden,226.2.254.94,02/26/1950
13,Gerald,Kelly,gkellyc@example.com,China,31.204.2.163,09/25/1948
14,Kimberly,Jackson,kjacksond@example.com,Thailand,19.187.65.116,06/21/1926
15,Jason,Mason,jmasone@example.com,Greece,225.129.68.203,10/01/1950
id,first_name,last_name,email,ip_address,date_of_birth
1,Justin,Dean,jdean0@example.com,60.242.130.40,01/05/1955
2,Joan,Jordan,jjordan1@example.com,137.230.209.171,10/14/1958
3,William,Ray,wray2@example.com,4.75.251.71,08/13/1962
4,Jack,Collins,jcollins3@example.com,118.241.243.64,08/16/1977
5,Jean,Moreno,jmoreno4@example.com,12.24.215.20,10/18/1940