Commit 62b3784b authored by heddn

Initial commit after migrating from migrate_plus.

parents
type: module
name: Migrate Source CSV
description: 'CSV source migration.'
package: Migration
core: 8.x
dependencies:
- migrate
<?php
/**
* @file
* Contains \Drupal\migrate_source_csv\CSVFileObject.
*/
namespace Drupal\migrate_source_csv;
/**
 * Defines a CSV file object.
 *
 * @package Drupal\migrate_source_csv.
 *
 * Extends SplFileObject to:
 * - assume CSV format
 * - skip header rows on rewind()
 * - address columns by header row name instead of index.
 */
class CSVFileObject extends \SplFileObject {

  /**
   * The number of rows in the CSV file before the data starts.
   *
   * @var int
   */
  protected $headerRowCount = 0;

  /**
   * The human-readable column headers, keyed by column index in the CSV.
   *
   * @var array
   */
  protected $columnNames = array();

  /**
   * Opens the file and switches the object into CSV reading mode.
   *
   * The optional arguments mirror those of \SplFileObject::__construct() and
   * are forwarded to it unchanged.
   *
   * @param string $file_name
   *   The file to read.
   * @param string $open_mode
   *   (optional) The mode in which to open the file.
   * @param bool $use_include_path
   *   (optional) Whether to search the include path for the file.
   * @param resource|null $context
   *   (optional) A stream context, or NULL for none.
   */
  public function __construct($file_name, $open_mode = 'r', $use_include_path = FALSE, $context = NULL) {
    // SplFileObject doesn't like NULL arguments passed to it, so the context
    // is only forwarded when the caller actually supplied one. Calling the
    // parent directly also avoids the "parent" string callable.
    if ($context === NULL) {
      parent::__construct($file_name, $open_mode, $use_include_path);
    }
    else {
      parent::__construct($file_name, $open_mode, $use_include_path, $context);
    }

    $this->setFlags(self::READ_CSV | self::READ_AHEAD | self::DROP_NEW_LINE | self::SKIP_EMPTY);
  }

  /**
   * Moves to the first data row, i.e. the row right after the header rows.
   */
  #[\ReturnTypeWillChange]
  public function rewind() {
    $this->seek($this->getHeaderRowCount());
  }

  /**
   * Returns the current row, keyed by column name when names are defined.
   *
   * @return array|false
   *   Whatever the parent returns when no column names are set or no row is
   *   available. Otherwise an array holding exactly the configured columns,
   *   keyed by column name; columns missing from the row are set to NULL.
   */
  #[\ReturnTypeWillChange]
  public function current() {
    $row = parent::current();
    if (!$row || empty($this->columnNames)) {
      return $row;
    }

    // Build a fresh array rather than renaming keys in place: a column name
    // that looks like an index (e.g. "0" or "1") would otherwise overwrite or
    // delete a column that has not been mapped yet.
    $named_row = array();
    foreach ($this->columnNames as $index => $name) {
      $named_row[$name] = isset($row[$index]) ? $row[$index] : NULL;
    }
    return $named_row;
  }

  /**
   * Return a count of all available source records.
   *
   * Counting iterates over the whole file starting from rewind(), so header
   * rows are excluded and the object is left positioned at the end of the
   * file afterwards.
   *
   * @return int
   *   The number of data rows.
   */
  public function count() {
    return iterator_count($this);
  }

  /**
   * Gets the number of header rows.
   *
   * @return int
   *   The number of header rows, zero if there is no header row.
   */
  public function getHeaderRowCount() {
    return $this->headerRowCount;
  }

  /**
   * Sets the number of header rows.
   *
   * @param int $header_row_count
   *   The number of header rows, zero if there is no header row.
   */
  public function setHeaderRowCount($header_row_count) {
    $this->headerRowCount = $header_row_count;
  }

  /**
   * Gets the CSV column names.
   *
   * @return array
   *   The column names, keyed by column index.
   */
  public function getColumnNames() {
    return $this->columnNames;
  }

  /**
   * Sets the CSV column names.
   *
   * @param array $column_names
   *   The column names, keyed by column index.
   */
  public function setColumnNames(array $column_names) {
    $this->columnNames = $column_names;
  }

}
<?php
/**
* @file
* Contains \Drupal\migrate_source_csv\Plugin\migrate\source\CSV.
*/
namespace Drupal\migrate_source_csv\Plugin\migrate\source;
use Drupal\migrate\Entity\MigrationInterface;
use Drupal\migrate\MigrateException;
use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
use Drupal\migrate_source_csv\CSVFileObject;
/**
 * Source for CSV.
 *
 * If the CSV file contains non-ASCII characters, make sure it includes a
 * UTF BOM (Byte Order Marker) so they are interpreted correctly.
 *
 * Recognised source configuration keys:
 * - path: (required) Path to the CSV file.
 * - keys: (required) Names of the fields that identify a row.
 * - header_row_count: (optional) Number of header rows; the last one supplies
 *   the column names.
 * - column_names: (optional) Explicit column names, overriding header rows.
 * - delimiter, enclosure, escape: (optional) CSV control characters.
 * - fields: (optional) Field descriptions, overriding the extracted ones.
 *
 * @MigrateSource(
 *   id = "csv"
 * )
 */
class CSV extends SourcePluginBase {

  /**
   * List of available source fields.
   *
   * Keys are the field machine names as used in field mappings, values are
   * descriptions.
   *
   * @var array
   */
  protected $fields = array();

  /**
   * List of key fields, as indexes.
   *
   * @var array
   */
  protected $keys = array();

  /**
   * {@inheritdoc}
   *
   * @throws \Drupal\migrate\MigrateException
   *   When the "path" or "keys" source setting is missing.
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration) {
    parent::__construct($configuration, $plugin_id, $plugin_definition, $migration);

    // Path is required.
    if (empty($this->configuration['path'])) {
      throw new MigrateException('You must declare the "path" to the source CSV file in your source settings.');
    }

    // Key field(s) are required.
    if (empty($this->configuration['keys'])) {
      throw new MigrateException('You must declare "keys" as a unique array of fields in your source settings.');
    }
  }

  /**
   * Return a string representing the source query.
   *
   * @return string
   *   The file path.
   */
  public function __toString() {
    return $this->configuration['path'];
  }

  /**
   * {@inheritdoc}
   *
   * @throws \Drupal\migrate\MigrateException
   *   When header rows are configured but the last header row cannot be read.
   */
  public function initializeIterator() {
    // File handler using header-rows-respecting extension of SplFileObject.
    $file = new CSVFileObject($this->configuration['path']);

    // Set basics of CSV behavior based on configuration.
    $delimiter = !empty($this->configuration['delimiter']) ? $this->configuration['delimiter'] : ',';
    $enclosure = !empty($this->configuration['enclosure']) ? $this->configuration['enclosure'] : '"';
    $escape = !empty($this->configuration['escape']) ? $this->configuration['escape'] : '\\';
    $file->setCsvControl($delimiter, $enclosure, $escape);

    // Figure out what CSV column(s) to use. Use either the header row(s) or
    // explicitly provided column name(s).
    if (!empty($this->configuration['header_row_count'])) {
      $file->setHeaderRowCount($this->configuration['header_row_count']);

      // Find the last header line. No column names are set yet, so current()
      // hands back the raw, index-keyed row.
      $file->seek($file->getHeaderRowCount() - 1);
      $row = $file->current();

      // A file with fewer rows than header_row_count yields no row here; fail
      // with a clear message instead of iterating over a non-array.
      if (!is_array($row)) {
        throw new MigrateException(sprintf('Unable to read header row %d from the source CSV file "%s".', $file->getHeaderRowCount(), $this->configuration['path']));
      }

      $column_names = array();
      foreach ($row as $header) {
        $column_names[] = trim($header);
      }
      $file->setColumnNames($column_names);
    }

    // An explicit list of column name(s) will override any header row(s).
    if (!empty($this->configuration['column_names'])) {
      $file->setColumnNames($this->configuration['column_names']);
    }

    return $file;
  }

  /**
   * {@inheritdoc}
   */
  public function getIDs() {
    $ids = array();
    foreach ($this->configuration['keys'] as $key) {
      // Every key column is treated as a string identifier.
      $ids[$key]['type'] = 'string';
    }
    return $ids;
  }

  /**
   * {@inheritdoc}
   */
  public function fields() {
    $fields = array();
    foreach ($this->getIterator()->getColumnNames() as $column) {
      $fields[$column] = $column;
    }

    // Any caller-specified fields with the same names as extracted fields will
    // override them; any others will be added.
    if (!empty($this->configuration['fields'])) {
      $fields = $this->configuration['fields'] + $fields;
    }

    return $fields;
  }

}
<?php
/**
* @file
* Contains \Drupal\Tests\migrate_source_csv\Unit\CSVFileObjectTest.
*/
namespace Drupal\Tests\migrate_source_csv\Unit;
use Drupal\migrate_source_csv\CSVFileObject;
use Drupal\Tests\UnitTestCase;
use org\bovigo\vfs\vfsStream;
use org\bovigo\vfs\content\LargeFileContent;
/**
 * Unit tests for the CSV file object.
 *
 * @coversDefaultClass \Drupal\migrate_source_csv\CSVFileObject
 *
 * @group migrate_source_csv
 */
class CSVFileObjectTest extends UnitTestCase {

  /**
   * The class under test.
   *
   * @var \Drupal\migrate_source_csv\CSVFileObject
   */
  protected $csvFileObject;

  /**
   * {@inheritdoc}
   */
  public function setUp() {
    // Let the base test case perform its own per-test initialisation first.
    parent::setUp();

    $this->csvFileObject = new CSVFileObject(dirname(__FILE__) . '/artifacts/data.csv');
  }

  /**
   * Happy path CSV data provider.
   *
   * @return array
   *   One data set holding the URL of a virtual file with a header row and
   *   fifteen data rows.
   */
  public function providerCSVFile() {
    $file_data = <<<'EOD'
id,first_name,last_name,email,country,ip_address
1,Justin,Dean,jdean0@example.com,Indonesia,60.242.130.40
2,Joan,Jordan,jjordan1@example.com,Thailand,137.230.209.171
3,William,Ray,wray2@example.com,Germany,4.75.251.71
4,Jack,Collins,jcollins3@example.com,Indonesia,118.241.243.64
5,Jean,Moreno,jmoreno4@example.com,Portugal,12.24.215.20
6,Dennis,Mitchell,dmitchell5@example.com,Mexico,185.24.131.116
7,Harry,West,hwest6@example.com,Uzbekistan,101.74.110.171
8,Rebecca,Hunt,rhunt7@example.com,France,253.107.6.23
9,Rose,Rogers,rrogers8@example.com,China,21.2.126.228
10,Juan,Walker,jwalker9@example.com,Angola,192.118.77.225
11,Lois,Price,lpricea@example.com,Greece,231.185.100.19
12,Patricia,Bell,pbellb@example.com,Sweden,226.2.254.94
13,Gerald,Kelly,gkellyc@example.com,China,31.204.2.163
14,Kimberly,Jackson,kjacksond@example.com,Thailand,19.187.65.116
15,Jason,Mason,jmasone@example.com,Greece,225.129.68.203
EOD;

    return array(
      array(vfsStream::newFile('data.csv')->at(vfsStream::setup('directory'))->withContent($file_data)->url()),
    );
  }

  /**
   * Tests that construction yields a CSV-configured file object.
   *
   * @test
   * @covers ::__construct
   * @dataProvider providerCSVFile
   */
  public function create($file_path) {
    $csvFileObject = new CSVFileObject($file_path);
    $this->assertInstanceOf('\Drupal\migrate_source_csv\CSVFileObject', $csvFileObject);

    $flags = CSVFileObject::READ_CSV | CSVFileObject::READ_AHEAD | CSVFileObject::DROP_NEW_LINE | CSVFileObject::SKIP_EMPTY;
    $this->assertEquals($flags, $csvFileObject->getFlags());
  }

  /**
   * Tests that the header row count round-trips through its accessors.
   *
   * @test
   * @covers ::getHeaderRowCount
   * @covers ::setHeaderRowCount
   * @dataProvider providerCSVFile
   */
  public function headerRowCount($file_path) {
    $csvFileObject = new CSVFileObject($file_path);
    $expected = 1;
    $csvFileObject->setHeaderRowCount($expected);
    $actual = $csvFileObject->getHeaderRowCount();
    $this->assertEquals($expected, $actual);
  }

  /**
   * Tests that count() excludes the header row.
   *
   * @test
   * @covers ::count
   * @dataProvider providerCSVFile
   */
  public function countLines($file_path) {
    $csvFileObject = new CSVFileObject($file_path);
    $expected = 15;
    $csvFileObject->setHeaderRowCount(1);
    $actual = $csvFileObject->count();
    $this->assertEquals($expected, $actual);
  }

  /**
   * Tests count() against a large virtual file.
   *
   * @test
   * @covers ::count
   * @large
   */
  public function countLinesLargeFile() {
    $file_uri = vfsStream::newFile('data.csv')
      ->at(vfsStream::setup('directory'))
      ->setContent(LargeFileContent::withMegabytes(15))
      ->url();
    $csvFileObject = new CSVFileObject($file_uri);
    $csvFileObject->setHeaderRowCount(1);

    // NOTE(review): the expected row count is carried over unchanged and
    // should be confirmed against what LargeFileContent actually generates.
    // The additional "greater than" assertion on the same pair of values was
    // removed because it could never hold together with the equality check.
    $expected = 10000;
    $actual = $csvFileObject->count();
    $this->assertEquals($expected, $actual);
  }

  /**
   * Tests row retrieval with and without header rows and column names.
   *
   * @test
   * @covers ::current
   * @covers ::rewind
   * @covers ::getColumnNames
   * @covers ::setColumnNames
   * @dataProvider providerCSVFile
   */
  public function current($file_path) {
    $csvFileObject = new CSVFileObject($file_path);
    $columns = array(
      'id',
      'first_name',
      'last_name',
      'email',
      'country',
      'ip_address',
    );
    $row = array(
      '1',
      'Justin',
      'Dean',
      'jdean0@example.com',
      'Indonesia',
      '60.242.130.40',
    );

    // Without header rows the first line of the file is returned as data.
    $csvFileObject->rewind();
    $current = $csvFileObject->current();
    $this->assertArrayEquals($columns, $current);

    // With one header row, rewinding lands on the first data row.
    $csvFileObject->setHeaderRowCount(1);
    $csvFileObject->rewind();
    $current = $csvFileObject->current();
    $this->assertArrayEquals($row, $current);

    // With column names set, rows are keyed by those names.
    $csvFileObject->setColumnNames($columns);
    $csvFileObject->rewind();
    $current = $csvFileObject->current();
    $this->assertArrayEquals($columns, array_keys($current));
    $this->assertArrayEquals($row, array_values($current));
    $this->assertArrayEquals($columns, $csvFileObject->getColumnNames());
  }

}
<?php
/**
* @file
* Contains \Drupal\Tests\migrate_source_csv\Unit\Plugin\migrate\source\CSVTest.
*/
namespace Drupal\Tests\migrate_source_csv\Unit\Plugin\migrate\source;
use \Drupal\Tests\UnitTestCase;
use \Drupal\migrate_source_csv\Plugin\migrate\source\CSV;
/**
* @coversDefaultClass \Drupal\migrate_source_csv\Plugin\migrate\source\CSV
*
* @group migrate_source_csv
*/
class CSVTest extends UnitTestCase {
/**
* The CSV source plugin under test, built in setUp() from the properties below.
*
* @var \Drupal\migrate_source_csv\Plugin\migrate\source\CSV
*/
protected $csv;
/**
* The source configuration passed to the plugin constructor.
*
* @var array
*/
protected $configuration;
/**
* The plugin id passed to the plugin constructor.
*
* @var string
*/
protected $plugin_id;
/**
* The plugin definition passed to the plugin constructor.
*
* @var array
*/
protected $plugin_definition;
/**
* The mock migration passed to the plugin constructor.
*
* @var \Drupal\migrate\Entity\MigrationInterface
*/
protected $plugin;
/**
 * {@inheritdoc}
 */
public function setUp() {
  // Let the base test case perform its own per-test initialisation first.
  parent::setUp();

  // Default fixture: a CSV file with a single header row, keyed by "id".
  $this->configuration = array(
    'path' => dirname(__FILE__) . '/../../../artifacts/data.csv',
    'keys' => array('id'),
    'header_row_count' => 1,
  );
  $this->plugin_id = 'test csv migration';
  $this->plugin_definition = array();
  $this->plugin = $this->getMock('\Drupal\migrate\Entity\MigrationInterface');
  $this->csv = new CSV($this->configuration, $this->plugin_id, $this->plugin_definition, $this->plugin);
}
/**
 * Tests that the plugin built in setUp() is a CSV source plugin.
 *
 * @covers ::__construct
 */
public function testCreate() {
  $this->assertInstanceOf('\Drupal\migrate_source_csv\Plugin\migrate\source\CSV', $this->csv);
}
/**
 * Tests that constructing the plugin without a "path" setting fails.
 *
 * @expectedException \Drupal\migrate\MigrateException
 * @expectedExceptionMessage You must declare the "path" to the source CSV file in your source settings.
 */
public function testMigrateExceptionPathMissing() {
  // No source settings at all, so the "path" check is the first to trip.
  $empty_configuration = array();
  new CSV($empty_configuration, $this->plugin_id, $this->plugin_definition, $this->plugin);
}
/**
 * Tests that constructing the plugin without a "keys" setting fails.
 *
 * @expectedException \Drupal\migrate\MigrateException
 * @expectedExceptionMessage You must declare "keys" as a unique array of fields in your source settings.
 */
public function testMigrateExceptionKeysMissing() {
  // A path is supplied so that only the missing "keys" setting is at fault.
  $path_only_configuration = array('path' => 'foo');
  new CSV($path_only_configuration, $this->plugin_id, $this->plugin_definition, $this->plugin);
}
/**
 * Tests that casting the plugin to a string yields the configured path.
 *
 * @covers ::__toString
 */
public function testToString() {
  $this->assertEquals($this->configuration['path'], (string) $this->csv);
}
/**
* @covers ::initializeIterator
*/
function testInitializeIterator() {
$config_common = array(
'path' => dirname(__FILE__) . '/../../../artifacts/data_edge_cases.csv',
'keys' => array('id'),
);
$config_delimiter = array('delimiter' => '|');
$config_enclosure = array('enclosure' => '%');
$config_escape = array('escape' => '`');
$csv = new CSV($config_common + $config_delimiter, $this->plugin_id, $this->plugin_definition, $this->plugin);
$this->assertEquals(current($config_delimiter), $csv->initializeIterator()
->getCsvControl()[0]);
$this->assertEquals('"', $csv->initializeIterator()->getCsvControl()[1]);
$csv = new CSV($config_common + $config_enclosure, $this->plugin_id, $this->plugin_definition, $this->plugin);
$this->assertEquals(',', $csv->initializeIterator()->getCsvControl()[0]);
$this->assertEquals(current($config_enclosure), $csv->initializeIterator()
->getCsvControl()[1]);
$csv = new CSV($config_common + $config_delimiter + $config_enclosure + $config_escape, $this->plugin_id, $this->plugin_definition, $this->plugin);
$csvFileObject = $csv->getIterator();
$row = array(
'1',
'Justin',
'Dean',
'jdean0@prlog.org',
'Indonesia',
'60.242.130.40',
);
$csvFileObject->rewind();
$current = $csvFileObject->current();
$this->assertArrayEquals($row, $current);
$csvFileObject = $this->csv->getIterator();
$row = array(
'id' => '1',
'first_name' => 'Justin',
'last_name' => 'Dean',
'email' => 'jdean0@prlog.org',
'country' => 'Indonesia',
'ip_address' => '60.242.130.40',
);
$second_row = array(
'id' => '2',
'first_name' => 'Joan',
'last_name' => 'Jordan',
'email' => 'jjordan1@tamu.edu',
'country' => 'Thailand',
'ip_address' => '137.230.209.171',
);
$csvFileObject->rewind();
$current = $csvFileObject->current();
$this->assertArrayEquals($row, $current);
$csvFileObject->next();
$next = $csvFileObject->current();
$this->assertArrayEquals($second_row, $next);
$column_names = array(
'column_names' => array(
'id',
'first_name',
),
);
$csv = new CSV($this->configuration + $column_names, $this->plugin_id, $this->plugin_definition, $this->plugin);
$csvFileObject = $csv->getIterator();
$row = array(
'id' => '1',
'first_name' => 'Justin',
);