Commit 54420861 authored by heddn's avatar heddn Committed by heddn

Issue #2835928 by heddn, TrevorBradley: CSVFileObject::DROP_NEW_LINE mangles CSV multi-line cells

parent 7a49df28
......@@ -29,3 +29,6 @@ migrate_plus.source.csv:
escape:
type: string
label: 'The field escape character (one character only)'
file_flags:
type: integer
label: 'Bitmask flags for the SplFileObject'
......@@ -35,8 +35,6 @@ class CSVFileObject extends \SplFileObject {
// Necessary to use this approach because SplFileObject doesn't like NULL
// arguments passed to it.
call_user_func_array(['parent', '__construct'], func_get_args());
$this->setFlags(CSVFileObject::READ_CSV | CSVFileObject::READ_AHEAD | CSVFileObject::DROP_NEW_LINE | CSVFileObject::SKIP_EMPTY);
}
/**
......
......@@ -2,6 +2,8 @@
namespace Drupal\migrate_source_csv\Plugin\migrate\source;
use Drupal\Component\Plugin\ConfigurablePluginInterface;
use Drupal\Component\Utility\NestedArray;
use Drupal\migrate\MigrateException;
use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
use Drupal\migrate\Plugin\MigrationInterface;
......@@ -16,7 +18,7 @@ use Drupal\migrate\Plugin\MigrationInterface;
* id = "csv"
* )
*/
class CSV extends SourcePluginBase {
class CSV extends SourcePluginBase implements ConfigurablePluginInterface {
/**
* List of available source fields.
......@@ -54,18 +56,19 @@ class CSV extends SourcePluginBase {
*/
public function __construct(array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration) {
parent::__construct($configuration, $plugin_id, $plugin_definition, $migration);
$this->setConfiguration($configuration);
// Path is required.
if (empty($this->configuration['path'])) {
if (empty($this->getConfiguration()['path'])) {
throw new MigrateException('You must declare the "path" to the source CSV file in your source settings.');
}
// Key field(s) are required.
if (empty($this->configuration['keys'])) {
if (empty($this->getConfiguration()['keys'])) {
throw new MigrateException('You must declare "keys" as a unique array of fields in your source settings.');
}
$this->fileClass = empty($configuration['file_class']) ? 'Drupal\migrate_source_csv\CSVFileObject' : $configuration['file_class'];
$this->fileClass = $this->getConfiguration()['file_class'];
}
/**
......@@ -75,7 +78,7 @@ class CSV extends SourcePluginBase {
* The file path.
*/
public function __toString() {
return $this->configuration['path'];
return $this->getConfiguration()['path'];
}
/**
......@@ -83,18 +86,19 @@ class CSV extends SourcePluginBase {
*/
public function initializeIterator() {
// File handler using header-rows-respecting extension of SPLFileObject.
$this->file = new $this->fileClass($this->configuration['path']);
$this->file = new $this->fileClass($this->getConfiguration()['path']);
// Set basics of CSV behavior based on configuration.
$delimiter = !empty($this->configuration['delimiter']) ? $this->configuration['delimiter'] : ',';
$enclosure = !empty($this->configuration['enclosure']) ? $this->configuration['enclosure'] : '"';
$escape = !empty($this->configuration['escape']) ? $this->configuration['escape'] : '\\';
$delimiter = $this->getConfiguration()['delimiter'];
$enclosure = $this->getConfiguration()['enclosure'];
$escape = $this->getConfiguration()['escape'];
$this->file->setCsvControl($delimiter, $enclosure, $escape);
$this->file->setFlags($this->getConfiguration()['file_flags']);
// Figure out what CSV column(s) to use. Use either the header row(s) or
// explicitly provided column name(s).
if (!empty($this->configuration['header_row_count'])) {
$this->file->setHeaderRowCount($this->configuration['header_row_count']);
if ($this->getConfiguration()['header_row_count']) {
$this->file->setHeaderRowCount($this->getConfiguration()['header_row_count']);
// Find the last header line.
$this->file->rewind();
......@@ -108,8 +112,8 @@ class CSV extends SourcePluginBase {
$this->file->setColumnNames($column_names);
}
// An explicit list of column name(s) will override any header row(s).
if (!empty($this->configuration['column_names'])) {
$this->file->setColumnNames($this->configuration['column_names']);
if ($this->getConfiguration()['column_names']) {
$this->file->setColumnNames($this->getConfiguration()['column_names']);
}
return $this->file;
......@@ -120,7 +124,7 @@ class CSV extends SourcePluginBase {
*/
public function getIDs() {
$ids = [];
foreach ($this->configuration['keys'] as $delta => $value) {
foreach ($this->getConfiguration()['keys'] as $delta => $value) {
if (is_array($value)) {
$ids[$delta] = $value;
}
......@@ -142,11 +146,48 @@ class CSV extends SourcePluginBase {
// Any caller-specified fields with the same names as extracted fields will
// override them; any others will be added.
if (!empty($this->configuration['fields'])) {
$fields = $this->configuration['fields'] + $fields;
}
$fields = $this->getConfiguration()['fields'] + $fields;
return $fields;
}
/**
* {@inheritdoc}
*/
public function getConfiguration() {
return $this->configuration;
}
/**
* {@inheritdoc}
*/
public function setConfiguration(array $configuration) {
// We must preserve integer keys for column_name mapping.
$this->configuration = NestedArray::mergeDeepArray([$this->defaultConfiguration(), $configuration], TRUE);
}
/**
* {@inheritdoc}
*/
public function defaultConfiguration() {
return [
'fields' => [],
'keys' => [],
'column_names' => [],
'header_row_count' => 0,
'file_flags' => \SplFileObject::READ_CSV | \SplFileObject::READ_AHEAD | \SplFileObject::DROP_NEW_LINE | \SplFileObject::SKIP_EMPTY,
'delimiter' => ',',
'enclosure' => '"',
'escape' => '\\',
'path' => '',
'file_class' => 'Drupal\migrate_source_csv\CSVFileObject',
];
}
/**
* {@inheritdoc}
*/
public function calculateDependencies() {
return [];
}
}
......@@ -24,6 +24,7 @@ class CSVFileObjectTest extends CSVUnitBase {
protected function setUp() {
parent::setUp();
$this->csvFileObject = new CSVFileObject($this->happyPath);
$this->csvFileObject->setFlags(\SplFileObject::READ_CSV | \SplFileObject::READ_AHEAD | \SplFileObject::DROP_NEW_LINE | \SplFileObject::SKIP_EMPTY);
}
/**
......@@ -33,8 +34,6 @@ class CSVFileObjectTest extends CSVUnitBase {
*/
public function testCreate() {
$this->assertInstanceOf(CSVFileObject::class, $this->csvFileObject);
$flags = CSVFileObject::READ_CSV | CSVFileObject::READ_AHEAD | CSVFileObject::DROP_NEW_LINE | CSVFileObject::SKIP_EMPTY;
$this->assertSame($flags, $this->csvFileObject->getFlags());
}
/**
......
......@@ -25,6 +25,13 @@ abstract class CSVUnitBase extends UnitTestCase {
*/
protected $sad;
/**
* The multi line file url.
*
* @var string
*/
protected $multiLine;
/**
* {@inheritdoc}
*/
......@@ -67,6 +74,18 @@ EOD;
14|Kimberly|Jackson|kjacksond@example.com|Thailand|19.187.65.116
15|Jason|Mason|jmasone@example.com|Greece|225.129.68.203
EOD;
$multiLineContent = <<<'EOD'
id,title,description
1,"Title 1","Description 1 Line 1
Description 1 Line 2
Description 1 Line 3"
2,"Title 2","Description 2 Line 1
Description 2 Line 2
Description 2 Line 3"
3,"Title 3","Description 3 Line 1
Description 3 Line 2
Description 3 Line 3"
EOD;
$this->happyPath = vfsStream::newFile('data.csv')
......@@ -77,6 +96,9 @@ EOD;
->at($root_dir)
->withContent($sad)
->url();
$this->multiLine = vfsStream::newFile('multi-line.csv')
->at($root_dir)
->withContent($multiLineContent)
->url();
}
}
......@@ -312,6 +312,32 @@ class CSVUnitTest extends CSVUnitBase {
$this->assertInstanceOf(FooCSVFileObject::class, $fileObject);
}
/**
* Tests configurable CSV file object.
*
* @covers ::current
* @covers ::rewind
*/
public function testConfigurableCSVFileObjectFlags() {
$configuration = [
'path' => $this->multiLine,
'keys' => ['id'],
'header_row_count' => 1,
'file_flags' => \SplFileObject::READ_CSV | \SplFileObject::READ_AHEAD | \SplFileObject::SKIP_EMPTY,
];
$csv = new CSV($configuration, $this->pluginId, $this->pluginDefinition, $this->migration);
$csv_file_object = $csv->initializeIterator();
$row = [
'id' => '1',
'title' => 'Title 1',
'description' => "Description 1 Line 1\nDescription 1 Line 2\nDescription 1 Line 3",
];
$csv_file_object->rewind();
$current = $csv_file_object->current();
$this->assertArrayEquals($row, $current);
}
}
/**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment