Commit 48334f0a authored by claudiu.cristea's avatar claudiu.cristea

Issue #2818509 by claudiu.cristea: Exclude columns with empty header

parent e9d3ab45
......@@ -31,6 +31,7 @@ source:
# Columns to be returned, basically a list of table header cell values.
columns:
- ID
- Revision
- 'First name'
- 'Sure name'
- Gender
......@@ -39,10 +40,15 @@ source:
# 'Row index' can be used later in `keys:` list to make this column a primary
# key column.
row_index_column: 'Row index'
# This points to the column or columns that provides the primary key. If is
# missed, the current row position will be returned as primary key.
# This is a list of source columns that compose the primary key. The list is
# keyed by column name and has the field schema as value. If no keys are
# defined, the current row position will be returned as the primary key, but
# in this case 'row_index_column' must have a value.
keys:
- ID
ID:
type: integer
Revision:
type: string
destination:
...
```
......
......@@ -24,5 +24,9 @@ migrate.source.spreadsheet:
type: sequence
label: 'Column name(s) which represent the key uniquely identifying each record'
sequence:
type: string
label: 'Key field'
type: mapping
label: 'Schema'
mapping:
type:
type: string
label: 'Field type'
......@@ -91,11 +91,7 @@ class Spreadsheet extends SourcePluginBase implements ConfigurablePluginInterfac
return [$config['row_index_column'] => ['type' => 'integer']];
}
return array_map(function () {
return ['type' => 'string'];
},
array_flip($config['keys'])
);
return $config['keys'];
}
/**
......@@ -110,16 +106,13 @@ class Spreadsheet extends SourcePluginBase implements ConfigurablePluginInterfac
* {@inheritdoc}
*/
public function initializeIterator() {
$config = $this->getConfiguration();
/** @var \Drupal\migrate_spreadsheet\SpreadsheetIteratorInterface $iterator */
$iterator = \Drupal::service('migrate_spreadsheet.iterator');
$iterator
->setWorksheet($this->loadWorksheet())
->setColumns($config['columns'])
->setKeys($config['keys'])
->setHeaderRow($config['header_row'])
->setRowIndexColumn($config['row_index_column']);
return $iterator;
$configuration = $this->getConfiguration();
$configuration['worksheet'] = $this->loadWorksheet();
$configuration['keys'] = array_keys($configuration['keys']);
// The 'file' and 'plugin' items are not part of iterator configuration.
unset($configuration['file'], $configuration['plugin']);
return \Drupal::service('migrate_spreadsheet.iterator')
->setConfiguration($configuration);
}
/**
......
......@@ -11,18 +11,11 @@ use PhpOffice\PhpSpreadsheet\Worksheet;
class SpreadsheetIterator implements SpreadsheetIteratorInterface {
/**
* The worksheet object.
* The iterator configuration.
*
* @var \PhpOffice\PhpSpreadsheet\Worksheet
*/
protected $worksheet;
/**
* The first row from where the table starts. It's a 'zero based' value.
*
* @var int
* @var array
*/
protected $headerRow = 0;
protected $configuration = [];
/**
* Columns list keyed by header cell and having column index as value.
......@@ -34,16 +27,9 @@ class SpreadsheetIterator implements SpreadsheetIteratorInterface {
/**
* Primary keys list keyed by header cell and having column index as value.
*
* @var string[]|null
*/
protected $keys = NULL;
/**
* The name to be used for row index/position/delta 'zero based' value.
*
* @var string|null
* @var string[]
*/
protected $rowIndexColumn = NULL;
protected $keys = [];
/**
* All headers keyed by cell value and having column index as value.
......@@ -77,7 +63,7 @@ class SpreadsheetIterator implements SpreadsheetIteratorInterface {
* {@inheritdoc}
*/
public function key() {
if (($keys = $this->getKeys()) === NULL) {
if (empty($keys = $this->getKeys())) {
// If no keys were passed, use the spreadsheet current row position.
if (!$this->getRowIndexColumn()) {
throw new \RuntimeException("Row index should act as key but no name has been provided. Use SpreadsheetIterator::setRowIndexColumn() to provide a name for this column.");
......@@ -89,7 +75,7 @@ class SpreadsheetIterator implements SpreadsheetIteratorInterface {
function ($column_delta) {
return $this->getWorksheet()->getCellByColumnAndRow($column_delta, $this->getAbsoluteRowIndex(), FALSE)->getValue();
},
$this->getKeys()
$keys
));
}
......@@ -111,12 +97,12 @@ class SpreadsheetIterator implements SpreadsheetIteratorInterface {
* {@inheritdoc}
*/
public function current() {
if (($keys = $this->getKeys()) === NULL) {
$row_delta_field = $this->getRowIndexColumn();
if (!$row_delta_field) {
throw new \RuntimeException("Row index should act as key but no name has been provided. Use SpreadsheetIterator::setRowIndexColumn() to provide a name for this column.");
if (empty($keys = $this->getKeys())) {
$row_index_column = $this->getRowIndexColumn();
if (!$row_index_column) {
throw new \InvalidArgumentException("Row index should act as key but no name has been provided. Pass a string in \$config['row_index_column'] key when setting the configuration in SpreadsheetIterator::setConfiguration(\$config), to provide a name for this column.");
}
$keys = [$row_delta_field => -1];
$keys = [$row_index_column => -1];
}
$all_columns = $keys + $this->getColumns();
......@@ -128,12 +114,6 @@ class SpreadsheetIterator implements SpreadsheetIteratorInterface {
if ($column_delta === -1) {
return $this->currentRow;
}
if (!$this->getWorksheet()->getCellByColumnAndRow($column_delta, $this->getAbsoluteRowIndex(), FALSE)) {
print_r($this->key());
print "\n";
print_r($this->getAbsoluteRowIndex());
}
return $this->getWorksheet()->getCellByColumnAndRow($column_delta, $this->getAbsoluteRowIndex(), FALSE)->getValue();
},
$all_columns
......@@ -150,106 +130,86 @@ class SpreadsheetIterator implements SpreadsheetIteratorInterface {
/**
* {@inheritdoc}
*/
public function setWorksheet(Worksheet $worksheet) {
// Unset the computed values.
unset($this->rowsCount, $this->columnsCount, $this->headers);
$this->worksheet = $worksheet;
public function setConfiguration(array $configuration) {
$this->configuration = $configuration;
// Unset cached values.
unset($this->columns, $this->keys, $this->rowsCount, $this->columnsCount, $this->headers);
return $this;
}
/**
* {@inheritdoc}
*/
public function getWorksheet() {
if (!isset($this->worksheet) || !$this->worksheet instanceof Worksheet) {
throw new \Exception('No worksheet has been set.');
}
return $this->worksheet;
public function getConfiguration() {
return $this->configuration;
}
/**
* {@inheritdoc}
*/
public function setColumns(array $columns) {
$headers = $this->getHeaders();
// If no columns were passed, all columns will be used.
if (empty($columns)) {
$this->columns = $headers;
}
else {
$this->columns = [];
foreach ($columns as $column) {
if (!isset($headers[$column])) {
throw new \RuntimeException("Column '$column' doesn't exist in the table header.");
}
$this->columns[$column] = $headers[$column];
}
public function getWorksheet() {
if (empty($this->configuration['worksheet']) || !$this->configuration['worksheet'] instanceof Worksheet) {
throw new \InvalidArgumentException("No valid 'worksheet' configuration.");
}
return $this;
return $this->configuration['worksheet'];
}
/**
* {@inheritdoc}
*/
public function getColumns() {
return $this->columns;
}
/**
* {@inheritdoc}
*/
public function setKeys(array $keys) {
if (empty($keys)) {
$this->keys = NULL;
}
else {
if (!isset($this->columns)) {
$headers = $this->getHeaders();
$this->keys = [];
foreach ($keys as $key) {
if (!isset($headers[$key])) {
throw new \RuntimeException("Key '$key' doesn't exist in the table header.");
if (empty($this->configuration['columns'])) {
// If no columns were passed, all columns will be used.
$this->columns = $headers;
}
else {
$this->columns = [];
foreach ($this->configuration['columns'] as $column) {
$column = trim($column);
if (!isset($headers[$column])) {
throw new \InvalidArgumentException("Column '$column' doesn't exist in the table header.");
}
$this->columns[$column] = $headers[$column];
}
$this->keys[$key] = $headers[$key];
}
}
return $this;
return $this->columns;
}
/**
* {@inheritdoc}
*/
public function getKeys() {
if (!isset($this->keys)) {
$this->keys = [];
if (!empty($this->configuration['keys'])) {
$headers = $this->getHeaders();
$this->keys = [];
foreach ($this->configuration['keys'] as $key) {
if (!isset($headers[$key])) {
throw new \InvalidArgumentException("Key '$key' doesn't exist in the table header.");
}
$this->keys[$key] = $headers[$key];
}
}
}
return $this->keys;
}
/**
* {@inheritdoc}
*/
public function setHeaderRow($header_row) {
$this->headerRow = $header_row;
return $this;
}
/**
* {@inheritdoc}
*/
public function getHeaderRow() {
return $this->headerRow;
}
/**
* {@inheritdoc}
*/
public function setRowIndexColumn($row_index_column) {
$this->rowIndexColumn = $row_index_column;
return empty($this->configuration['header_row']) ? 0 : $this->configuration['header_row'];
}
/**
* {@inheritdoc}
*/
public function getRowIndexColumn() {
return $this->rowIndexColumn;
return empty($this->configuration['row_index_column']) ? NULL : $this->configuration['row_index_column'];
}
/**
......@@ -258,11 +218,13 @@ class SpreadsheetIterator implements SpreadsheetIteratorInterface {
public function getHeaders() {
if (!isset($this->headers)) {
for ($col = 0; $col < $this->getColumnsCount(); ++$col) {
$value = $this->getWorksheet()->getCellByColumnAndRow($col, $this->getHeaderRow() + 1)->getValue();
$value = trim($this->getWorksheet()->getCellByColumnAndRow($col, $this->getHeaderRow() + 1)->getValue());
if (isset($this->headers[$value])) {
throw new \RuntimeException("Table header '{$value}' is duplicated.");
}
$this->headers[$value] = $col;
if ($value) {
$this->headers[$value] = $col;
}
}
}
return $this->headers;
......@@ -294,7 +256,13 @@ class SpreadsheetIterator implements SpreadsheetIteratorInterface {
* @return int
*/
protected function getAbsoluteRowIndex() {
return $this->headerRow + $this->currentRow + 2;
return
$this->getHeaderRow() +
$this->currentRow +
// Add 1 because PhpSpreadsheet expects a '1 based' row.
1 +
// Add 1 because the first data row starts immediately after header row.
1;
}
}
......@@ -2,44 +2,53 @@
namespace Drupal\migrate_spreadsheet;
use PhpOffice\PhpSpreadsheet\Worksheet;
/**
* Provides an interface for spreadsheet iterators.
*/
interface SpreadsheetIteratorInterface extends \Iterator{
/**
* Sets the worksheet object.
*
* @param \PhpOffice\PhpSpreadsheet\Worksheet $worksheet
* The spreadsheet worksheet.
* Sets the iterator configuration.
*
* The caller should ensure sane values.
*
* @param array $configuration
* An associative array with the following keys:
* - worksheet (\PhpOffice\PhpSpreadsheet\Worksheet): The worksheet object.
* - columns (string[]): An indexed array of columns.
* - keys (string[]): A list of columns that are giving the primary key.
* - header_row (int): The index of the first row from where the table
* starts. It's a 'zero based' value that points to the row that contains
* the table header. If the header row is the first row, this should be 0.
* A value of 3 would mean that the table header is on the fourth row.
* - row_index_column (string): The row index column name. The 'row index
* column' is a pseudo-column that does not exist on the worksheet and
* contains the 'zero based' current index/position/delta of each row. The
* caller can pass a name to be used for that column. If a name was passed,
* that column will also be output along with the row, in ::current(). The
* name can also be passed in the 'keys' list. In that case the row index
* will be, or will be part of, the primary key.
*
* @return $this
*/
public function setWorksheet(Worksheet $worksheet);
public function setConfiguration(array $configuration);
/**
* Gets the worksheet.
* Gets the iterator configuration.
*
* @return \PhpOffice\PhpSpreadsheet\Worksheet
*
* @see \Drupal\migrate_spreadsheet\SpreadsheetIteratorInterface::setWorksheet()
* @return array
*/
public function getWorksheet();
public function getConfiguration();
/**
* Sets the list of relevant columns to be returned.
*
* @param string[] $columns
* An indexed array of columns.
* Gets the worksheet.
*
* @return $this
* @return \PhpOffice\PhpSpreadsheet\Worksheet
*
* @throws \RuntimeException
* If a columns does not exist in the header.
* @throws \InvalidArgumentException
* If an empty or invalid 'worksheet' has been passed.
*/
public function setColumns(array $columns);
public function getWorksheet();
/**
* Gets the list of columns.
......@@ -47,84 +56,35 @@ interface SpreadsheetIteratorInterface extends \Iterator{
* @return string[]
* The list of columns.
*
* @throws \InvalidArgumentException
* If a column passed in 'columns' does not exist in the header.
*
* @see \Drupal\migrate_spreadsheet\SpreadsheetIteratorInterface::setConfiguration()
*/
public function getColumns();
/**
* Sets the list of columns that arge giving the primary key.
*
* In nothing is passed, the iterator will return the index of the current row
* relative to the table header.
*
* @param string[] $keys
* A list of columns that are defining the primary index.
*
* @return $this
*
* @throws \RuntimeException
* If a key does not exist in the header.
*/
public function setKeys(array $keys);
/**
* Gets the list of columns that arge giving the primary key.
* Gets the list of columns that compose the primary key.
*
* @return string[]
* A list of column names.
*
* @see \Drupal\migrate_spreadsheet\SpreadsheetIteratorInterface::setKeys()
* @throws \InvalidArgumentException
* If a key passed in 'keys' does not exist in the header.
*/
public function getKeys();
/**
* Sets the index of the first row from where the table starts.
*
* It's a 'zero based' value that points to the row that contains
* the table header. If the table row is the first this should be 0. A value
* of 3 would mean that the table header is on the fourth row.
*
* @param int $header_row
* The header row index.
*
* @return $this
*/
public function setHeaderRow($header_row);
/**
* Gets the header row index.
*
* @return int
*
* @see \Drupal\migrate_spreadsheet\SpreadsheetIteratorInterface::setConfiguration()
*/
public function getHeaderRow();
/**
* Sets the row index column name.
*
* The 'row index column' is a pseudo-column, that not exist on the worksheet,
* containing the 'zero based' current index/position/delta of each row. The
* caller can use this method to set a name for that column. If a name was set
* that column will be also outputted along with the row, in ::current(). The
* name can be passed also in ::setKeys() list. In that case row index will be
* or will be part of the primary key.
*
* @param string $row_index_column
* The name to be used for the row index/position/delta columsn.
*
* @return $this
*
* @see \Drupal\migrate_spreadsheet\SpreadsheetIteratorInterface::setKeys()
*/
public function setRowIndexColumn($row_index_column);
/**
* Gets the name of the row index column.
*
* @return string
*
* @see \Drupal\migrate_spreadsheet\SpreadsheetIteratorInterface::setConfiguration()
*/
public function getRowIndexColumn();
......@@ -133,6 +93,9 @@ interface SpreadsheetIteratorInterface extends \Iterator{
*
* @return string[]
* An array having the header name as key and the column index as value.
*
* @throws \RuntimeException
* If a header cell is duplicated.
*/
public function getHeaders();
......
......@@ -34,9 +34,11 @@ class SpreadsheetIteratorTest extends UnitTestCase {
parent::setUp();
$this->iterator = (new SpreadsheetIterator())
->setWorksheet($this->getWorksheet())
->setHeaderRow(1)
->setColumns(['a', 'c', 'd']);
->setConfiguration([
'worksheet' => $this->getWorksheet(),
'header_row' => 1,
'columns' => ['a', 'c', 'd'],
]);
}
/**
......@@ -66,7 +68,9 @@ class SpreadsheetIteratorTest extends UnitTestCase {
* @covers ::current
*/
public function testIteration() {
$this->iterator->setRowIndexColumn('row');
$config = $this->iterator->getConfiguration();
$config['row_index_column'] = 'row';
$this->iterator->setConfiguration($config);
$this->assertTrue($this->iterator->valid());
$this->assertSame([0], $this->iterator->key());
......@@ -95,15 +99,16 @@ class SpreadsheetIteratorTest extends UnitTestCase {
$this->assertSame(['row' => 0, 'a' => 'a0', 'c' => 'c0', 'd' => 'd0'], $this->iterator->current());
// Try to return all columns.
$this->iterator->setColumns([]);
$config['columns'] = [];
$this->iterator->setConfiguration($config);
$this->assertTrue($this->iterator->valid());
$this->assertSame([0], $this->iterator->key());
$this->assertSame(['row' => 0, 'a' => 'a0', 'b' => 'b0', 'c' => 'c0', 'd' => 'd0'], $this->iterator->current());
// Use different primary keys.
$this->iterator
->setColumns(['a', 'd'])
->setKeys(['b', 'c']);
$config['columns'] = ['a', 'd'];
$config['keys'] = ['b', 'c'];
$this->iterator->setConfiguration($config);
$this->assertTrue($this->iterator->valid());
$this->assertSame(['b0', 'c0'], $this->iterator->key());
$this->assertSame(['a' => 'a0', 'b' => 'b0', 'c' => 'c0', 'd' => 'd0'], $this->iterator->current());
......@@ -142,8 +147,7 @@ class SpreadsheetIteratorTest extends UnitTestCase {
->setCellValue('A5', 'a2')
->setCellValue('B5', 'b2')
->setCellValue('C5', 'c2')
->setCellValue('D5', 'd2')
;
->setCellValue('D5', 'd2');
}
return $this->worksheet;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment