Commit 1e3b25e1 authored by claudiu.cristea's avatar claudiu.cristea

Issue #2816041 by claudiu.cristea: Create the initial codebase

parents
# Migrate Spreadsheet
## Overview
The module provides a migrate source plugin for importing data from spreadsheet files. This source plugin uses the [PhpOffice/PhpSpreadsheet](https://github.com/PHPOffice/PhpSpreadsheet) library to read from the spreadsheet files.
[The supported source file formats](https://github.com/PHPOffice/PhpSpreadsheet#file-formats-supported) include .ods, .xls, .xlsx and .csv.
## Usage
In your migration file:
```yaml
id: ...
source:
plugin: spreadsheet
# The source file. The path can be either relative to the Drupal root or an
# absolute reference, such as a stream wrapper URI.
file: ../resources/source_file.xlsx
# The worksheet to be read.
worksheet: 'Personnel list'
# The first row from where the table starts. Points to the row that contains
# the table header. It's a "zero based" value. If the table header is on the
# first row, this should be 0. The value of 3 means that the table header is
# on the fourth row.
header_row: 3
# Columns to be returned, basically a list of table header cell values.
columns:
- ID
- 'First name'
- 'Sure name'
- Gender
# If this setting is specified, the source will also return a column
# containing the 'zero based' row index under this name. For this example,
# 'Row index' can be used later in the `keys:` list to make this column a
# primary key column.
row_index_column: 'Row index'
# This points to the column or columns that provide the primary key. If it is
# omitted, the current row position will be returned as the primary key (this
# requires `row_index_column` to be set).
keys:
- ID
destination:
...
```
## Author
Claudiu Cristea ([claudiu.cristea](https://www.drupal.org/u/claudiu.cristea))
{
"name": "drupal/migrate_spreadsheet",
"description": "Provides a migrate source plugin for importing data from spreadsheet files",
"type": "drupal-module",
"license": "GPL-2.0+",
"support": {
"issues": "https://www.drupal.org/project/issues/migrate_spreadsheet",
"source": "https://www.drupal.org/project/migrate_spreadsheet"
},
"keywords": [
"Drupal",
"Spreadsheet",
"Excel"
],
"repositories": [
{
"type": "package",
"package": {
"name": "phpoffice/phpspreadsheet",
"version": "0.0",
"dist": {
"url": "https://github.com/PHPOffice/PhpSpreadsheet/archive/develop.zip",
"type": "zip"
}
}
}
],
"require": {
"phpoffice/phpspreadsheet": "~0.0"
},
"autoload": {
"psr-4": {
"PhpOffice\\PhpSpreadsheet\\": "vendor/phpoffice/phpspreadsheet/src/PhpSpreadsheet"
}
}
}
migrate.source.spreadsheet:
type: migrate_source
label: 'Spreadsheet'
mapping:
file:
type: string
label: 'Path to the source spreadsheet file'
worksheet:
type: string
label: 'The worksheet name'
header_row:
type: integer
label: 'The first row from where the table starts. It''s a "zero based" value.'
columns:
type: sequence
label: 'Columns to be read from spreadsheet'
sequence:
type: string
label: 'Column'
row_index_column:
type: string
label: 'Name of row index "zero based" value'
keys:
type: sequence
label: 'Column name(s) which represent the key uniquely identifying each record'
sequence:
type: string
label: 'Key field'
type: module
name: 'Migrate Spreadsheet'
description: 'Provides a migrate source plugin for importing data from spreadsheet files.'
core: '8.x'
package: Migration
dependencies:
- migrate
services:
migrate_spreadsheet.iterator:
class: Drupal\migrate_spreadsheet\SpreadsheetIterator
<?php
namespace Drupal\migrate_spreadsheet\Plugin\migrate\source;
use Drupal\Component\Plugin\ConfigurablePluginInterface;
use Drupal\Component\Utility\NestedArray;
use Drupal\migrate\MigrateException;
use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
use Drupal\migrate\Plugin\MigrationInterface;
use PhpOffice\PhpSpreadsheet\IOFactory;
/**
* Provides a source plugin that migrates data from spreadsheet files.
*
* This source plugin uses the PhpOffice/PhpSpreadsheet library to read
* spreadsheet files.
*
* @MigrateSource(
* id = "spreadsheet"
* )
*/
class Spreadsheet extends SourcePluginBase implements ConfigurablePluginInterface/*, ContainerFactoryPluginInterface*/ {
/**
 * Constructs a spreadsheet migration source plugin object.
 *
 * @param array $configuration
 *   A configuration array containing information about the plugin instance.
 * @param string $plugin_id
 *   The plugin_id for the plugin instance.
 * @param mixed $plugin_definition
 *   The plugin implementation definition.
 * @param \Drupal\migrate\Plugin\MigrationInterface $migration
 *   The current migration.
 */
public function __construct(array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration) {
parent::__construct($configuration, $plugin_id, $plugin_definition, $migration);
// Run the configuration through setConfiguration() so that the values from
// defaultConfiguration() are merged in. This has to happen after the parent
// constructor has run.
$this->setConfiguration($configuration);
}
/**
* {@inheritdoc}
*/
public function defaultConfiguration() {
  // Baseline values for every source setting this plugin reads. They are
  // merged with the migration-provided values in setConfiguration().
  $defaults = [
    'file' => NULL,
    'worksheet' => NULL,
    'header_row' => 0,
    'columns' => [],
    'keys' => [],
    'row_index_column' => NULL,
  ];

  return $defaults;
}
/**
* {@inheritdoc}
*/
public function setConfiguration(array $configuration) {
  // Deep-merge the passed configuration onto the plugin defaults and store
  // the result as the effective configuration.
  $defaults = $this->defaultConfiguration();
  $this->configuration = NestedArray::mergeDeep($defaults, $configuration);
}
/**
* {@inheritdoc}
*/
public function getConfiguration() {
// Returns the configuration as stored by setConfiguration(), i.e. the passed
// values merged with the defaults.
return $this->configuration;
}
/**
* {@inheritdoc}
*/
public function __toString() {
  // The source is identified as "<file>:<worksheet>".
  $config = $this->configuration;

  return sprintf('%s:%s', $config['file'], $config['worksheet']);
}
/**
* {@inheritdoc}
*/
public function getIds() {
  $config = $this->getConfiguration();

  // Explicit key columns were configured: each of them is declared as a
  // string identifier.
  if (!empty($config['keys'])) {
    $ids = [];
    foreach (array_flip($config['keys']) as $key_name => $position) {
      $ids[$key_name] = ['type' => 'string'];
    }
    return $ids;
  }

  // No keys are defined, so the 'zero based' index of the current spreadsheet
  // row acts as identifier. That column needs a name to be exposed under.
  if (empty($config['row_index_column'])) {
    throw new \RuntimeException("Row index should act as key but no name has been provided. Set 'row_index_column' in source config to provide a name for this column.");
  }

  return [$config['row_index_column'] => ['type' => 'integer']];
}
/**
* {@inheritdoc}
*/
public function fields() {
  // Each configured column name is used both as the field machine name and
  // as its description.
  $config = $this->getConfiguration();
  $names = $config['columns'];

  return array_combine($names, $names);
}
/**
* {@inheritdoc}
*/
public function initializeIterator() {
  $config = $this->getConfiguration();

  /** @var \Drupal\migrate_spreadsheet\SpreadsheetIteratorInterface $iterator */
  $iterator = \Drupal::service('migrate_spreadsheet.iterator');

  // The call order matters. The iterator resolves column and key names
  // against the table header when setColumns() and setKeys() are called, and
  // it caches the header it has read. The header row must therefore be set
  // right after the worksheet and before the columns and keys; otherwise the
  // configured 'header_row' is ignored when the names are resolved.
  $iterator
    ->setWorksheet($this->loadWorksheet())
    ->setHeaderRow($config['header_row'])
    ->setColumns($config['columns'])
    ->setKeys($config['keys'])
    ->setRowIndexColumn($config['row_index_column']);

  return $iterator;
}
/**
 * Loads the worksheet configured as migration source.
 *
 * @return \PhpOffice\PhpSpreadsheet\Worksheet
 *   The source worksheet.
 *
 * @throws \Drupal\migrate\MigrateException
 *   When the source file doesn't exist, when no worksheet name has been
 *   configured or when the spreadsheet library fails while reading the file.
 *   In the latter case the library exception is available as the previous
 *   exception.
 */
protected function loadWorksheet() {
  $config = $this->getConfiguration();

  // Check that the file exists.
  if (!file_exists($config['file'])) {
    throw new MigrateException("File with path '{$config['file']}' doesn't exist.");
  }

  // Check that a non-empty worksheet has been passed.
  if (empty($config['worksheet'])) {
    throw new MigrateException('No worksheet was passed.');
  }

  // Load the workbook.
  try {
    // Identify the type of the input file.
    $type = IOFactory::identify($config['file']);

    // Create a new Reader of the file type.
    /** @var \PhpOffice\PhpSpreadsheet\Reader\BaseReader $reader */
    $reader = IOFactory::createReader($type);

    // Advise the Reader that we only want to load cell data.
    $reader->setReadDataOnly(TRUE);

    // Advise the Reader of which worksheet we want to load.
    $reader->setLoadSheetsOnly($config['worksheet']);

    /** @var \PhpOffice\PhpSpreadsheet\Spreadsheet $workbook */
    $workbook = $reader->load($config['file']);

    // NOTE(review): getSheet() is called without an index. Presumably it
    // resolves to the first loaded sheet, which should be the requested one
    // because only that sheet was loaded. Confirm against the library version
    // in use.
    return $workbook->getSheet();
  }
  catch (\Exception $e) {
    $class = get_class($e);
    // Chain the library exception as 'previous' so that its type and stack
    // trace stay available to whoever handles the migrate exception.
    throw new MigrateException("Got '$class', message '{$e->getMessage()}'.", 0, $e);
  }
}
/**
* {@inheritdoc}
*/
public function calculateDependencies() {
// This plugin declares no dependencies.
return [];
}
}
<?php
namespace Drupal\migrate_spreadsheet;
use PhpOffice\PhpSpreadsheet\Cell;
use PhpOffice\PhpSpreadsheet\Worksheet;
/**
* Provides a spreadsheet iterator.
*/
class SpreadsheetIterator implements SpreadsheetIteratorInterface {
/**
 * The worksheet object.
 *
 * @var \PhpOffice\PhpSpreadsheet\Worksheet
 */
protected $worksheet;
/**
 * The first row from where the table starts. It's a 'zero based' value.
 *
 * @var int
 */
protected $headerRow = 0;
/**
 * Columns list keyed by header cell and having column index as value.
 *
 * @var int[]
 */
protected $columns = [];
/**
 * Primary keys list keyed by header cell and having column index as value.
 *
 * NULL means that no key columns were set; in that case the row index acts
 * as key.
 *
 * @var int[]|null
 */
protected $keys = NULL;
/**
 * The name to be used for row index/position/delta 'zero based' value.
 *
 * @var string|null
 */
protected $rowIndexColumn = NULL;
/**
 * All headers keyed by cell value and having column index as value.
 *
 * Computed on demand by getHeaders() and dropped by setWorksheet().
 *
 * @var int[]
 */
protected $headers;
/**
 * The total number of rows in the worksheet.
 *
 * Computed on demand by getRowsCount() and dropped by setWorksheet().
 *
 * @var int
 */
protected $rowsCount;
/**
 * The total number of columns in the worksheet.
 *
 * Computed on demand by getColumnsCount() and dropped by setWorksheet().
 *
 * @var int
 */
protected $columnsCount;
/**
 * The relative index of the current row.
 *
 * It's a 'zero based' value, counted from the first row after the header.
 *
 * @var int
 */
protected $currentRow = 0;
/**
* {@inheritdoc}
*/
public function key() {
  $keys = $this->getKeys();

  if ($keys === NULL) {
    // If no keys were passed, use the spreadsheet current row position.
    if (!$this->getRowIndexColumn()) {
      throw new \RuntimeException("Row index should act as key but no name has been provided. Use SpreadsheetIterator::setRowIndexColumn() to provide a name for this column.");
    }
    return [$this->currentRow];
  }

  // Collect the value of each key column on the current row, as a list.
  $worksheet = $this->getWorksheet();
  $row = $this->getAbsoluteRowIndex();
  $values = [];
  foreach ($keys as $column_delta) {
    // The lookup is told not to create missing cells (third argument), so it
    // may hand back no cell object at all. Report such a cell as an empty
    // value instead of calling getValue() on a non-object.
    $cell = $worksheet->getCellByColumnAndRow($column_delta, $row, FALSE);
    $values[] = $cell ? $cell->getValue() : NULL;
  }

  return $values;
}
/**
* {@inheritdoc}
*/
public function valid() {
  // A negative relative position is never valid.
  if ($this->currentRow < 0) {
    return FALSE;
  }

  // The position is valid as long as the corresponding worksheet row is not
  // past the last row of the worksheet.
  return $this->getAbsoluteRowIndex() <= $this->getRowsCount();
}
/**
* {@inheritdoc}
*/
public function rewind() {
// Go back to the first row after the table header (relative index 0).
$this->currentRow = 0;
}
/**
* {@inheritdoc}
*/
public function current() {
  $keys = $this->getKeys();

  if ($keys === NULL) {
    $row_delta_field = $this->getRowIndexColumn();
    if (!$row_delta_field) {
      throw new \RuntimeException("Row index should act as key but no name has been provided. Use SpreadsheetIterator::setRowIndexColumn() to provide a name for this column.");
    }
    // No key columns: the row index acts as key. It is not a real worksheet
    // column, so it is marked with the sentinel column index -1.
    $keys = [$row_delta_field => -1];
  }

  $all_columns = $keys + $this->getColumns();
  // Arrange columns in their spreadsheet native order. The row index column,
  // if any, comes first because of its -1 index.
  asort($all_columns);

  $row = $this->getAbsoluteRowIndex();
  $values = [];
  foreach ($all_columns as $name => $column_delta) {
    if ($column_delta === -1) {
      $values[$name] = $this->currentRow;
      continue;
    }
    // The lookup is told not to create missing cells (third argument), so it
    // may hand back no cell object at all. Such a cell is returned as an
    // empty value; the previous code printed debug output and then failed on
    // it.
    $cell = $this->getWorksheet()->getCellByColumnAndRow($column_delta, $row, FALSE);
    $values[$name] = $cell ? $cell->getValue() : NULL;
  }

  return $values;
}
/**
* {@inheritdoc}
*/
public function next() {
// Advance the relative row position by one.
$this->currentRow++;
}
/**
* {@inheritdoc}
*/
public function setWorksheet(Worksheet $worksheet) {
  $this->worksheet = $worksheet;

  // The headers and the row/column counts were computed from the previous
  // worksheet. Drop them so they are recomputed on demand.
  unset($this->headers);
  unset($this->columnsCount);
  unset($this->rowsCount);

  return $this;
}
/**
* {@inheritdoc}
*/
public function getWorksheet() {
  // Only hand out a real worksheet object; anything else means that
  // setWorksheet() has not been called yet.
  if ($this->worksheet instanceof Worksheet) {
    return $this->worksheet;
  }

  throw new \Exception('No worksheet has been set.');
}
/**
* {@inheritdoc}
*/
public function setColumns(array $columns) {
  $headers = $this->getHeaders();

  // If no columns were passed, all columns will be used.
  if (empty($columns)) {
    $this->columns = $headers;
    return $this;
  }

  // Map each requested column name to its index in the table header and
  // refuse names that are not part of the header.
  $this->columns = [];
  foreach ($columns as $column) {
    if (isset($headers[$column])) {
      $this->columns[$column] = $headers[$column];
      continue;
    }
    throw new \RuntimeException("Column '$column' doesn't exist in the table header.");
  }

  return $this;
}
/**
* {@inheritdoc}
*/
public function getColumns() {
// Column indexes keyed by header cell value, as built by setColumns().
return $this->columns;
}
/**
* {@inheritdoc}
*/
public function setKeys(array $keys) {
  // No key columns: remember that with NULL, so that the row index is used
  // as key instead.
  if (empty($keys)) {
    $this->keys = NULL;
    return $this;
  }

  // Map each key name to its index in the table header and refuse names that
  // are not part of the header.
  $headers = $this->getHeaders();
  $this->keys = [];
  foreach ($keys as $key) {
    if (isset($headers[$key])) {
      $this->keys[$key] = $headers[$key];
      continue;
    }
    throw new \RuntimeException("Key '$key' doesn't exist in the table header.");
  }

  return $this;
}
/**
* {@inheritdoc}
*/
public function getKeys() {
// Column indexes keyed by header cell value, or NULL when no key columns
// were set (see setKeys()).
return $this->keys;
}
/**
* {@inheritdoc}
*/
public function setHeaderRow($header_row) {
  $this->headerRow = $header_row;

  // The cached headers were read from the previous header row. Drop them so
  // that getHeaders() reads the new row on its next call. Columns and keys
  // that were already set are not re-resolved, so this setter should be
  // called before setColumns() and setKeys().
  unset($this->headers);

  return $this;
}
/**
* {@inheritdoc}
*/
public function getHeaderRow() {
  // The 'zero based' index of the row that holds the table header. The
  // previous code evaluated the property without returning it, so callers
  // always received NULL.
  return $this->headerRow;
}
/**
* {@inheritdoc}
*/
public function setRowIndexColumn($row_index_column) {
  $this->rowIndexColumn = $row_index_column;

  // Return the iterator, like the other setters of this class do, so that
  // this call can sit anywhere in a chain of setter calls.
  return $this;
}
/**
* {@inheritdoc}
*/
public function getRowIndexColumn() {
// The name under which the row index is exposed, or NULL if none was set.
return $this->rowIndexColumn;
}
/**
* {@inheritdoc}
*/
public function getHeaders() {
  if (!isset($this->headers)) {
    // The first data row is located at headerRow + 2 (see
    // getAbsoluteRowIndex()), so the table header sits one row above it. The
    // previous code read the header from headerRow + 2, i.e. from the first
    // data row.
    $row = $this->headerRow + 1;
    $worksheet = $this->getWorksheet();
    $columns_count = $this->getColumnsCount();

    // Build the list locally, so that the cache always ends up as an array
    // (even for a worksheet without columns) and is never left half-built
    // when a duplicated header is found.
    $headers = [];
    for ($col = 0; $col < $columns_count; ++$col) {
      $value = $worksheet->getCellByColumnAndRow($col, $row)->getValue();
      if (isset($headers[$value])) {
        throw new \RuntimeException("Table header '{$value}' is duplicated.");
      }
      $headers[$value] = $col;
    }
    $this->headers = $headers;
  }

  return $this->headers;
}
/**
* {@inheritdoc}
*/
public function getRowsCount() {
  // Serve the cached value when there is one.
  if (isset($this->rowsCount)) {
    return $this->rowsCount;
  }

  // Otherwise ask the worksheet for its highest row holding data and keep
  // the answer for later calls.
  $worksheet = $this->getWorksheet();
  $this->rowsCount = $worksheet->getHighestDataRow();

  return $this->rowsCount;
}
/**
* {@inheritdoc}
*/
public function getColumnsCount() {
  // Serve the cached value when there is one.
  if (isset($this->columnsCount)) {
    return $this->columnsCount;
  }

  // Otherwise convert the letter of the highest column holding data into its
  // numeric index and keep the result for later calls.
  $highest_column = $this->getWorksheet()->getHighestDataColumn();
  $this->columnsCount = Cell::columnIndexFromString($highest_column);

  return $this->columnsCount;
}
/**
 * Gets the absolute row index.
 *
 * Translates the relative position of the current row into the row number
 * used when reading cells from the worksheet.
 *
 * @return int
 *   The worksheet row number of the current row.
 */
protected function getAbsoluteRowIndex() {
  // Relative position 0 maps to the row located at header row + 2.
  $first_data_row = $this->headerRow + 2;

  return $first_data_row + $this->currentRow;
}
}
<?php
namespace Drupal\migrate_spreadsheet;
use PhpOffice\PhpSpreadsheet\Worksheet;
/**
* Provides an interface for spreadsheet iterators.
*/
interface SpreadsheetIteratorInterface extends \Iterator{
/**
* Sets the worksheet object.
*