Commit 79bb4751 authored by Mike Ryan's avatar Mike Ryan
Browse files

Issue #2435231 by mikeryan: Documented and tweaked

parent c74d33df
INTRODUCTION
------------
The migrate_example module demonstrates how to implement custom migrations
for Drupal 8. It includes a group of "beer" migrations demonstrating a complete
simple migration scenario.
THE BEER SITE
-------------
In this scenario, we have a beer aficionado site which stores its data in MySQL
tables - there are content items for each beer on the site, user accounts with
profile data, categories to classify the beers, and user-generated comments on
the beers. We want to convert this site to Drupal with just a few modifications
to the basic structure.
To make the example as simple to run as possible, the source data is placed
in tables directly in your Drupal database - in most real-world scenarios, your
source data will be in an external database. The migrate_example_setup submodule
creates and populates these tables, as well as configuring your Drupal 8 site
(creating a node type, vocabulary, fields, etc.) to receive the data.
STRUCTURE
---------
There are two primary components to this example:
1. Migration configuration, in the config/install directory. These YAML files
describe the migration process and provide the mappings from the source data
to Drupal's destination entities.
2. Source plugins, in src/Plugin/migrate/source. These are referenced from the
configuration files, and provide the source data to the migration processing
pipeline, as well as manipulating that data where necessary to put it into
a canonical form for migrations.
UNDERSTANDING THE MIGRATIONS
----------------------------
The YAML and PHP files are copiously documented in-line. To best understand
the concepts described in a more-or-less narrative form, it is recommended you
read the files in the following order:
1. migrate_plus.migration_group.beer.yml
2. migrate.migration.beer_term.yml
3. BeerTerm.php
4. migrate.migration.beer_user.yml
5. BeerUser.php
6. migrate.migration.beer_node.yml
7. BeerNode.php
8. migrate.migration.beer_comment.yml
9. BeerComment.php
RUNNING THE MIGRATIONS
----------------------
The migrate_tools module (also part of the migrate_plus project) provides the
tools you need to perform migration processes. At this time, the web UI only
provides status information - to perform migration operations, you need to use
the drush commands.
# Enable the tools and the example module if you haven't already.
drush en -y migrate_tools,migrate_example
# Look at the migrations. Just look at them. Notice that they are displayed in
# the order they will be run, which reflects their dependencies. For example,
# because the node migration references the imported terms and users, it must
# run after those migrations have been run.
drush ms # Abbreviation for migrate-status
# Run the import operation for all the beer migrations.
drush mi --group=beer # Abbreviation for migrate-import
# Look at what you've done! Also, visit the site and see the imported content,
# user accounts, etc.
drush ms
# Look at the duplicate username message.
drush mmsg beer_user # Abbreviation for migrate-messages
# Run the rollback operation for all the migrations (removing all the imported
# content, user accounts, etc.). Note that it will roll back the migrations in
# the reverse of the order in which they were imported.
drush mr --group=beer # Abbreviation for migrate-rollback
# You can import specific migrations.
drush mi beer_term,beer_user
# At this point, go look at your content listing - you'll see beer nodes named
# "Stub", generated from the user's favbeers references.
drush mi beer_node,beer_comment
# Refresh your content listing - the stub nodes have been filled with real beer!
# You can roll back specific migrations.
drush mr beer_comment,beer_node
# Migration configuration for beer comments. No new concepts here.
id: beer_comment
label: Comments on beers
migration_group: beer
source:
plugin: beer_comment
constants:
entity_type: node
destination:
plugin: entity:comment
process:
......@@ -16,7 +15,9 @@ process:
plugin: migration
migration: beer_node
source: bid
entity_type: 'constants/entity_type'
entity_type:
plugin: default_value
default_value: node
field_name:
plugin: default_value
default_value: field_comments
......
# Migration configuration for beer content.
id: beer_node
label: Beers of the world
migration_group: beer
......@@ -6,11 +7,12 @@ source:
destination:
plugin: entity:node
process:
# Hardcode the destination node type (bundle) as 'migrate_example_beer'.
type:
plugin: default_value
default_value: migrate_example_beer
nid: bid
title: name
nid: bid
uid:
plugin: migration
migration: beer_user
......@@ -23,8 +25,24 @@ process:
plugin: migration
migration: beer_term
source: terms
# Some Drupal fields may have multiple components we may want to set
# separately. For example, text fields may have summaries (teasers) in
# addition to the full text value. We use / to separate the field name from
# the internal field value being set, and put it in quotes because / is a
# YAML special character.
'body/value': body
'body/summary': excerpt
# Our beer nodes have references to terms and users, so we want those to be
# imported first. We make that dependency explicit here - by putting those
# migrations under the 'required' key, we ensure that the tools will prevent
# us from running the beer_node migration unless the beer_term and beer_user
# migrations are complete (although we can override the dependency check by
# passing --force to the drush migrate-import command). We can also add
# 'optional' dependencies - these affect the order in which migrations are
# displayed, and run by default, but do not force you to run them in that
# order.
# The general rule of thumb is that any migrations referenced by migration
# process plugins should be required here.
migration_dependencies:
required:
- beer_term
......
# A "migration" is, in technical terms, a configuration entity which describes
# how to read source data, process it (generally by mapping source fields to
# destination fields), and write it to Drupal.
# The machine name for a migration, used to uniquely identify it.
id: beer_term
label: Migrate styles from the source database to taxonomy terms
# A human-friendly description of the migration.
label: Migrate style categories from the source database to taxonomy terms
# The machine name of the group containing this migration (which contains
# shared configuration to be merged with our own configuration here).
migration_group: beer
# Every migration must have a source plugin, which controls the delivery of our
# source data. In this case, our source plugin has the name "beer_term", which
# Drupal resolves to the PHP class defined in
# src/Plugin/migrate/source/BeerTerm.php.
source:
plugin: beer_term
# Every migration must also have a destination plugin, which handles writing
# the migrated data in the appropriate form for that particular kind of data.
# Most Drupal content is an "entity" of one type or another, and we need to
# specify what entity type we are populating (in this case, taxonomy terms).
# Unlike the source plugin (which is specific to our particular scenario), this
# destination plugin is implemented in Drupal itself.
destination:
plugin: entity:taxonomy_term
# Here's the meat of the migration - the processing pipeline. This describes how
# each destination field is to be populated based on the source data. For each
# destination field, one or more process plugins may be invoked.
process:
# The simplest process plugin is named 'get' - it is the default plugin, so
# does not need to be explicitly named. It simply copies the source value
# (the 'style' field from the source database in this case) to the destination
# field (the taxonomy term 'name' field). You can see we simply copy the
# source 'details' field to destination 'description' field in the same way.
name: style
description: details
# Here is a new plugin - default_value. In its simplest usage here, it is used
# to hard-code a destination value, the vid (vocabulary ID) our taxonomy terms
# should be assigned to. It's important to note that while above the right
# side of the mappings was a source field name, here the right side of the
# 'default_value:' line is an actual value.
vid:
plugin: default_value
default_value: migrate_example_beer_styles
name: style
description: details
# Here's another new plugin - migration. When importing data from another
# system, typically the unique identifiers for items on the destination side
# are not the same as the identifiers on the source side. For example, in
# our style data the term names are the unique identifiers for each term,
# while in Drupal each term is assigned a unique integer term ID (tid). When
# any such items are referenced in Drupal, the reference needs to be
# translated from the old ID ('ale') to the new ID (1). The migration
# framework keeps track of the relationships between source and destination
# IDs in map tables, and the migration plugin is the means of performing a
# lookup in those map tables during processing.
parent:
plugin: migration
# Here we reference the migration whose map table we're performing a lookup
# against. You'll note that in this case we're actually referencing this
# migration itself, since category parents are imported by the same
# migration. This works best when we're sure the parents are imported
# before the children, and in this case our source plugin is guaranteeing
# that.
migration: beer_term
# 'style_parent' is the parent reference field from the source data. The
# result of this plugin is that the destination 'parent' field is populated
# with the Drupal term ID of the referenced style (or NULL if style_parent
# was empty).
source: style_parent
# Migration configuration for user accounts. We've described most of what goes
# into migration configuration in migrate.migration.beer_term.yml, so won't
# repeat that here.
id: beer_user
label: Beer Drinkers of the world
migration_group: beer
......@@ -6,29 +9,86 @@ source:
destination:
plugin: entity:user
process:
pass: password
mail: email
init: email
status: status
roles:
plugin: default_value
default_value: 2
# Here's a new process plugin - dedupe_entity. Our source site allowed there
# to be multiple user accounts with the same username, but Drupal wants
# usernames to be unique. This plugin allows us to automatically generate
# unique usernames when we detect collisions.
name:
plugin: dedupe_entity
# The name of the source field containing the username.
source: username
# These next two settings identify the destination-side field to check for
# duplicates. They say "see if the incoming 'name' matches any existing
# 'name' field in any 'user' entity".
entity_type: user
field: name
# Finally, this specifies a string to use between the original value and the
# sequence number appended to make the value unique. Thus, the first 'alice'
# account gets the name 'alice' in Drupal, and the second one gets the name
# 'alice_1'.
postfix: _
source: name
pass: password
mail: mail
# Another new process plugin - callback. This allows us to filter an incoming
# source value through an arbitrary PHP function. The function called must
# have one required argument.
created:
-
plugin: callback
# The 'registered' timestamp in the source data is a string of the form
# 'yyyy-mm-dd hh:mm:ss', but Drupal wants a UNIX timestamp for 'created'.
source: registered
callable: strtotime
source: posted
access:
plugin: get
source: @created
login:
plugin: get
source: @created
status: status
init: mail
roles:
plugin: default_value
default_value: 2
field_migrate_example_gender: sex
# Our source data only has a single timestamp value, 'registered', which we
# want to use for all four of Drupal's user timestamp fields. We could
# duplicate the callback plugin we used for 'created' above - but we have a
# shortcut. Putting an @ sign at the beginning of the source value indicates
# that it is to be interpreted as a *destination* field name instead of a
# *source* field name. Thus, if a value we need in more than one place
# requires some processing beyond simply copying it directly, we can perform
# that processing a single time and use the result in multiple places.
changed: '@created'
access: '@created'
login: '@created'
# Yet another new process plugin - static_map. We're making a transformation
# in how we represent gender data - formerly it was integer values 0 for male
# and 1 for female, but in our modern Drupal site we will be making this a
# free-form text field, so we want to replace the obscure integers with
# simple strings.
field_migrate_example_gender:
plugin: static_map
# Specify the source field we're reading (containing 0's and 1's).
source: sex
# Tell it to transform 0 to 'Male', and 1 to 'Female'.
map:
0: Male
1: Female
field_migrate_example_favbeers: beers
# The following is blocked on https://www.drupal.org/node/2590993.
# This looks like a simple migration process plugin, but there's magic
# happening here. We import nodes after terms and users, because they have
# references to terms and users, so of course the terms and users must be
# migrated first - right? However, the favbeers field is a reference to the
# beer nodes which haven't yet been migrated - we have a circular relationship
# between users and nodes. The way the migration system resolves this
# situation is by creating stubs. In this case, because no beer nodes have
# been created, each time a beer is looked up against the beer_node migration
# nothing is found, and by default the migration process plugin creates an
# empty stub node as a placeholder so the favbeers reference field has
# something to point to. The stub is recorded in the beer_node map table, so
# when that migration runs it knows that each incoming beer should overwrite
# its stub instead of creating a new node.
# field_migrate_example_favbeers:
# plugin: migration
# source: beers
# migration: beer_node
# A "migration group" is - surprise! - a group of migrations. It is used to
# group migrations for display by our tools, and to perform operations on a
# specific set of migrations. It can also be used to hold any configuration
# common to those migrations, so it doesn't have to be duplicated in each one.
# The machine name of the group, by which it is referenced in individual
# migrations.
id: beer
# A human-friendly label of the group, displayed in the UI.
# A human-friendly label for the group.
label: Beer Imports
# More information about the group.
......@@ -16,5 +21,10 @@ source_type: Custom tables
# Drupal (default) database, but usually if your source data is in a
# database it will be external.
shared_configuration:
# Specifying 'source' here means that this configuration will be merged into
# the 'source' configuration of each migration.
source:
# A better practice for real-world migrations would be to add a database
# connection to your external database in settings.php and reference its
# key here.
key: default
......@@ -213,13 +213,13 @@ function migrate_example_beer_schema_account() {
'not null' => TRUE,
'description' => 'Blocked_Allowed',
),
'posted' => array(
'registered' => array(
'type' => 'varchar',
'length' => 255,
'not null' => TRUE,
'description' => 'Registration date',
),
'name' => array(
'username' => array(
'type' => 'varchar',
'length' => 255,
'not null' => FALSE,
......@@ -237,7 +237,7 @@ function migrate_example_beer_schema_account() {
'not null' => FALSE,
'description' => 'Account password (raw)',
),
'mail' => array(
'email' => array(
'type' => 'varchar',
'length' => 255,
'not null' => FALSE,
......@@ -276,10 +276,10 @@ function migrate_example_beer_data_node() {
$query->execute();
}
// Note that alice has duplicate username. Exercises dedupe() method.
// Note that alice has a duplicate username. Exercises dedupe_entity plugin.
// @TODO duplicate email also.
function migrate_example_beer_data_account() {
$fields = array('status', 'posted', 'name', 'nickname', 'password', 'mail', 'sex', 'beers');
$fields = array('status', 'registered', 'username', 'nickname', 'password', 'email', 'sex', 'beers');
$query = db_insert('migrate_example_beer_account')
->fields($fields);
$data = array(
......@@ -303,7 +303,7 @@ function migrate_example_beer_data_comment() {
array(99999998, NULL, 'im second', 'aromatic', 'alice', 'alice@example.com', 0),
array(99999999, NULL, 'im parent', 'malty', 'alice', 'alice@example.com', 0),
array(99999999, 1, 'im child', 'cold body', 'bob', NULL, 1),
array(99999999, 2, 'im grandchild', 'bitter body', 'charlie@example.com', NULL, 1),
array(99999999, 4, 'im grandchild', 'bitter body', 'charlie@example.com', NULL, 1),
);
foreach ($data as $row) {
$query->values(array_combine($fields, $row));
......
......@@ -10,7 +10,7 @@ namespace Drupal\migrate_example\Plugin\migrate\source;
use Drupal\migrate\Plugin\migrate\source\SqlBase;
/**
* Drupal 6 comment source from database.
* Source plugin for beer comments.
*
* @MigrateSource(
* id = "beer_comment"
......@@ -23,8 +23,8 @@ class BeerComment extends SqlBase {
*/
public function query() {
$query = $this->select('migrate_example_beer_comment', 'mec')
->fields('mec', array('cid', 'cid_parent', 'name', 'mail', 'aid',
'body', 'bid', 'subject'))
->fields('mec', ['cid', 'cid_parent', 'name', 'mail', 'aid',
'body', 'bid', 'subject'])
->orderBy('cid_parent', 'ASC');
return $query;
}
......@@ -33,7 +33,7 @@ class BeerComment extends SqlBase {
* {@inheritdoc}
*/
public function fields() {
$fields = array(
$fields = [
'cid' => $this->t('Comment ID'),
'cid_parent' => $this->t('Parent comment ID in case of comment replies'),
'name' => $this->t('Comment name (if anon)'),
......@@ -41,7 +41,7 @@ class BeerComment extends SqlBase {
'aid' => $this->t('Account ID (if any)'),
'bid' => $this->t('Beer ID that is being commented upon'),
'subject' => $this->t('Comment subject'),
);
];
return $fields;
}
......@@ -50,12 +50,12 @@ class BeerComment extends SqlBase {
* {@inheritdoc}
*/
public function getIds() {
return array(
'cid' => array(
return [
'cid' => [
'type' => 'integer',
'alias' => 'mec',
),
);
],
];
}
}
......@@ -11,7 +11,7 @@ use Drupal\migrate\Plugin\migrate\source\SqlBase;
use Drupal\migrate\Row;
/**
* Drupal 6 node source from database.
* Source plugin for beer content.
*
* @MigrateSource(
* id = "beer_node"
......@@ -23,10 +23,21 @@ class BeerNode extends SqlBase {
* {@inheritdoc}
*/
public function query() {
/**
* An important point to note is that your query *must* return a single row
* for each item to be imported. Here we might be tempted to add a join to
* migrate_example_beer_topic_node in our query, to pull in the
* relationships to our categories. Doing this would cause the query to
* return multiple rows for a given node, once per related value, thus
* processing the same node multiple times, each time with only one of the
* multiple values that should be imported. To avoid that, we simply query
* the base node data here, and pull in the relationships in prepareRow()
* below.
*/
$query = $this->select('migrate_example_beer_node', 'b')
->fields('b', array('bid', 'name', 'body', 'excerpt', 'aid',
->fields('b', ['bid', 'name', 'body', 'excerpt', 'aid',
'countries', 'image', 'image_alt', 'image_title',
'image_description'));
'image_description']);
return $query;
}
......@@ -34,7 +45,7 @@ class BeerNode extends SqlBase {
* {@inheritdoc}
*/
public function fields() {
$fields = array(
$fields = [
'bid' => $this->t('Beer ID'),
'name' => $this->t('Name of beer'),
'body' => $this->t('Full description of the beer'),
......@@ -45,8 +56,11 @@ class BeerNode extends SqlBase {
'image_alt' => $this->t('Image ALT'),
'image_title' => $this->t('Image title'),
'image_description' => $this->t('Image description'),
// Note that this field is not part of the query above - it is populated
// by prepareRow() below. You should document all source properties that
// are available for mapping after prepareRow() is called.
'terms' => $this->t('Applicable styles'),
);
];
return $fields;
}
......@@ -55,32 +69,36 @@ class BeerNode extends SqlBase {
* {@inheritdoc}
*/
public function getIds() {
return array(
'bid' => array(
return [
'bid' => [
'type' => 'integer',
'alias' => 'b',
),
);
],
];
}
/**
* {@inheritdoc}
*/
public function prepareRow(Row $row) {
if (parent::prepareRow($row) === FALSE) {
return FALSE;
}
/**
* As explained above, we need to pull the style relationships into our
* source row here, as an array of 'style' values (the unique ID for
* the beer_term migration).
*/
$terms = $this->select('migrate_example_beer_topic_node', 'bt')
->fields('bt', array('style'))
->fields('bt', ['style'])
->condition('bid', $row->getSourceProperty('bid'))
->execute()
->fetchCol();
$row->setSourceProperty('terms', $terms);
// As we did for favorite beers in the user migration, we need to explode
// the multi-value country names.
if ($value = $row->getSourceProperty('countries')) {
$row->setSourceProperty('countries', explode('|', $value));
}
return parent::prepareRow($row);
}
}
......@@ -10,7 +10,15 @@ namespace Drupal\migrate_example\Plugin\migrate\source;
use Drupal\migrate\Plugin\migrate\source\SqlBase;
/**
* Drupal 6 user source from database.
* This is an example of a simple SQL-based source plugin. Source plugins are
* classes which deliver source data to the processing pipeline. For SQL
* sources, the SqlBase class provides most of the functionality needed - for
* a specific migration, you are required to implement the three simple public
* methods you see below.
*
* This annotation tells Drupal that the name of the MigrateSource plugin
* implemented by this class is "beer_term". This is the name that the migration
* configuration references with the source "plugin" key.
*
* @MigrateSource(
* id = "beer_term"
......@@ -22,9 +30,17 @@ class BeerTerm extends SqlBase {
* {@inheritdoc}
*/
public function query() {
/**
* The most important part of a SQL source plugin is the SQL query to
* retrieve the data to be imported. Note that the query is not executed
* here - the migration process will control execution of the query. Also
* note that it is constructed from a $this->select() call - this ensures
* that the query is executed against the database configured for this
* source plugin.
*/
return $this->select('migrate_example_beer_topic', 'met')
->fields('met', array('style', 'details', 'style_parent', 'region',
'hoppiness'))
->fields('met', ['style', 'details', 'style_parent', 'region', 'hoppiness'])
// We sort this way to ensure parent terms are imported first.
->orderBy('style_parent', 'ASC');
}
......@@ -32,13 +48,21 @@ class BeerTerm extends SqlBase {
* {@inheritdoc}
*/
public function fields() {
$fields = array(