Commit 79bb4751 authored by Mike Ryan's avatar Mike Ryan
Browse files

Issue #2435231 by mikeryan: Documented and tweaked

parent c74d33df
The migrate_example module demonstrates how to implement custom migrations
for Drupal 8. It includes a group of "beer" migrations demonstrating a complete
simple migration scenario.
In this scenario, we have a beer aficionado site which stores its data in MySQL
tables - there are content items for each beer on the site, user accounts with
profile data, categories to classify the beers, and user-generated comments on
the beers. We want to convert this site to Drupal with just a few modifications
to the basic structure.
To make the example as simple to run as possible, the source data is placed
in tables directly in your Drupal database - in most real-world scenarios, your
source data will be in an external database. The migrate_example_setup submodule
creates and populates these tables, as well as configuring your Drupal 8 site
(creating a node type, vocabulary, fields, etc.) to receive the data.
There are two primary components to this example:
1. Migration configuration, in the config/install directory. These YAML files
describe the migration process and provide the mappings from the source data
to Drupal's destination entities.
2. Source plugins, in src/Plugin/migrate/source. These are referenced from the
configuration files, and provide the source data to the migration processing
pipeline, as well as manipulating that data where necessary to put it into
a canonical form for migrations.
The YAML and PHP files are copiously documented in-line. To best understand
the concepts described in a more-or-less narrative form, it is recommended you
read the files in the following order:
1. migrate_plus.migration_group.beer.yml
2. migrate.migration.beer_term.yml
3. BeerTerm.php
4. migrate.migration.beer_user.yml
5. BeerUser.php
6. migrate.migration.beer_node.yml
7. BeerNode.php
8. migrate.migration.beer_comment.yml
9. BeerComment.php
The migrate_tools module (also part of the migrate_plus project) provides the
tools you need to perform migration processes. At this time, the web UI only
provides status information - to perform migration operations, you need to use
the drush commands.
# Enable the tools and the example module if you haven't already.
drush en -y migrate_tools,migrate_example
# Look at the migrations. Just look at them. Notice that they are displayed in
# the order they will be run, which reflects their dependencies. For example,
# because the node migration references the imported terms and users, it must
# run after those migrations have been run.
drush ms # Abbreviation for migrate-status
# Run the import operation for all the beer migrations.
drush mi --group=beer # Abbreviation for migrate-import
# Look at what you've done! Also, visit the site and see the imported content,
# user accounts, etc.
drush ms
# Look at the duplicate username message.
drush mmsg beer_user # Abbreviation for migrate-messages
# Run the rollback operation for all the migrations (removing all the imported
# content, user accounts, etc.). Note that it will roll back the migrations in
# the opposite order from the one in which they were imported.
drush mr --group=beer # Abbreviation for migrate-rollback
# You can import specific migrations.
drush mi beer_term,beer_user
# At this point, go look at your content listing - you'll see beer nodes named
# "Stub", generated from the user's favbeers references.
drush mi beer_node,beer_comment
# Refresh your content listing - the stub nodes have been filled with real beer!
# You can roll back specific migrations.
drush mr beer_comment,beer_node
# Migration configuration for beer comments. No new concepts here.
id: beer_comment id: beer_comment
label: Comments on beers label: Comments on beers
migration_group: beer migration_group: beer
source: source:
plugin: beer_comment plugin: beer_comment
entity_type: node
destination: destination:
plugin: entity:comment plugin: entity:comment
process: process:
...@@ -16,7 +15,9 @@ process: ...@@ -16,7 +15,9 @@ process:
plugin: migration plugin: migration
migration: beer_node migration: beer_node
source: bid source: bid
entity_type: 'constants/entity_type' entity_type:
plugin: default_value
default_value: node
field_name: field_name:
plugin: default_value plugin: default_value
default_value: field_comments default_value: field_comments
# Migration configuration for beer content.
id: beer_node id: beer_node
label: Beers of the world label: Beers of the world
migration_group: beer migration_group: beer
...@@ -6,11 +7,12 @@ source: ...@@ -6,11 +7,12 @@ source:
destination: destination:
plugin: entity:node plugin: entity:node
process: process:
# Hardcode the destination node type (bundle) as 'migrate_example_beer'.
type: type:
plugin: default_value plugin: default_value
default_value: migrate_example_beer default_value: migrate_example_beer
nid: bid
title: name title: name
nid: bid
uid: uid:
plugin: migration plugin: migration
migration: beer_user migration: beer_user
...@@ -23,8 +25,24 @@ process: ...@@ -23,8 +25,24 @@ process:
plugin: migration plugin: migration
migration: beer_term migration: beer_term
source: terms source: terms
# Some Drupal fields may have multiple components we may want to set
# separately. For example, text fields may have summaries (teasers) in
# addition to the full text value. We use / to separate the field name from
# the internal field value being set, and put it in quotes because / is a
# YAML special character.
'body/value': body 'body/value': body
'body/summary': excerpt 'body/summary': excerpt
# Our beer nodes have references to terms and users, so we want those to be
# imported first. We make that dependency explicit here - by putting those
# migrations under the 'required' key, we ensure that the tools will prevent
# us from running the beer_node migration unless the beer_term and beer_user
# migrations are complete (although we can override the dependency check by
# passing --force to the drush migrate-import command). We can also add
# 'optional' dependencies - these affect the order in which migrations are
# displayed, and run by default, but do not force you to run them in that
# order.
# The general rule of thumb is that any migrations referenced by migration
# process plugins should be required here.
migration_dependencies: migration_dependencies:
required: required:
- beer_term - beer_term
# A "migration" is, in technical terms, a configuration entity which describes
# how to read source data, process it (generally by mapping source fields to
# destination fields), and write it to Drupal.
# The machine name for a migration, used to uniquely identify it.
id: beer_term id: beer_term
label: Migrate styles from the source database to taxonomy terms
# A human-friendly description of the migration.
label: Migrate style categories from the source database to taxonomy terms
# The machine name of the group containing this migration (which contains
# shared configuration to be merged with our own configuration here).
migration_group: beer migration_group: beer
# Every migration must have a source plugin, which controls the delivery of our
# source data. In this case, our source plugin has the name "beer_term", which
# Drupal resolves to the PHP class defined in
# src/Plugin/migrate/source/BeerTerm.php.
source: source:
plugin: beer_term plugin: beer_term
# Every migration must also have a destination plugin, which handles writing
# the migrated data in the appropriate form for that particular kind of data.
# Most Drupal content is an "entity" of one type or another, and we need to
# specify what entity type we are populating (in this case, taxonomy terms).
# Unlike the source plugin (which is specific to our particular scenario), this
# destination plugin is implemented in Drupal itself.
destination: destination:
plugin: entity:taxonomy_term plugin: entity:taxonomy_term
# Here's the meat of the migration - the processing pipeline. This describes how
# each destination field is to be populated based on the source data. For each
# destination field, one or more process plugins may be invoked.
process: process:
# The simplest process plugin is named 'get' - it is the default plugin, so
# does not need to be explicitly named. It simply copies the source value
# (the 'style' field from the source database in this case) to the destination
# field (the taxonomy term 'name' field). You can see we simply copy the
# source 'details' field to the destination 'description' field in the same way.
name: style
description: details
# Here is a new plugin - default_value. In its simplest usage here, it is used
# to hard-code a destination value, the vid (vocabulary ID) our taxonomy terms
# should be assigned to. It's important to note that while above the right
# side of the mappings was a source field name, here the right side of the
# 'default_value:' line is an actual value.
vid: vid:
plugin: default_value plugin: default_value
default_value: migrate_example_beer_styles default_value: migrate_example_beer_styles
name: style
description: details # Here's another new plugin - migration. When importing data from another
# system, typically the unique identifiers for items on the destination side
# are not the same as the identifiers on the source side. For example, in
# our style data the term names are the unique identifiers for each term,
# while in Drupal each term is assigned a unique integer term ID (tid). When
# any such items are referenced in Drupal, the reference needs to be
# translated from the old ID ('ale') to the new ID (1). The migration
# framework keeps track of the relationships between source and destination
# IDs in map tables, and the migration plugin is the means of performing a
# lookup in those map tables during processing.
parent: parent:
plugin: migration plugin: migration
# Here we reference the migration whose map table we're performing a lookup
# against. You'll note that in this case we're actually referencing this
# migration itself, since category parents are imported by the same
# migration. This works best when we're sure the parents are imported
# before the children, and in this case our source plugin is guaranteeing
# that.
migration: beer_term migration: beer_term
# 'style_parent' is the parent reference field from the source data. The
# result of this plugin is that the destination 'parent' field is populated
# with the Drupal term ID of the referenced style (or NULL if style_parent
# was empty).
source: style_parent source: style_parent
# Migration configuration for user accounts. We've described most of what goes
# into migration configuration in migrate.migration.beer_term.yml, so won't
# repeat that here.
id: beer_user id: beer_user
label: Beer Drinkers of the world label: Beer Drinkers of the world
migration_group: beer migration_group: beer
...@@ -6,29 +9,86 @@ source: ...@@ -6,29 +9,86 @@ source:
destination: destination:
plugin: entity:user plugin: entity:user
process: process:
pass: password
mail: email
init: email
status: status
plugin: default_value
default_value: 2
# Here's a new process plugin - dedupe_entity. Our source site allowed there
# to be multiple user accounts with the same username, but Drupal wants
# usernames to be unique. This plugin allows us to automatically generate
# unique usernames when we detect collisions.
name: name:
plugin: dedupe_entity plugin: dedupe_entity
# The name of the source field containing the username.
source: username
# These next two settings identify the destination-side field to check for
# duplicates. They say "see if the incoming 'name' matches any existing
# 'name' field in any 'user' entity".
entity_type: user entity_type: user
field: name field: name
# Finally, this specifies a string to use between the original value and the
# sequence number appended to make the value unique. Thus, the first 'alice'
# account gets the name 'alice' in Drupal, and the second one gets the name
# 'alice_1'.
postfix: _ postfix: _
source: name
pass: password # Another new process plugin - callback. This allows us to filter an incoming
mail: mail # source value through an arbitrary PHP function. The function called must
# have one required argument.
created: created:
plugin: callback plugin: callback
# The 'registered' timestamp in the source data is a string of the form
# 'yyyy-mm-dd hh:mm:ss', but Drupal wants a UNIX timestamp for 'created'.
source: registered
callable: strtotime callable: strtotime
source: posted
access: # Our source data only has a single timestamp value, 'registered', which we
plugin: get # want to use for all four of Drupal's user timestamp fields. We could
source: @created # duplicate the callback plugin we used for 'created' above - but we have a
login: # shortcut. Putting an @ sign at the beginning of the source value indicates
plugin: get # that it is to be interpreted as a *destination* field name instead of a
source: @created # *source* field name. Thus, if a value we need in more than one place
status: status # requires some processing beyond simply copying it directly, we can perform
init: mail # that processing a single time and use the result in multiple places.
roles: changed: '@created'
plugin: default_value access: '@created'
default_value: 2 login: '@created'
field_migrate_example_gender: sex
# Yet another new process plugin - static_map. We're making a transformation
# in how we represent gender data - formerly it was integer values 0 for male
# and 1 for female, but in our modern Drupal site we will be making this a
# free-form text field, so we want to replace the obscure integers with
# simple strings.
plugin: static_map
# Specify the source field we're reading (containing 0's and 1's).
source: sex
# Tell it to transform 0 to 'Male', and 1 to 'Female'.
0: Male
1: Female
field_migrate_example_favbeers: beers field_migrate_example_favbeers: beers
# The following is blocked on
# This looks like a simple migration process plugin, but there's magic
# happening here. We import nodes after terms and users, because they have
# references to terms and users, so of course the terms and users must be
# migrated first - right? However, the favbeers field is a reference to the
# beer nodes which haven't yet been migrated - we have a circular relationship
# between users and nodes. The way the migration system resolves this
# situation is by creating stubs. In this case, because no beer nodes have
# been created, each time a beer is looked up against the beer_node migration
# nothing is found, and by default the migration process plugin creates an
# empty stub node as a placeholder so the favbeers reference field has
# something to point to. The stub is recorded in the beer_node map table, so
# when that migration runs it knows that each incoming beer should overwrite
# its stub instead of creating a new node.
# field_migrate_example_favbeers:
# plugin: migration
# source: beers
# migration: beer_node
# A "migration group" is - surprise! - a group of migrations. It is used to
# group migrations for display by our tools, and to perform operations on a
# specific set of migrations. It can also be used to hold any configuration
# common to those migrations, so it doesn't have to be duplicated in each one.
# The machine name of the group, by which it is referenced in individual # The machine name of the group, by which it is referenced in individual
# migrations. # migrations.
id: beer id: beer
# A human-friendly label of the group, displayed in the UI. # A human-friendly label for the group.
label: Beer Imports label: Beer Imports
# More information about the group. # More information about the group.
...@@ -16,5 +21,10 @@ source_type: Custom tables ...@@ -16,5 +21,10 @@ source_type: Custom tables
# Drupal (default) database, but usually if your source data is in a # Drupal (default) database, but usually if your source data is in a
# database it will be external. # database it will be external.
shared_configuration: shared_configuration:
# Specifying 'source' here means that this configuration will be merged into
# the 'source' configuration of each migration.
source: source:
# A better practice for real-world migrations would be to add a database
# connection to your external database in settings.php and reference its
# key here.
key: default key: default
...@@ -213,13 +213,13 @@ function migrate_example_beer_schema_account() { ...@@ -213,13 +213,13 @@ function migrate_example_beer_schema_account() {
'not null' => TRUE, 'not null' => TRUE,
'description' => 'Blocked_Allowed', 'description' => 'Blocked_Allowed',
), ),
'posted' => array( 'registered' => array(
'type' => 'varchar', 'type' => 'varchar',
'length' => 255, 'length' => 255,
'not null' => TRUE, 'not null' => TRUE,
'description' => 'Registration date', 'description' => 'Registration date',
), ),
'name' => array( 'username' => array(
'type' => 'varchar', 'type' => 'varchar',
'length' => 255, 'length' => 255,
'not null' => FALSE, 'not null' => FALSE,
...@@ -237,7 +237,7 @@ function migrate_example_beer_schema_account() { ...@@ -237,7 +237,7 @@ function migrate_example_beer_schema_account() {
'not null' => FALSE, 'not null' => FALSE,
'description' => 'Account password (raw)', 'description' => 'Account password (raw)',
), ),
'mail' => array( 'email' => array(
'type' => 'varchar', 'type' => 'varchar',
'length' => 255, 'length' => 255,
'not null' => FALSE, 'not null' => FALSE,
...@@ -276,10 +276,10 @@ function migrate_example_beer_data_node() { ...@@ -276,10 +276,10 @@ function migrate_example_beer_data_node() {
$query->execute(); $query->execute();
} }
// Note that alice has duplicate username. Exercises dedupe() method. // Note that alice has duplicate username. Exercises dedupe_entity plugin.
// @TODO duplicate email also. // @TODO duplicate email also.
function migrate_example_beer_data_account() { function migrate_example_beer_data_account() {
$fields = array('status', 'posted', 'name', 'nickname', 'password', 'mail', 'sex', 'beers'); $fields = array('status', 'registered', 'username', 'nickname', 'password', 'email', 'sex', 'beers');
$query = db_insert('migrate_example_beer_account') $query = db_insert('migrate_example_beer_account')
->fields($fields); ->fields($fields);
$data = array( $data = array(
...@@ -303,7 +303,7 @@ function migrate_example_beer_data_comment() { ...@@ -303,7 +303,7 @@ function migrate_example_beer_data_comment() {
array(99999998, NULL, 'im second', 'aromatic', 'alice', '', 0), array(99999998, NULL, 'im second', 'aromatic', 'alice', '', 0),
array(99999999, NULL, 'im parent', 'malty', 'alice', '', 0), array(99999999, NULL, 'im parent', 'malty', 'alice', '', 0),
array(99999999, 1, 'im child', 'cold body', 'bob', NULL, 1), array(99999999, 1, 'im child', 'cold body', 'bob', NULL, 1),
array(99999999, 2, 'im grandchild', 'bitter body', '', NULL, 1), array(99999999, 4, 'im grandchild', 'bitter body', '', NULL, 1),
); );
foreach ($data as $row) { foreach ($data as $row) {
$query->values(array_combine($fields, $row)); $query->values(array_combine($fields, $row));
...@@ -10,7 +10,7 @@ namespace Drupal\migrate_example\Plugin\migrate\source; ...@@ -10,7 +10,7 @@ namespace Drupal\migrate_example\Plugin\migrate\source;
use Drupal\migrate\Plugin\migrate\source\SqlBase; use Drupal\migrate\Plugin\migrate\source\SqlBase;
/** /**
* Drupal 6 comment source from database. * Source plugin for beer comments.
* *
* @MigrateSource( * @MigrateSource(
* id = "beer_comment" * id = "beer_comment"
...@@ -23,8 +23,8 @@ class BeerComment extends SqlBase { ...@@ -23,8 +23,8 @@ class BeerComment extends SqlBase {
*/ */
public function query() { public function query() {
$query = $this->select('migrate_example_beer_comment', 'mec') $query = $this->select('migrate_example_beer_comment', 'mec')
->fields('mec', array('cid', 'cid_parent', 'name', 'mail', 'aid', ->fields('mec', ['cid', 'cid_parent', 'name', 'mail', 'aid',
'body', 'bid', 'subject')) 'body', 'bid', 'subject'])
->orderBy('cid_parent', 'ASC'); ->orderBy('cid_parent', 'ASC');
return $query; return $query;
} }
...@@ -33,7 +33,7 @@ class BeerComment extends SqlBase { ...@@ -33,7 +33,7 @@ class BeerComment extends SqlBase {
* {@inheritdoc} * {@inheritdoc}
*/ */
public function fields() { public function fields() {
$fields = array( $fields = [
'cid' => $this->t('Comment ID'), 'cid' => $this->t('Comment ID'),
'cid_parent' => $this->t('Parent comment ID in case of comment replies'), 'cid_parent' => $this->t('Parent comment ID in case of comment replies'),
'name' => $this->t('Comment name (if anon)'), 'name' => $this->t('Comment name (if anon)'),
...@@ -41,7 +41,7 @@ class BeerComment extends SqlBase { ...@@ -41,7 +41,7 @@ class BeerComment extends SqlBase {
'aid' => $this->t('Account ID (if any)'), 'aid' => $this->t('Account ID (if any)'),
'bid' => $this->t('Beer ID that is being commented upon'), 'bid' => $this->t('Beer ID that is being commented upon'),
'subject' => $this->t('Comment subject'), 'subject' => $this->t('Comment subject'),
); ];
return $fields; return $fields;
} }
...@@ -50,12 +50,12 @@ class BeerComment extends SqlBase { ...@@ -50,12 +50,12 @@ class BeerComment extends SqlBase {
* {@inheritdoc} * {@inheritdoc}
*/ */
public function getIds() { public function getIds() {
return array( return [
'cid' => array( 'cid' => [
'type' => 'integer', 'type' => 'integer',
'alias' => 'mec', 'alias' => 'mec',
), ],
); ];
} }
} }
...@@ -11,7 +11,7 @@ use Drupal\migrate\Plugin\migrate\source\SqlBase; ...@@ -11,7 +11,7 @@ use Drupal\migrate\Plugin\migrate\source\SqlBase;
use Drupal\migrate\Row; use Drupal\migrate\Row;
/** /**
* Drupal 6 node source from database. * Source plugin for beer content.
* *
* @MigrateSource( * @MigrateSource(
* id = "beer_node" * id = "beer_node"
...@@ -23,10 +23,21 @@ class BeerNode extends SqlBase { ...@@ -23,10 +23,21 @@ class BeerNode extends SqlBase {
* {@inheritdoc} * {@inheritdoc}
*/ */
public function query() { public function query() {
* An important point to note is that your query *must* return a single row
* for each item to be imported. Here we might be tempted to add a join to
* migrate_example_beer_topic_node in our query, to pull in the
* relationships to our categories. Doing this would cause the query to
* return multiple rows for a given node, once per related value, thus
* processing the same node multiple times, each time with only one of the
* multiple values that should be imported. To avoid that, we simply query
* the base node data here, and pull in the relationships in prepareRow()
* below.
$query = $this