Commit 79bb4751 authored by Mike Ryan's avatar Mike Ryan
Browse files

Issue #2435231 by mikeryan: Documented and tweaked

parent c74d33df
INTRODUCTION
------------
The migrate_example module demonstrates how to implement custom migrations
for Drupal 8. It includes a group of "beer" migrations demonstrating a complete
simple migration scenario.
THE BEER SITE
-------------
In this scenario, we have a beer aficionado site which stores its data in MySQL
tables - there are content items for each beer on the site, user accounts with
profile data, categories to classify the beers, and user-generated comments on
the beers. We want to convert this site to Drupal with just a few modifications
to the basic structure.
To make the example as simple to run as possible, the source data is placed
in tables directly in your Drupal database - in most real-world scenarios, your
source data will be in an external database. The migrate_example_setup submodule
creates and populates these tables, as well as configuring your Drupal 8 site
(creating a node type, vocabulary, fields, etc.) to receive the data.
STRUCTURE
---------
There are two primary components to this example:
1. Migration configuration, in the config/install directory. These YAML files
describe the migration process and provide the mappings from the source data
to Drupal's destination entities.
2. Source plugins, in src/Plugin/migrate/source. These are referenced from the
configuration files, and provide the source data to the migration processing
pipeline, as well as manipulating that data where necessary to put it into
a canonical form for migrations.
UNDERSTANDING THE MIGRATIONS
----------------------------
The YAML and PHP files are copiously documented in-line. To best understand
the concepts described in a more-or-less narrative form, it is recommended you
read the files in the following order:
1. migrate_plus.migration_group.beer.yml
2. migrate.migration.beer_term.yml
3. BeerTerm.php
4. migrate.migration.beer_user.yml
5. BeerUser.php
6. migrate.migration.beer_node.yml
7. BeerNode.php
8. migrate.migration.beer_comment.yml
9. BeerComment.php
RUNNING THE MIGRATIONS
----------------------
The migrate_tools module (also part of the migrate_plus project) provides the
tools you need to perform migration processes. At this time, the web UI only
provides status information - to perform migration operations, you need to use
the drush commands.
# Enable the tools and the example module if you haven't already.
drush en -y migrate_tools,migrate_example
# Look at the migrations. Just look at them. Notice that they are displayed in
# the order they will be run, which reflects their dependencies. For example,
# because the node migration references the imported terms and users, it must
# run after those migrations have been run.
drush ms # Abbreviation for migrate-status
# Run the import operation for all the beer migrations.
drush mi --group=beer # Abbreviation for migrate-import
# Look at what you've done! Also, visit the site and see the imported content,
# user accounts, etc.
drush ms
# Look at the duplicate username message.
drush mmsg beer_user # Abbreviation for migrate-messages
# Run the rollback operation for all the migrations (removing all the imported
# content, user accounts, etc.). Note that it will roll back the migrations in
# the reverse of the order in which they were imported.
drush mr --group=beer # Abbreviation for migrate-rollback
# You can import specific migrations.
drush mi beer_term,beer_user
# At this point, go look at your content listing - you'll see beer nodes named
# "Stub", generated from the user's favbeers references.
drush mi beer_node,beer_comment
# Refresh your content listing - the stub nodes have been filled with real beer!
# You can rollback specific migrations.
drush mr beer_comment,beer_node
# Migration configuration for beer comments. No new concepts here.
id: beer_comment id: beer_comment
label: Comments on beers label: Comments on beers
migration_group: beer migration_group: beer
source: source:
plugin: beer_comment plugin: beer_comment
constants:
entity_type: node
destination: destination:
plugin: entity:comment plugin: entity:comment
process: process:
...@@ -16,7 +15,9 @@ process: ...@@ -16,7 +15,9 @@ process:
plugin: migration plugin: migration
migration: beer_node migration: beer_node
source: bid source: bid
entity_type: 'constants/entity_type' entity_type:
plugin: default_value
default_value: node
field_name: field_name:
plugin: default_value plugin: default_value
default_value: field_comments default_value: field_comments
......
# Migration configuration for beer content.
id: beer_node id: beer_node
label: Beers of the world label: Beers of the world
migration_group: beer migration_group: beer
...@@ -6,11 +7,12 @@ source: ...@@ -6,11 +7,12 @@ source:
destination: destination:
plugin: entity:node plugin: entity:node
process: process:
# Hardcode the destination node type (bundle) as 'migrate_example_beer'.
type: type:
plugin: default_value plugin: default_value
default_value: migrate_example_beer default_value: migrate_example_beer
nid: bid
title: name title: name
nid: bid
uid: uid:
plugin: migration plugin: migration
migration: beer_user migration: beer_user
...@@ -23,8 +25,24 @@ process: ...@@ -23,8 +25,24 @@ process:
plugin: migration plugin: migration
migration: beer_term migration: beer_term
source: terms source: terms
# Some Drupal fields may have multiple components we may want to set
# separately. For example, text fields may have summaries (teasers) in
# addition to the full text value. We use / to separate the field name from
# the internal field value being set, and put it in quotes because / is a
# YAML special character.
'body/value': body 'body/value': body
'body/summary': excerpt 'body/summary': excerpt
# Our beer nodes have references to terms and users, so we want those to be
# imported first. We make that dependency explicit here - by putting those
# migrations under the 'required' key, we ensure that the tools will prevent
# us from running the beer_node migration unless the beer_term and beer_user
# migrations are complete (although we can override the dependency check by
# passing --force to the drush migrate-import command). We can also add
# 'optional' dependencies - these affect the order in which migrations are
# displayed, and run by default, but do not force you to run them in that
# order.
# The general rule of thumb is that any migrations referenced by migration
# process plugins should be required here.
migration_dependencies: migration_dependencies:
required: required:
- beer_term - beer_term
......
# A "migration" is, in technical terms, a configuration entity which describes
# how to read source data, process it (generally by mapping source fields to
# destination fields), and write it to Drupal.
# The machine name for a migration, used to uniquely identify it.
id: beer_term id: beer_term
label: Migrate styles from the source database to taxonomy terms
# A human-friendly description of the migration.
label: Migrate style categories from the source database to taxonomy terms
# The machine name of the group containing this migration (which contains
# shared configuration to be merged with our own configuration here).
migration_group: beer migration_group: beer
# Every migration must have a source plugin, which controls the delivery of our
# source data. In this case, our source plugin has the name "beer_term", which
# Drupal resolves to the PHP class defined in
# src/Plugin/migrate/source/BeerTerm.php.
source: source:
plugin: beer_term plugin: beer_term
# Every migration must also have a destination plugin, which handles writing
# the migrated data in the appropriate form for that particular kind of data.
# Most Drupal content is an "entity" of one type or another, and we need to
# specify what entity type we are populating (in this case, taxonomy terms).
# Unlike the source plugin (which is specific to our particular scenario), this
# destination plugin is implemented in Drupal itself.
destination: destination:
plugin: entity:taxonomy_term plugin: entity:taxonomy_term
# Here's the meat of the migration - the processing pipeline. This describes how
# each destination field is to be populated based on the source data. For each
# destination field, one or more process plugins may be invoked.
process: process:
# The simplest process plugin is named 'get' - it is the default plugin, so
# does not need to be explicitly named. It simply copies the source value
# (the 'style' field from the source database in this case) to the destination
# field (the taxonomy term 'name' field). You can see we simply copy the
# source 'details' field to destination 'description' field in the same way.
name: style
description: details
# Here is a new plugin - default_value. In its simplest usage here, it is used
# to hard-code a destination value, the vid (vocabulary ID) our taxonomy terms
# should be assigned to. It's important to note that while above the right
# side of the mappings was a source field name, here the right side of the
# 'default_value:' line is an actual value.
vid: vid:
plugin: default_value plugin: default_value
default_value: migrate_example_beer_styles default_value: migrate_example_beer_styles
name: style
description: details # Here's another new plugin - migration. When importing data from another
# system, typically the unique identifiers for items on the destination side
# are not the same as the identifiers on the source side. For example, in
# our style data the term names are the unique identifiers for each term,
# while in Drupal each term is assigned a unique integer term ID (tid). When
# any such items are referenced in Drupal, the reference needs to be
# translated from the old ID ('ale') to the new ID (1). The migration
# framework keeps track of the relationships between source and destination
# IDs in map tables, and the migration plugin is the means of performing a
# lookup in those map tables during processing.
parent: parent:
plugin: migration plugin: migration
# Here we reference the migration whose map table we're performing a lookup
# against. You'll note that in this case we're actually referencing this
# migration itself, since category parents are imported by the same
# migration. This works best when we're sure the parents are imported
# before the children, and in this case our source plugin is guaranteeing
# that.
migration: beer_term migration: beer_term
# 'style_parent' is the parent reference field from the source data. The
# result of this plugin is that the destination 'parent' field is populated
# with the Drupal term ID of the referenced style (or NULL if style_parent
# was empty).
source: style_parent source: style_parent
# Migration configuration for user accounts. We've described most of what goes
# into migration configuration in migrate.migration.beer_term.yml, so won't
# repeat that here.
id: beer_user id: beer_user
label: Beer Drinkers of the world label: Beer Drinkers of the world
migration_group: beer migration_group: beer
...@@ -6,29 +9,86 @@ source: ...@@ -6,29 +9,86 @@ source:
destination: destination:
plugin: entity:user plugin: entity:user
process: process:
pass: password
mail: email
init: email
status: status
roles:
plugin: default_value
default_value: 2
# Here's a new process plugin - dedupe_entity. Our source site allowed there
# to be multiple user accounts with the same username, but Drupal wants
# usernames to be unique. This plugin allows us to automatically generate
# unique usernames when we detect collisions.
name: name:
plugin: dedupe_entity plugin: dedupe_entity
# The name of the source field containing the username.
source: username
# These next two settings identify the destination-side field to check for
# duplicates. They say "see if the incoming 'name' matches any existing
# 'name' field in any 'user' entity".
entity_type: user entity_type: user
field: name field: name
# Finally, this specifies a string to use between the original value and the
# sequence number appended to make the value unique. Thus, the first 'alice'
# account gets the name 'alice' in Drupal, and the second one gets the name
# 'alice_1'.
postfix: _ postfix: _
source: name
pass: password # Another new process plugin - callback. This allows us to filter an incoming
mail: mail # source value through an arbitrary PHP function. The function called must
# have one required argument.
created: created:
- plugin: callback
plugin: callback # The 'registered' timestamp in the source data is a string of the form
callable: strtotime # 'yyyy-mm-dd hh:mm:ss', but Drupal wants a UNIX timestamp for 'created'.
source: posted source: registered
access: callable: strtotime
plugin: get
source: @created # Our source data only has a single timestamp value, 'registered', which we
login: # want to use for all four of Drupal's user timestamp fields. We could
plugin: get # duplicate the callback plugin we used for 'created' above - but we have a
source: @created # shortcut. Putting an @ sign at the beginning of the source value indicates
status: status # that it is to be interpreted as a *destination* field name instead of a
init: mail # *source* field name. Thus, if a value we need in more than one place
roles: # requires some processing beyond simply copying it directly, we can perform
plugin: default_value # that processing a single time and use the result in multiple places.
default_value: 2 changed: '@created'
field_migrate_example_gender: sex access: '@created'
login: '@created'
# Yet another new process plugin - static_map. We're making a transformation
# in how we represent gender data - formerly it was integer values 0 for male
# and 1 for female, but in our modern Drupal site we will be making this a
# free-form text field, so we want to replace the obscure integers with
# simple strings.
field_migrate_example_gender:
plugin: static_map
# Specify the source field we're reading (containing 0's and 1's).
source: sex
# Tell it to transform 0 to 'Male', and 1 to 'Female'.
map:
0: Male
1: Female
field_migrate_example_favbeers: beers field_migrate_example_favbeers: beers
# The following is blocked on https://www.drupal.org/node/2590993.
# This looks like a simple migration process plugin, but there's magic
# happening here. We import nodes after terms and users, because they have
# references to terms and users, so of course the terms and users must be
# migrated first - right? However, the favbeers field is a reference to the
# beer nodes which haven't yet been migrated - we have a circular relationship
# between users and nodes. The way the migration system resolves this
# situation is by creating stubs. In this case, because no beer nodes have
# been created, each time a beer is looked up against the beer_node migration
# nothing is found, and by default the migration process plugin creates an
# empty stub node as a placeholder so the favbeers reference field has
# something to point to. The stub is recorded in the beer_node map table, so
# when that migration runs it knows that each incoming beer should overwrite
# its stub instead of creating a new node.
# field_migrate_example_favbeers:
# plugin: migration
# source: beers
# migration: beer_node
# A "migration group" is - surprise! - a group of migrations. It is used to
# group migrations for display by our tools, and to perform operations on a
# specific set of migrations. It can also be used to hold any configuration
# common to those migrations, so it doesn't have to be duplicated in each one.
# The machine name of the group, by which it is referenced in individual # The machine name of the group, by which it is referenced in individual
# migrations. # migrations.
id: beer id: beer
# A human-friendly label of the group, displayed in the UI. # A human-friendly label for the group.
label: Beer Imports label: Beer Imports
# More information about the group. # More information about the group.
...@@ -16,5 +21,10 @@ source_type: Custom tables ...@@ -16,5 +21,10 @@ source_type: Custom tables
# Drupal (default) database, but usually if your source data is in a # Drupal (default) database, but usually if your source data is in a
# database it will be external. # database it will be external.
shared_configuration: shared_configuration:
# Specifying 'source' here means that this configuration will be merged into
# the 'source' configuration of each migration.
source: source:
# A better practice for real-world migrations would be to add a database
# connection to your external database in settings.php and reference its
# key here.
key: default key: default
...@@ -213,13 +213,13 @@ function migrate_example_beer_schema_account() { ...@@ -213,13 +213,13 @@ function migrate_example_beer_schema_account() {
'not null' => TRUE, 'not null' => TRUE,
'description' => 'Blocked_Allowed', 'description' => 'Blocked_Allowed',
), ),
'posted' => array( 'registered' => array(
'type' => 'varchar', 'type' => 'varchar',
'length' => 255, 'length' => 255,
'not null' => TRUE, 'not null' => TRUE,
'description' => 'Registration date', 'description' => 'Registration date',
), ),
'name' => array( 'username' => array(
'type' => 'varchar', 'type' => 'varchar',
'length' => 255, 'length' => 255,
'not null' => FALSE, 'not null' => FALSE,
...@@ -237,7 +237,7 @@ function migrate_example_beer_schema_account() { ...@@ -237,7 +237,7 @@ function migrate_example_beer_schema_account() {
'not null' => FALSE, 'not null' => FALSE,
'description' => 'Account password (raw)', 'description' => 'Account password (raw)',
), ),
'mail' => array( 'email' => array(
'type' => 'varchar', 'type' => 'varchar',
'length' => 255, 'length' => 255,
'not null' => FALSE, 'not null' => FALSE,
...@@ -276,10 +276,10 @@ function migrate_example_beer_data_node() { ...@@ -276,10 +276,10 @@ function migrate_example_beer_data_node() {
$query->execute(); $query->execute();
} }
// Note that alice has duplicate username. Exercises dedupe() method. // Note that alice has duplicate username. Exercises dedupe_entity plugin.
// @TODO duplicate email also. // @TODO duplicate email also.
function migrate_example_beer_data_account() { function migrate_example_beer_data_account() {
$fields = array('status', 'posted', 'name', 'nickname', 'password', 'mail', 'sex', 'beers'); $fields = array('status', 'registered', 'username', 'nickname', 'password', 'email', 'sex', 'beers');
$query = db_insert('migrate_example_beer_account') $query = db_insert('migrate_example_beer_account')
->fields($fields); ->fields($fields);
$data = array( $data = array(
...@@ -303,7 +303,7 @@ function migrate_example_beer_data_comment() { ...@@ -303,7 +303,7 @@ function migrate_example_beer_data_comment() {
array(99999998, NULL, 'im second', 'aromatic', 'alice', 'alice@example.com', 0), array(99999998, NULL, 'im second', 'aromatic', 'alice', 'alice@example.com', 0),
array(99999999, NULL, 'im parent', 'malty', 'alice', 'alice@example.com', 0), array(99999999, NULL, 'im parent', 'malty', 'alice', 'alice@example.com', 0),
array(99999999, 1, 'im child', 'cold body', 'bob', NULL, 1), array(99999999, 1, 'im child', 'cold body', 'bob', NULL, 1),
array(99999999, 2, 'im grandchild', 'bitter body', 'charlie@example.com', NULL, 1), array(99999999, 4, 'im grandchild', 'bitter body', 'charlie@example.com', NULL, 1),
); );
foreach ($data as $row) { foreach ($data as $row) {
$query->values(array_combine($fields, $row)); $query->values(array_combine($fields, $row));
......
...@@ -10,7 +10,7 @@ namespace Drupal\migrate_example\Plugin\migrate\source; ...@@ -10,7 +10,7 @@ namespace Drupal\migrate_example\Plugin\migrate\source;
use Drupal\migrate\Plugin\migrate\source\SqlBase; use Drupal\migrate\Plugin\migrate\source\SqlBase;
/** /**
* Drupal 6 comment source from database. * Source plugin for beer comments.
* *
* @MigrateSource( * @MigrateSource(
* id = "beer_comment" * id = "beer_comment"
...@@ -23,8 +23,8 @@ class BeerComment extends SqlBase { ...@@ -23,8 +23,8 @@ class BeerComment extends SqlBase {
*/ */
public function query() { public function query() {
$query = $this->select('migrate_example_beer_comment', 'mec') $query = $this->select('migrate_example_beer_comment', 'mec')
->fields('mec', array('cid', 'cid_parent', 'name', 'mail', 'aid', ->fields('mec', ['cid', 'cid_parent', 'name', 'mail', 'aid',
'body', 'bid', 'subject')) 'body', 'bid', 'subject'])
->orderBy('cid_parent', 'ASC'); ->orderBy('cid_parent', 'ASC');
return $query; return $query;
} }
...@@ -33,7 +33,7 @@ class BeerComment extends SqlBase { ...@@ -33,7 +33,7 @@ class BeerComment extends SqlBase {
* {@inheritdoc} * {@inheritdoc}
*/ */
public function fields() { public function fields() {
$fields = array( $fields = [
'cid' => $this->t('Comment ID'), 'cid' => $this->t('Comment ID'),
'cid_parent' => $this->t('Parent comment ID in case of comment replies'), 'cid_parent' => $this->t('Parent comment ID in case of comment replies'),
'name' => $this->t('Comment name (if anon)'), 'name' => $this->t('Comment name (if anon)'),
...@@ -41,7 +41,7 @@ class BeerComment extends SqlBase { ...@@ -41,7 +41,7 @@ class BeerComment extends SqlBase {
'aid' => $this->t('Account ID (if any)'), 'aid' => $this->t('Account ID (if any)'),
'bid' => $this->t('Beer ID that is being commented upon'), 'bid' => $this->t('Beer ID that is being commented upon'),
'subject' => $this->t('Comment subject'), 'subject' => $this->t('Comment subject'),
); ];
return $fields; return $fields;
} }
...@@ -50,12 +50,12 @@ class BeerComment extends SqlBase { ...@@ -50,12 +50,12 @@ class BeerComment extends SqlBase {
* {@inheritdoc} * {@inheritdoc}
*/ */
public function getIds() { public function getIds() {
return array( return [
'cid' => array( 'cid' => [
'type' => 'integer', 'type' => 'integer',
'alias' => 'mec', 'alias' => 'mec',
), ],
); ];
} }
} }
...@@ -11,7 +11,7 @@ use Drupal\migrate\Plugin\migrate\source\SqlBase; ...@@ -11,7 +11,7 @@ use Drupal\migrate\Plugin\migrate\source\SqlBase;
use Drupal\migrate\Row; use Drupal\migrate\Row;
/** /**
* Drupal 6 node source from database. * Source plugin for beer content.
* *
* @MigrateSource( * @MigrateSource(
* id = "beer_node" * id = "beer_node"
...@@ -23,10 +23,21 @@ class BeerNode extends SqlBase { ...@@ -23,10 +23,21 @@ class BeerNode extends SqlBase {
* {@inheritdoc} * {@inheritdoc}
*/ */
public function query() { public function query() {
/**
* An important point to note is that your query *must* return a single row
* for each item to be imported. Here we might be tempted to add a join to
* migrate_example_beer_topic_node in our query, to pull in the
* relationships to our categories. Doing this would cause the query to
* return multiple rows for a given node, once per related value, thus
* processing the same node multiple times, each time with only one of the
* multiple values that should be imported. To avoid that, we simply query
* the base node data here, and pull in the relationships in prepareRow()
* below.
*/
$query = $this->select('migrate_example_beer_node', 'b')