From c4a8198940efb8e967144ba6eb59e9c25a460a80 Mon Sep 17 00:00:00 2001
From: Fran Garcia-Linares <fjgarlin@gmail.com>
Date: Fri, 26 Jul 2024 12:58:31 +0200
Subject: [PATCH] Logic to retrieve changed rows via timestamps, similar to highwater.

---
 scripts/migrations.sh                       |  8 ++-
 src/Plugin/migrate/source/DrupalOrgUser.php | 76 +++++++++++++++++++++
 2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/scripts/migrations.sh b/scripts/migrations.sh
index 0e54cbb..a884dea 100755
--- a/scripts/migrations.sh
+++ b/scripts/migrations.sh
@@ -41,7 +41,13 @@ if [[ "$TYPE" == "all" || "$TYPE" == "users" ]]; then
   # UPDATE migrate.node SET uid = 1 where uid > 500;
   # UPDATE migrate.files SET uid = 1 where uid > 500;
   # UPDATE migrate.node_revision SET uid = 1 where uid > 500;
-  drush migrate:import drupalorg_migrate_users
+  drush migrate:import drupalorg_migrate_users --feedback 1000
+fi
+
+if [[ "$TYPE" == "full-users" ]]; then
+  drush migrate:import drupalorg_migrate_user_roles --update
+  drush state:set drupalorg_migrate.drupalorg_migrate_users_last_record 0 --input-format=integer
+  drush migrate:import drupalorg_migrate_users --feedback 10000
 fi
 
 if [[ "$TYPE" == "all" || "$TYPE" == "files" ]]; then
diff --git a/src/Plugin/migrate/source/DrupalOrgUser.php b/src/Plugin/migrate/source/DrupalOrgUser.php
index acc3a7c..49c2d8a 100644
--- a/src/Plugin/migrate/source/DrupalOrgUser.php
+++ b/src/Plugin/migrate/source/DrupalOrgUser.php
@@ -2,6 +2,7 @@
 
 namespace Drupal\drupalorg_migrate\Plugin\migrate\source;
 
+use Drupal\migrate\MigrateException;
 use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
 use Drupal\migrate\Row;
 use Drupal\user\Plugin\migrate\source\d7\User;
@@ -16,6 +17,81 @@ use Drupal\user\Plugin\migrate\source\d7\User;
  */
 class DrupalOrgUser extends User {
 
+  /**
+   * {@inheritdoc}
+   */
+  protected function initializeIterator() {
+    // Initialize the batch size.
+    if ($this->batchSize == 0 && isset($this->configuration['batch_size'])) {
+      // Valid batch sizes are integers >= 0.
+      if (is_int($this->configuration['batch_size']) && ($this->configuration['batch_size']) >= 0) {
+        $this->batchSize = $this->configuration['batch_size'];
+      }
+      else {
+        throw new MigrateException("batch_size must be greater than or equal to zero");
+      }
+    }
+
+    // If a batch has run the query is already setup.
+    if ($this->batch == 0) {
+      $this->prepareQuery();
+
+      // The joinable + high-water-mark handling from the base plugin is
+      // intentionally left out, as it is not needed here. Instead, only
+      // iterate over records created/changed since the last stored timestamp.
+      // To run a full migration, reset that timestamp first:
+      // drush state:set drupalorg_migrate.drupalorg_migrate_users_last_record 0 --input-format=integer
+      $last_record = \Drupal::state()->get('drupalorg_migrate.drupalorg_migrate_users_last_record');
+      if (!is_null($last_record)) {
+        $this->query->condition('u.changed', $last_record, '>=');
+      }
+      // Always sort oldest-first so the timestamp stored by next() is a true
+      // high-water mark even if the run is interrupted, and add the uid as a
+      // tie-breaker so batched range queries page deterministically when
+      // several users share the same "changed" value.
+      $this->query->orderBy('u.changed');
+      $this->query->orderBy('u.uid');
+      // Record when the query was made; next() falls back to this timestamp
+      // when no row ends up being processed.
+      \Drupal::state()->set('drupalorg_migrate.drupalorg_migrate_users_last_query', \Drupal::time()->getRequestTime());
+    }
+
+    // Download data in batches for performance.
+    if ($this->batchSize > 0) {
+      $this->query->range($this->batch * $this->batchSize, $this->batchSize);
+    }
+    $statement = $this->query->execute();
+    $statement->setFetchMode(\PDO::FETCH_ASSOC);
+    return new \IteratorIterator($statement);
+  }
+
+  /**
+   * {@inheritdoc}
+   */
+  public function next() {
+    // Do the full "next" calculation first. It will only come out of here with
+    // a "currentRow" if there is one to process, or with null, if there is
+    // none.
+    parent::next();
+
+    // At this point, we've discarded all the rows that didn't need
+    // updating/creating.
+    $last_record = \Drupal::state()->get('drupalorg_migrate.drupalorg_migrate_users_last_record', 0);
+    $last_query = \Drupal::state()->get('drupalorg_migrate.drupalorg_migrate_users_last_query', 0);
+    if (!is_null($this->currentRow)) {
+      // Set the new timestamp only when the record is going to be processed.
+      $row_changed = $this->currentRow->getSourceProperty('changed');
+      if ($row_changed > $last_record) {
+        \Drupal::state()->set('drupalorg_migrate.drupalorg_migrate_users_last_record', $row_changed);
+      }
+    }
+    // If we could not find a row, then let's bring the timestamp up to the
+    // time of the initial query as there were no records created/changed.
+    elseif ($last_record < $last_query) {
+      \Drupal::state()->set('drupalorg_migrate.drupalorg_migrate_users_last_record', $last_query);
+    }
+  }
+
   /**
    * {@inheritdoc}
    */
-- 
GitLab