<?php

namespace Drupal\xmlsitemap;

use Drupal\Component\Datetime\TimeInterface;
use Drupal\Component\Utility\Bytes;
use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Database\Connection;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Extension\ModuleHandlerInterface;
use Drupal\Core\File\Exception\DirectoryNotReadyException;
use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\Language\LanguageInterface;
use Drupal\Core\Language\LanguageManagerInterface;
use Drupal\Core\Messenger\MessengerInterface;
use Drupal\Core\Site\Settings;
use Drupal\Core\State\StateInterface;
use Drupal\Core\StringTranslation\StringTranslationTrait;
use Drupal\Core\Url;
use Psr\Log\LoggerInterface;

/**
 * XmlSitemap generator service class.
 *
 * @todo Update all the methods in this class to match the procedural functions
 *   and start using the 'xmlsitemap_generator' service.
 */
class XmlSitemapGenerator implements XmlSitemapGeneratorInterface {

  use StringTranslationTrait;

  /**
   * Cached path aliases, keyed by language code and then by system path.
   *
   * @var array
   */
  public static $aliases;

  /**
   * Language code of the language-specific alias set currently cached.
   *
   * @var string
   */
  public static $lastLanguage;

  /**
   * Peak memory usage recorded as the baseline for getMemoryUsage().
   *
   * @var int
   */
  public static $memoryStart;

  /**
   * The xmlsitemap.settings config object.
   *
   * @var \Drupal\Core\Config\Config
   */
  protected $config;

  /**
   * The language manager object.
   *
   * @var \Drupal\Core\Language\LanguageManagerInterface
   */
  protected $languageManager;

  /**
   * The state object.
   *
   * @var \Drupal\Core\State\StateInterface
   */
  protected $state;

  /**
   * A logger instance.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected $logger;

  /**
   * The module handler.
   *
   * @var \Drupal\Core\Extension\ModuleHandlerInterface
   */
  protected $moduleHandler;

  /**
   * The entity type manager.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected $entityTypeManager;

  /**
   * The database connection.
   *
   * @var \Drupal\Core\Database\Connection
   */
  protected $connection;

  /**
   * The messenger service.
   *
   * @var \Drupal\Core\Messenger\MessengerInterface
   */
  protected $messenger;

  /**
   * The file system.
   *
   * @var \Drupal\Core\File\FileSystemInterface
   */
  protected $fileSystem;

  /**
   * The time service.
   *
   * @var \Drupal\Component\Datetime\TimeInterface
   */
  protected $time;

  /**
   * Creates the generator and stores the services it depends on.
   *
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   Factory used to load the editable xmlsitemap.settings config.
   * @param \Drupal\Core\State\StateInterface $state
   *   The state storage.
   * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager
   *   The language manager.
   * @param \Psr\Log\LoggerInterface $logger
   *   The logger used for generation messages.
   * @param \Drupal\Core\Extension\ModuleHandlerInterface $module_handler
   *   The module handler.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager
   *   The entity type manager.
   * @param \Drupal\Core\Database\Connection $connection
   *   The database connection.
   * @param \Drupal\Core\Messenger\MessengerInterface $messenger
   *   The messenger service.
   * @param \Drupal\Core\File\FileSystemInterface $file_system
   *   The file system service.
   * @param \Drupal\Component\Datetime\TimeInterface $time
   *   The time service.
   */
  public function __construct(
    ConfigFactoryInterface $config_factory,
    StateInterface $state,
    LanguageManagerInterface $language_manager,
    LoggerInterface $logger,
    ModuleHandlerInterface $module_handler,
    EntityTypeManagerInterface $entity_type_manager,
    Connection $connection,
    MessengerInterface $messenger,
    FileSystemInterface $file_system,
    TimeInterface $time
  ) {
    // Plain service references.
    $this->time = $time;
    $this->fileSystem = $file_system;
    $this->messenger = $messenger;
    $this->connection = $connection;
    $this->entityTypeManager = $entity_type_manager;
    $this->moduleHandler = $module_handler;
    $this->logger = $logger;
    $this->languageManager = $language_manager;
    $this->state = $state;

    // The editable config object is needed because batchVariableSet() writes
    // to it.
    $this->config = $config_factory->getEditable('xmlsitemap.settings');
  }

  /**
   * {@inheritdoc}
   */
  public function getPathAlias($path, $language) {
    $not_specified = LanguageInterface::LANGCODE_NOT_SPECIFIED;

    // Load the language-neutral aliases the first time this is called. Each
    // lookup builds its own query: conditions added to a select query are
    // combined with AND, so sharing one query object between the two lookups
    // would ask for rows matching both language codes and find none.
    if (!isset(static::$aliases)) {
      static::$aliases[$not_specified] = $this->connection->select('path_alias', 'u')
        ->fields('u', ['path', 'alias'])
        ->condition('langcode', $not_specified, '=')
        ->execute()
        ->fetchAllKeyed();
    }

    // Only one language-specific alias set is cached at a time; switching
    // language drops the previous set before loading the new one.
    if ($language !== $not_specified && static::$lastLanguage != $language) {
      if (isset(static::$lastLanguage)) {
        unset(static::$aliases[static::$lastLanguage]);
      }
      static::$aliases[$language] = $this->connection->select('path_alias', 'u')
        ->fields('u', ['path', 'alias'])
        ->condition('langcode', $language, '=')
        ->orderBy('id')
        ->execute()
        ->fetchAllKeyed();
      static::$lastLanguage = $language;
    }

    // Prefer an alias in the requested language, then a language-neutral
    // alias, and finally fall back to the unaliased path.
    if ($language !== $not_specified && isset(static::$aliases[$language][$path])) {
      return static::$aliases[$language][$path];
    }
    if (isset(static::$aliases[$not_specified][$path])) {
      return static::$aliases[$not_specified][$path];
    }
    return $path;
  }

  /**
   * {@inheritdoc}
   */
  public function regenerateBefore() {
    // Try to raise the PHP memory limit before any generation work starts.
    $this->setMemoryLimit();

    // The start-of-run log entry is only written in developer mode.
    if (!$this->state->get('xmlsitemap_developer_mode')) {
      return;
    }

    $peak_usage = format_size(memory_get_peak_usage(TRUE));
    $this->logger->notice('Starting XML sitemap generation. Memory usage: @memory-peak.', [
      '@memory-peak' => $peak_usage,
    ]);
  }

  /**
   * {@inheritdoc}
   */
  public function getMemoryUsage($start = FALSE) {
    $peak_now = memory_get_peak_usage(TRUE);

    // (Re)record the baseline on request, or when none has been stored yet.
    if ($start || !isset(self::$memoryStart)) {
      self::$memoryStart = $peak_now;
    }

    // Report the growth in peak usage since the baseline was taken.
    return $peak_now - self::$memoryStart;
  }

  /**
   * {@inheritdoc}
   */
  public function getOptimalMemoryLimit() {
    $optimal_limit = &drupal_static(__FUNCTION__);

    // Reuse the value computed earlier in this request, if any.
    if (isset($optimal_limit)) {
      return $optimal_limit;
    }

    // Start from the minimum PHP memory limit defined by core.
    $optimal_limit = Bytes::toInt(DRUPAL_MINIMUM_PHP_MEMORY_LIMIT);

    // Allow 500 bytes for every link in a chunk.
    $optimal_limit += xmlsitemap_get_chunk_size() * 500;

    // When alias prefetching is enabled, allow 250 bytes per stored alias.
    if ($this->config->get('prefetch_aliases')) {
      $alias_count = $this->connection->query("SELECT COUNT(id) FROM {path_alias}")->fetchField();
      $optimal_limit += $alias_count * 250;
    }

    return $optimal_limit;
  }

  /**
   * {@inheritdoc}
   */
  public function setMemoryLimit($new_limit = NULL) {
    $current_limit = @ini_get('memory_limit');

    // Nothing to do when the limit cannot be read or is unlimited (-1).
    if (!$current_limit || $current_limit == -1) {
      return;
    }

    // Fall back to the computed optimum only when the caller did not ask for
    // a specific limit. The previous check was inverted: it left the default
    // NULL in place (so the limit was never raised) and replaced any limit
    // the caller passed explicitly.
    if (is_null($new_limit)) {
      $new_limit = $this->getOptimalMemoryLimit();
    }

    // Only ever raise the limit; never lower it.
    if (Bytes::toInt($current_limit) < $new_limit) {
      return @ini_set('memory_limit', $new_limit);
    }
  }

  /**
   * {@inheritdoc}
   */
  public function generatePage(XmlSitemapInterface $sitemap, $page) {
    // Write the requested chunk between the document start and end produced
    // by a writer dedicated to this page.
    $page_writer = new XmlSitemapWriter($sitemap, $page);

    $page_writer->startDocument();
    $this->generateChunk($sitemap, $page_writer, $page);
    $page_writer->endDocument();

    // The result is the element count reported by the writer, not the return
    // value of generateChunk().
    $element_count = $page_writer->getSitemapElementCount();
    return $element_count;
  }

  /**
   * {@inheritdoc}
   */
  public function generateChunk(XmlSitemapInterface $sitemap, XmlSitemapWriter $writer, $chunk) {
    $lastmod_format = $this->config->get('lastmod_format');

    // Options already set on the sitemap win; the array below only fills in
    // the missing ones.
    $url_options = $sitemap->uri['options'];
    $url_options += [
      'absolute' => TRUE,
      // Cast to string so rtrim() never receives NULL when no base URL is
      // configured in settings or state.
      'base_url' => rtrim((string) Settings::get('xmlsitemap_base_url', $this->state->get('xmlsitemap_base_url')), '/'),
      'language' => $this->languageManager->getDefaultLanguage(),
      // @todo Figure out a way to bring back the alias preloading optimization.
      // 'alias' => $this->config->get('prefetch_aliases'),
      'alias' => FALSE,
    ];

    $last_url = '';
    $link_count = 0;

    $query = $this->connection->select('xmlsitemap', 'x');
    $query->fields('x', [
      'loc', 'type', 'subtype', 'id', 'lastmod', 'changefreq', 'changecount', 'priority', 'language', 'access', 'status',
    ]);
    $query->condition('x.access', 1);
    $query->condition('x.status', 1);
    $query->orderBy('x.language', 'DESC');
    $query->orderBy('x.loc');
    $query->addTag('xmlsitemap_generate');
    $query->addMetaData('sitemap', $sitemap);

    // The chunk size is both the page length and the offset multiplier, so
    // look it up once. Chunks are 1-based; anything below 1 maps to offset 0.
    $chunk_size = xmlsitemap_get_chunk_size();
    $query->range(max($chunk - 1, 0) * $chunk_size, $chunk_size);
    $links = $query->execute();

    while ($link = $links->fetchAssoc()) {
      // Preserve the language code for hook_xmlsitemap_element_alter().
      $link['langcode'] = $link['language'];

      $link['language'] = $link['language'] != LanguageInterface::LANGCODE_NOT_SPECIFIED ? xmlsitemap_language_load($link['language']) : $url_options['language'];
      $link_options = [
        'language' => $link['language'],
        'xmlsitemap_link' => $link,
        'xmlsitemap_sitemap' => $sitemap,
      ];

      // Ensure every link starts with a slash. strncmp() is used instead of
      // reading offset 0 so an empty path does not raise an
      // "Uninitialized string offset" warning before being reported here.
      // @see \Drupal\Core\Url::fromInternalUri()
      if (strncmp((string) $link['loc'], '/', 1) !== 0) {
        // NOTE(review): execution is expected to continue after this call so
        // the path can be repaired; passing E_USER_ERROR to trigger_error()
        // is deprecated as of PHP 8.4 - confirm whether E_USER_WARNING is
        // acceptable here.
        trigger_error("The XML sitemap link path {$link['loc']} for {$link['type']} {$link['id']} is invalid because it does not start with a slash.", E_USER_ERROR);
        $link['loc'] = '/' . $link['loc'];
      }

      // @todo Add a separate hook_xmlsitemap_link_url_alter() here?
      $link_url = Url::fromUri('internal:' . $link['loc'], $link_options + $url_options)->toString();

      // Skip this link if it was a duplicate of the last one.
      // @todo Figure out a way to do this before generation so we can report
      // back to the user about this.
      if ($link_url == $last_url) {
        continue;
      }
      $last_url = $link_url;
      // Keep track of the total number of links written.
      $link_count++;

      $element = [];
      $element['loc'] = $link_url;
      if ($link['lastmod']) {
        $element['lastmod'] = gmdate($lastmod_format, $link['lastmod']);
        // If the link has a lastmod value, update the changefreq so that links
        // with a short changefreq but updated two years ago show decay.
        // We use abs() here just in case items were created on this same cron
        // run because lastmod would be greater than the request time.
        $link['changefreq'] = (abs($this->time->getRequestTime() - $link['lastmod']) + $link['changefreq']) / 2;
      }
      if ($link['changefreq']) {
        $element['changefreq'] = xmlsitemap_get_changefreq($link['changefreq']);
      }
      // The comparison is intentionally loose: the value comes from a
      // database row and may be the string '0.5'.
      if (isset($link['priority']) && $link['priority'] != 0.5) {
        // Don't output the priority value for links that have 0.5 priority.
        // This is the default 'assumed' value if priority is not included as
        // per the sitemaps.org specification.
        $element['priority'] = number_format($link['priority'], 1);
      }

      // @todo Should this be moved to XMLSitemapWriter::writeSitemapElement()?
      $this->moduleHandler->alter('xmlsitemap_element', $element, $link, $sitemap);

      $writer->writeElement('url', $element);
    }

    return $link_count;
  }

  /**
   * {@inheritdoc}
   */
  public function generateIndex(XmlSitemapInterface $sitemap, $pages = NULL) {
    $index_writer = new XmlSitemapWriter($sitemap, 'index');
    $index_writer->startDocument();

    $lastmod_format = $this->config->get('lastmod_format');

    // Options stored on the sitemap take precedence over these defaults.
    $url_options = $sitemap->uri['options'] + [
      'absolute' => TRUE,
      'xmlsitemap_base_url' => $this->state->get('xmlsitemap_base_url'),
      'language' => $this->languageManager->getDefaultLanguage(),
      'alias' => TRUE,
    ];

    // Default to the number of chunks recorded on the sitemap.
    $pages = $pages ?? $sitemap->getChunks();

    // Write one <sitemap> entry per page; pages are numbered from 1.
    $current_page = 1;
    while ($current_page <= $pages) {
      $url_options['query']['page'] = $current_page;

      $page_url = Url::fromRoute('xmlsitemap.sitemap_xml', [], $url_options)->toString();
      // @todo Use the actual lastmod value of the chunk file.
      $page_lastmod = gmdate($lastmod_format, $this->time->getRequestTime());

      // @todo Should the element be altered?
      $index_writer->writeElement('sitemap', [
        'loc' => $page_url,
        'lastmod' => $page_lastmod,
      ]);

      $current_page++;
    }

    $index_writer->endDocument();
    return $index_writer->getSitemapElementCount();
  }

  /**
   * {@inheritdoc}
   */
  public function regenerateBatchGenerate($smid, &$context) {
    // First pass for this sitemap: load it, reset its counters and make sure
    // its directory is ready and empty.
    if (!isset($context['sandbox']['sitemap'])) {
      $context['sandbox']['sitemap'] = $this->entityTypeManager->getStorage('xmlsitemap')->load($smid);
      $context['sandbox']['sitemap']->setChunks(1);
      $context['sandbox']['sitemap']->setLinks(0);
      $context['sandbox']['max'] = XMLSITEMAP_MAX_SITEMAP_LINKS;

      // Clear the cache directory for this sitemap before generating any files.
      if (!xmlsitemap_check_directory($context['sandbox']['sitemap'])) {
        throw new DirectoryNotReadyException("The sitemap directory could not be created or is not writable.");
      }
      xmlsitemap_clear_directory($context['sandbox']['sitemap']);
    }

    /** @var \Drupal\xmlsitemap\XmlSitemapInterface $sitemap */
    $sitemap = &$context['sandbox']['sitemap'];

    // A failed page is logged and then handled like an empty page below.
    $links = NULL;
    try {
      $links = $this->generatePage($sitemap, $sitemap->getChunks());
    }
    catch (\Exception $e) {
      // @todo Should this use watchdog_exception()?
      $this->logger->error($e);
    }

    if (empty($links)) {
      // No links were written, so this was the page after the last real one.
      // Cleanup the 'extra' empty file.
      $file = xmlsitemap_sitemap_get_file($sitemap, $sitemap->getChunks());
      if (file_exists($file) && $sitemap->getChunks() > 1) {
        $this->fileSystem->delete($file);
      }
      $sitemap->setChunks($sitemap->getChunks() - 1);

      // Save the updated chunks and links values.
      $context['sandbox']['max'] = $sitemap->getChunks();
      $sitemap->setUpdated($this->time->getRequestTime());
      xmlsitemap_sitemap_get_max_filesize($sitemap);
      xmlsitemap_sitemap_save($sitemap);

      $context['finished'] = 1;
      return;
    }

    // The page held links: report it, add to the totals and move on to the
    // next chunk.
    $context['message'] = $this->t('Generated %sitemap-url with @count links.', [
      '%sitemap-url' => Url::fromRoute('xmlsitemap.sitemap_xml', [], $sitemap->uri['options'] + ['query' => ['page' => $sitemap->getChunks()]])->toString(),
      '@count' => $links,
    ]);
    $sitemap->setLinks($sitemap->getLinks() + $links);
    $sitemap->setChunks($sitemap->getChunks() + 1);

    // Report fractional progress while below the maximum; otherwise
    // 'finished' is left untouched.
    if ($sitemap->getChunks() < $context['sandbox']['max']) {
      $context['finished'] = $sitemap->getChunks() / $context['sandbox']['max'];
    }
  }

  /**
   * {@inheritdoc}
   */
  public function regenerateBatchGenerateIndex($smid, &$context) {
    $sitemap = xmlsitemap_sitemap_load($smid);

    // An index is only written for a loadable sitemap with more than one
    // chunk.
    if ($sitemap == NULL || $sitemap->getChunks() <= 1) {
      return;
    }

    try {
      $this->generateIndex($sitemap);
    }
    catch (\Exception $e) {
      // @todo Should this use watchdog_exception()?
      $this->logger->error($e);
    }

    // The message is set whether or not index generation threw.
    $index_url = Url::fromRoute('xmlsitemap.sitemap_xml', [], $sitemap->uri['options'])->toString();
    $context['message'] = $this->t('Generated sitemap index %sitemap-url.', [
      '%sitemap-url' => $index_url,
    ]);
  }

  /**
   * {@inheritdoc}
   */
  public function regenerateBatchFinished($success, array $results, array $operations, $elapsed) {
    // Report failure when the batch failed or a regeneration is still flagged
    // as needed.
    if (!$success || $this->state->get('xmlsitemap_regenerate_needed') != FALSE) {
      $this->messenger->addError($this->t('The sitemaps were not successfully regenerated.'));
      return;
    }

    $this->state->set('xmlsitemap_generated_last', $this->time->getRequestTime());
    $this->messenger->addStatus($this->t('The sitemaps were regenerated.'));

    // Show a watchdog message that the sitemap was regenerated.
    $log_context = [
      '@elapsed' => $elapsed,
      '@memory-peak' => format_size(memory_get_peak_usage(TRUE)),
    ];
    $this->logger->notice('Finished XML sitemap generation in @elapsed. Memory usage: @memory-peak.', $log_context);
  }

  /**
   * {@inheritdoc}
   */
  public function rebuildBatchClear(array $entity_type_ids, $save_custom, &$context) {
    if (!empty($entity_type_ids)) {
      // Let other modules respond to the rebuild clearing.
      $this->moduleHandler->invokeAll('xmlsitemap_rebuild_clear', [$entity_type_ids, $save_custom]);

      $delete = $this->connection
        ->delete('xmlsitemap')
        ->condition('type', $entity_type_ids, 'IN');

      // If we want to save the custom data, make sure to exclude any links
      // that are not using default inclusion or priority.
      if ($save_custom) {
        $delete
          ->condition('status_override', 0)
          ->condition('priority_override', 0);
      }

      $delete->execute();
    }

    // The message is set even when there was nothing to clear.
    $context['message'] = $this->t('Links cleared');
  }

  /**
   * {@inheritdoc}
   */
  public function rebuildBatchFetch($entity_type_id, &$context) {
    // First pass: cache the link info and enabled bundles, and reset the
    // cursor used to page through entity IDs.
    if (!isset($context['sandbox']['info'])) {
      $context['sandbox']['info'] = xmlsitemap_get_link_info($entity_type_id);
      $context['sandbox']['bundles'] = xmlsitemap_get_link_type_enabled_bundles($entity_type_id);
      $context['sandbox']['progress'] = 0;
      $context['sandbox']['last_id'] = 0;
    }

    // Nothing to do when no bundle of this entity type is enabled.
    if (empty($context['sandbox']['bundles'])) {
      return;
    }

    $info = $context['sandbox']['info'];
    $entity_type = $this->entityTypeManager->getDefinition($entity_type_id);

    // Page by ID: each pass only asks for IDs above the last one processed.
    // NOTE(review): this entity query never calls accessCheck(); relying on
    // the implicit default is deprecated since Drupal 9.2 - confirm the
    // intended access behavior for the supported core versions.
    $query = $this->entityTypeManager->getStorage($entity_type_id)->getQuery();
    $query->condition($entity_type->getKey('id'), $context['sandbox']['last_id'], '>');
    if ($entity_type->hasKey('bundle')) {
      $query->condition($entity_type->getKey('bundle'), $context['sandbox']['bundles'], 'IN');
    }
    $query->addTag('xmlsitemap_link_bundle_access');
    $query->addTag('xmlsitemap_rebuild');
    $query->addMetaData('entity_type_id', $entity_type_id);
    $query->addMetaData('entity_info', $info);

    if (!isset($context['sandbox']['max'])) {
      $count_query = clone $query;
      $count_query->count();
      $context['sandbox']['max'] = $count_query->execute();
      if (!$context['sandbox']['max']) {
        // If there are no items to process, skip everything else.
        return;
      }
    }

    // PostgreSQL cannot have the ORDERED BY in the count query.
    $query->sort($entity_type->getKey('id'));

    // Get batch limit.
    $limit = $this->config->get('batch_limit');
    $query->range(0, $limit);

    $result = $query->execute();

    // An empty page means no entity is left above last_id, even if fewer
    // items than the initial count were seen. Finish here: otherwise
    // end([]) would store FALSE as last_id and 'finished' would stay below
    // 1, so the next pass would start again from the lowest ID. There is
    // also nothing to hand to the process callback.
    if (empty($result)) {
      $context['finished'] = 1;
      return;
    }

    $info['xmlsitemap']['process callback']($entity_type_id, $result);
    // The last value of the ID-sorted result becomes the cursor for the next
    // pass.
    $context['sandbox']['last_id'] = end($result);
    $context['sandbox']['progress'] += count($result);
    $context['message'] = $this->t('Processed %entity_type_id @last_id (@progress of @count).', [
      '%entity_type_id' => $entity_type_id,
      '@last_id' => $context['sandbox']['last_id'],
      '@progress' => $context['sandbox']['progress'],
      '@count' => $context['sandbox']['max'],
    ]);

    if ($context['sandbox']['progress'] >= $context['sandbox']['max']) {
      $context['finished'] = 1;
    }
    else {
      $context['finished'] = $context['sandbox']['progress'] / $context['sandbox']['max'];
    }
  }

  /**
   * {@inheritdoc}
   */
  public function rebuildBatchFinished($success, array $results, array $operations, $elapsed) {
    // The rebuild only counts as done when the batch succeeded and no rebuild
    // is flagged as still needed.
    $still_needed = !$success || $this->state->get('xmlsitemap_rebuild_needed', FALSE);

    if ($still_needed) {
      $this->messenger->addError($this->t('The sitemap links were not successfully rebuilt.'));
      return;
    }

    $this->messenger->addStatus($this->t('The sitemap links were rebuilt.'));
  }

  /**
   * {@inheritdoc}
   */
  public function batchVariableSet(array $variables) {
    $state_variables = xmlsitemap_state_variables();
    $config_changed = FALSE;

    foreach ($variables as $variable => $value) {
      // Names listed by xmlsitemap_state_variables() are stored in state;
      // everything else is written to the xmlsitemap.settings config.
      if (isset($state_variables[$variable])) {
        $this->state->set($variable, $value);
        continue;
      }
      $this->config->set($variable, $value);
      $config_changed = TRUE;
    }

    // Save the config once after all values are set instead of once per
    // variable; the stored result is the same.
    if ($config_changed) {
      $this->config->save();
    }
  }

}