<?php

namespace Drupal\simple_sitemap;

use XMLWriter;
use Drupal\simple_sitemap\Batch\Batch;
use Drupal\Core\Database\Connection;
use Drupal\Core\Extension\ModuleHandler;
use Drupal\Core\Language\LanguageManagerInterface;
use Drupal\Component\Datetime\Time;

/**
 * Class SitemapGenerator
 *
 * Queues the batch operations that collect sitemap links, renders sitemap
 * chunks and the sitemap index as XML and stores generated chunks in the
 * {simple_sitemap} table.
 *
 * The generator service is not injected through the constructor; callers have
 * to provide it through setGenerator() before calling any method that reads
 * settings (startGeneration(), generateSitemapIndex(), getCustomBaseUrl()).
 *
 * @package Drupal\simple_sitemap
 */
class SitemapGenerator {

  const XML_VERSION = '1.0';
  const ENCODING = 'UTF-8';
  const XMLNS = 'http://www.sitemaps.org/schemas/sitemap/0.9';
  const XMLNS_XHTML = 'http://www.w3.org/1999/xhtml';
  const GENERATED_BY = 'Generated by the Simple XML sitemap Drupal module: https://drupal.org/project/simple_sitemap.';
  const FIRST_CHUNK_INDEX = 1;
  const XMLNS_IMAGE = 'http://www.google.com/schemas/sitemap-image/1.1';

  /**
   * Batch wrapper that receives the batch info and the queued operations.
   *
   * @var \Drupal\simple_sitemap\Batch\Batch
   */
  protected $batch;

  /**
   * Helper used to look up the entity types supported by the sitemap.
   *
   * @var \Drupal\simple_sitemap\EntityHelper
   */
  protected $entityHelper;

  /**
   * Database connection used to read and write the {simple_sitemap} table.
   *
   * @var \Drupal\Core\Database\Connection
   */
  protected $db;

  /**
   * Language manager used to count the configured languages.
   *
   * @var \Drupal\Core\Language\LanguageManagerInterface
   */
  protected $languageManager;

  /**
   * Module handler used to invoke the simple_sitemap alter hooks.
   *
   * @var \Drupal\Core\Extension\ModuleHandler
   */
  protected $moduleHandler;

  /**
   * Value passed to the batch as 'from'; defaults to 'form'.
   *
   * @var string
   */
  protected $generateFrom = 'form';

  /**
   * TRUE when more than one language is returned by the language manager.
   *
   * @var bool
   */
  protected $isHreflangSitemap;

  /**
   * Settings provider; NULL until setGenerator() has been called.
   *
   * @var \Drupal\simple_sitemap\Simplesitemap
   */
  protected $generator;

  /**
   * Time service providing the timestamp stored with each chunk.
   *
   * @var \Drupal\Component\Datetime\Time
   */
  protected $time;

  /**
   * Default attributes of the <urlset> root element of a sitemap chunk.
   *
   * Treated as read-only defaults: generateSitemapChunk() works on a copy.
   *
   * @var array
   */
  protected static $attributes = [
    'xmlns' => self::XMLNS,
    'xmlns:xhtml' => self::XMLNS_XHTML,
    'xmlns:image' => self::XMLNS_IMAGE,
  ];

  /**
   * Default attributes of the <sitemapindex> root element.
   *
   * Treated as read-only defaults: generateSitemapIndex() works on a copy.
   *
   * @var array
   */
  protected static $indexAttributes = [
    'xmlns' => self::XMLNS,
  ];

  /**
   * SitemapGenerator constructor.
   *
   * @param \Drupal\simple_sitemap\Batch\Batch $batch
   * @param \Drupal\simple_sitemap\EntityHelper $entityHelper
   * @param \Drupal\Core\Database\Connection $database
   * @param \Drupal\Core\Extension\ModuleHandler $module_handler
   * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager
   * @param \Drupal\Component\Datetime\Time $time
   */
  public function __construct(
    Batch $batch,
    EntityHelper $entityHelper,
    Connection $database,
    ModuleHandler $module_handler,
    LanguageManagerInterface $language_manager,
    Time $time
  ) {
    $this->batch = $batch;
    $this->entityHelper = $entityHelper;
    $this->db = $database;
    $this->moduleHandler = $module_handler;
    $this->languageManager = $language_manager;
    $this->time = $time;
    // Needs the language manager, so this has to come after the assignments.
    $this->setIsHreflangSitemap();
  }

  /**
   * Flags the sitemap as hreflang sitemap if more than one language exists.
   */
  protected function setIsHreflangSitemap() {
    $this->isHreflangSitemap = count($this->languageManager->getLanguages()) > 1;
  }

  /**
   * Tells whether alternate (hreflang) links are written to the sitemap.
   *
   * @return bool
   */
  public function isHreflangSitemap() {
    return $this->isHreflangSitemap;
  }

  /**
   * Sets the generator service the sitemap settings are read from.
   *
   * @param \Drupal\simple_sitemap\Simplesitemap $generator
   *
   * @return $this
   */
  public function setGenerator(Simplesitemap $generator) {
    $this->generator = $generator;
    return $this;
  }

  /**
   * Sets the value that is handed to the batch as 'from'.
   *
   * @param string $from
   *
   * @return $this
   */
  public function setGenerateFrom($from) {
    $this->generateFrom = $from;
    return $this;
  }

  /**
   * Adds all operations to the batch and starts it.
   *
   * Queues one operation for the custom links, one per indexed entity
   * type/bundle combination and, if any module provides them through
   * hook_simple_sitemap_arbitrary_links_alter(), one for arbitrary links.
   */
  public function startGeneration() {
    // Look the limit up once; an empty value is handed to the batch as NULL.
    $batch_process_limit = $this->generator->getSetting('batch_process_limit');

    $this->batch->setBatchInfo([
      'from' => $this->generateFrom,
      'batch_process_limit' => !empty($batch_process_limit) ? $batch_process_limit : NULL,
      'max_links' => $this->generator->getSetting('max_links', 2000),
      'skip_untranslated' => $this->generator->getSetting('skip_untranslated', FALSE),
      'remove_duplicates' => $this->generator->getSetting('remove_duplicates', TRUE),
      'entity_types' => $this->generator->getBundleSettings(),
      'base_url' => $this->generator->getSetting('base_url', ''),
      'excluded_languages' => $this->generator->getSetting('excluded_languages', []),
    ]);

    // Add custom link generating operation.
    $this->batch->addOperation('simple_sitemap.custom_url_generator', $this->getCustomUrlsData());

    // Add entity link generating operations.
    foreach ($this->getEntityTypeData() as $data) {
      $this->batch->addOperation('simple_sitemap.entity_url_generator', $data);
    }

    // Add arbitrary links generating operation.
    $arbitrary_links = [];
    $this->moduleHandler->alter('simple_sitemap_arbitrary_links', $arbitrary_links);
    if (!empty($arbitrary_links)) {
      $this->batch->addOperation('simple_sitemap.arbitrary_url_generator', $arbitrary_links);
    }

    $this->batch->start();
  }

  /**
   * Returns a batch-ready data array for custom link generation.
   *
   * @return array
   *   Data to be processed: one entry per custom link, keyed like the custom
   *   links themselves, each holding 'path', 'priority' and 'changefreq'.
   *   Missing 'priority'/'changefreq' values are normalized to NULL.
   */
  protected function getCustomUrlsData() {
    $paths = [];
    foreach ($this->generator->getCustomLinks() as $i => $custom_path) {
      $paths[$i] = [
        'path' => $custom_path['path'],
        'priority' => isset($custom_path['priority']) ? $custom_path['priority'] : NULL,
        'changefreq' => isset($custom_path['changefreq']) ? $custom_path['changefreq'] : NULL,
      ];
    }
    return $paths;
  }

  /**
   * Collects entity metadata for entities that are set to be indexed
   * and returns an array of batch-ready data sets for entity link generation.
   *
   * @return array
   *   One data set per indexed bundle, each holding 'bundle_settings',
   *   'bundle_name', 'entity_type_name' and the entity type 'keys'.
   */
  protected function getEntityTypeData() {
    $data_sets = [];
    $sitemap_entity_types = $this->entityHelper->getSupportedEntityTypes();

    foreach ($this->generator->getBundleSettings() as $entity_type_name => $bundles) {
      // Settings of entity types the sitemap does not support are ignored.
      if (!isset($sitemap_entity_types[$entity_type_name])) {
        continue;
      }
      $keys = $sitemap_entity_types[$entity_type_name]->getKeys();

      // Menu fix.
      $keys['bundle'] = $entity_type_name === 'menu_link_content' ? 'menu_name' : $keys['bundle'];

      foreach ($bundles as $bundle_name => $bundle_settings) {
        // A bundle without an 'index' setting is treated as not indexed.
        if (empty($bundle_settings['index'])) {
          continue;
        }
        $data_sets[] = [
          'bundle_settings' => $bundle_settings,
          'bundle_name' => $bundle_name,
          'entity_type_name' => $entity_type_name,
          'keys' => $keys,
        ];
      }
    }
    return $data_sets;
  }

  /**
   * Wrapper method which takes links along with their options and then
   * generates and saves the sitemap.
   *
   * @param array $links
   *   All links with their multilingual versions and settings.
   * @param bool $remove_sitemap
   *   Remove old sitemap from database before inserting the new one.
   */
  public function generateSitemap(array $links, $remove_sitemap = FALSE) {
    // The chunk is rendered before the table is truncated below, so the old
    // sitemap is only dropped once the new chunk string exists.
    $values = [
      // First chunk index when starting over, otherwise highest stored ID + 1.
      'id' => $remove_sitemap ? self::FIRST_CHUNK_INDEX
        : $this->db->query('SELECT MAX(id) FROM {simple_sitemap}')
          ->fetchField() + 1,
      'sitemap_string' => $this->generateSitemapChunk($links),
      'sitemap_created' => $this->time->getRequestTime(),
    ];
    if ($remove_sitemap) {
      $this->db->truncate('simple_sitemap')->execute();
    }
    $this->db->insert('simple_sitemap')->fields($values)->execute();
  }

  /**
   * Generates and returns the sitemap index for all sitemap chunks.
   *
   * @param array $chunk_info
   *   Array containing chunk creation timestamps keyed by chunk ID. Each value
   *   is an object whose sitemap_created property holds the timestamp.
   *
   * @return string sitemap index
   */
  public function generateSitemapIndex(array $chunk_info) {
    $writer = new \XMLWriter();
    $writer->openMemory();
    $writer->setIndent(TRUE);
    $writer->startDocument(self::XML_VERSION, self::ENCODING);
    $writer->writeComment(self::GENERATED_BY);
    $writer->startElement('sitemapindex');

    // Add attributes to document. The hook gets a copy of the defaults so that
    // alterations do not leak into indexes generated later in the same request.
    $attributes = self::$indexAttributes;
    $this->moduleHandler->alter('simple_sitemap_index_attributes', $attributes);
    foreach ($attributes as $name => $value) {
      $writer->writeAttribute($name, $value);
    }

    // Add sitemap locations to document.
    $base_url = $this->getCustomBaseUrl();
    foreach ($chunk_info as $chunk_id => $chunk_data) {
      $writer->startElement('sitemap');
      $writer->writeElement('loc', $base_url . '/sitemaps/' . $chunk_id . '/' . 'sitemap.xml');
      // date('c') is the documented replacement for Drupal's deprecated
      // date_iso8601() and yields the same ISO 8601 string.
      $writer->writeElement('lastmod', date('c', $chunk_data->sitemap_created));
      $writer->endElement();
    }

    $writer->endElement();
    $writer->endDocument();
    return $writer->outputMemory();
  }

  /**
   * Returns the base URL used for the chunk locations in the sitemap index.
   *
   * @return string
   *   The 'base_url' setting if it is not empty, the global base URL otherwise.
   */
  public function getCustomBaseUrl() {
    $customBaseUrl = $this->generator->getSetting('base_url', '');
    return !empty($customBaseUrl) ? $customBaseUrl : $GLOBALS['base_url'];
  }

  /**
   * Generates and returns a sitemap chunk.
   *
   * @param array $links
   *   All links with their multilingual versions and settings. Each link needs
   *   a 'url'; 'alternate_urls' (keyed by language ID), 'lastmod',
   *   'changefreq', 'priority' and 'images' (each with a 'path') are optional.
   *
   * @return string
   *   Sitemap chunk
   */
  protected function generateSitemapChunk(array $links) {
    $writer = new \XMLWriter();
    $writer->openMemory();
    $writer->setIndent(TRUE);
    $writer->startDocument(self::XML_VERSION, self::ENCODING);
    $writer->writeComment(self::GENERATED_BY);
    $writer->startElement('urlset');

    // Add attributes to document. Work on a copy of the defaults: removing the
    // xhtml namespace or altering the shared static array would otherwise
    // affect every chunk generated afterwards in the same request.
    $attributes = self::$attributes;
    if (!$this->isHreflangSitemap()) {
      unset($attributes['xmlns:xhtml']);
    }
    $this->moduleHandler->alter('simple_sitemap_attributes', $attributes);
    foreach ($attributes as $name => $value) {
      $writer->writeAttribute($name, $value);
    }

    // Add URLs to document.
    $this->moduleHandler->alter('simple_sitemap_links', $links);
    foreach ($links as $link) {

      // Add each translation variant URL as location to the sitemap.
      $writer->startElement('url');
      $writer->writeElement('loc', $link['url']);

      // If more than one language is enabled, add all translation variant URLs
      // as alternate links to this location turning the sitemap into a hreflang
      // sitemap.
      if (isset($link['alternate_urls']) && $this->isHreflangSitemap()) {
        foreach ($link['alternate_urls'] as $language_id => $alternate_url) {
          $writer->startElement('xhtml:link');
          $writer->writeAttribute('rel', 'alternate');
          $writer->writeAttribute('hreflang', $language_id);
          $writer->writeAttribute('href', $alternate_url);
          $writer->endElement();
        }
      }

      // Add lastmod if any.
      if (isset($link['lastmod'])) {
        $writer->writeElement('lastmod', $link['lastmod']);
      }

      // Add changefreq if any.
      if (isset($link['changefreq'])) {
        $writer->writeElement('changefreq', $link['changefreq']);
      }

      // Add priority if any.
      if (isset($link['priority'])) {
        $writer->writeElement('priority', $link['priority']);
      }

      // Add images if any.
      if (!empty($link['images'])) {
        foreach ($link['images'] as $image) {
          $writer->startElement('image:image');
          $writer->writeElement('image:loc', $image['path']);
          $writer->endElement();
        }
      }

      $writer->endElement();
    }
    $writer->endElement();
    $writer->endDocument();
    return $writer->outputMemory();
  }

}