<?php

namespace Drupal\simple_sitemap;

use XMLWriter;
use Drupal\simple_sitemap\Batch\Batch;
use Drupal\Core\Database\Connection;
use Drupal\Core\Extension\ModuleHandler;
use Drupal\Core\Language\LanguageManagerInterface;

/**
 * Class SitemapGenerator
 *
 * Queues the batch operations that collect sitemap links, renders collected
 * links into XML sitemap chunks, stores those chunks in the {simple_sitemap}
 * table and renders the sitemap index that references them.
 *
 * @package Drupal\simple_sitemap
 */
class SitemapGenerator {

  const XML_VERSION = '1.0';
  const ENCODING = 'UTF-8';
  const XMLNS = 'http://www.sitemaps.org/schemas/sitemap/0.9';
  const XMLNS_XHTML = 'http://www.w3.org/1999/xhtml';
  const GENERATED_BY = 'Generated by the Simple XML sitemap Drupal module: https://drupal.org/project/simple_sitemap.';
  const FIRST_CHUNK_INDEX = 1;
  const XMLNS_IMAGE = 'http://www.google.com/schemas/sitemap-image/1.1';

  /**
   * Batch wrapper that receives the batch info and the queued operations.
   *
   * @var \Drupal\simple_sitemap\Batch\Batch
   */
  protected $batch;

  /**
   * Helper used to look up the entity types supported by the sitemap.
   *
   * @var \Drupal\simple_sitemap\EntityHelper
   */
  protected $entityHelper;

  /**
   * Database connection used to read and write the {simple_sitemap} table.
   *
   * @var \Drupal\Core\Database\Connection
   */
  protected $db;

  /**
   * Language manager used to count the available languages.
   *
   * @var \Drupal\Core\Language\LanguageManagerInterface
   */
  protected $languageManager;

  /**
   * Module handler used to invoke the 'simple_sitemap_links' alter hook.
   *
   * @var \Drupal\Core\Extension\ModuleHandler
   */
  protected $moduleHandler;

  /**
   * Value handed to the batch as its 'from' info entry.
   *
   * @var string
   */
  protected $generateFrom = 'form';

  /**
   * TRUE when more than one language is available.
   *
   * @var bool
   */
  protected $isHreflangSitemap;

  /**
   * Service providing the settings, bundle settings and custom links.
   *
   * Must be injected with setGenerator() before any method that reads
   * settings is called.
   *
   * @var \Drupal\simple_sitemap\Simplesitemap
   */
  protected $generator;

  /**
   * SitemapGenerator constructor.
   *
   * @param \Drupal\simple_sitemap\Batch\Batch $batch
   * @param \Drupal\simple_sitemap\EntityHelper $entityHelper
   * @param \Drupal\Core\Database\Connection $database
   * @param \Drupal\Core\Extension\ModuleHandler $module_handler
   * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager
   */
  public function __construct(
    Batch $batch,
    EntityHelper $entityHelper,
    Connection $database,
    ModuleHandler $module_handler,
    LanguageManagerInterface $language_manager
  ) {
    $this->batch = $batch;
    $this->entityHelper = $entityHelper;
    $this->db = $database;
    $this->moduleHandler = $module_handler;
    $this->languageManager = $language_manager;
    $this->setIsHreflangSitemap();
  }

  /**
   * Flags the sitemap as a hreflang sitemap if several languages exist.
   */
  protected function setIsHreflangSitemap() {
    $this->isHreflangSitemap = count($this->languageManager->getLanguages()) > 1;
  }

  /**
   * Tells whether alternate (hreflang) links are added to the sitemap.
   *
   * @return bool
   *   TRUE if more than one language was available on construction.
   */
  public function isHreflangSitemap() {
    return $this->isHreflangSitemap;
  }

  /**
   * Injects the service that provides settings and custom links.
   *
   * @param \Drupal\simple_sitemap\Simplesitemap $generator
   *
   * @return $this
   */
  public function setGenerator(Simplesitemap $generator) {
    $this->generator = $generator;
    return $this;
  }

  /**
   * Sets the value passed to the batch as its 'from' info entry.
   *
   * @param string $from
   *
   * @return $this
   */
  public function setGenerateFrom($from) {
    $this->generateFrom = $from;
    return $this;
  }

  /**
   * Adds all operations to the batch and starts it.
   */
  public function startGeneration() {
    // Read the setting once; an empty value is passed on as NULL.
    $batch_process_limit = $this->generator->getSetting('batch_process_limit');

    $this->batch->setBatchInfo([
      'from' => $this->generateFrom,
      'batch_process_limit' => !empty($batch_process_limit) ? $batch_process_limit : NULL,
      'max_links' => $this->generator->getSetting('max_links', 2000),
      'skip_untranslated' => $this->generator->getSetting('skip_untranslated', FALSE),
      'remove_duplicates' => $this->generator->getSetting('remove_duplicates', TRUE),
      'entity_types' => $this->generator->getBundleSettings(),
      'base_url' => $this->generator->getSetting('base_url', ''),
    ]);

    // Add custom link generating operation.
    $this->batch->addOperation('generateCustomUrls', $this->getCustomUrlsData());

    // Add entity link generating operations.
    foreach ($this->getEntityTypeData() as $data) {
      $this->batch->addOperation('generateBundleUrls', $data);
    }
    $this->batch->start();
  }

  /**
   * Returns a batch-ready data array for custom link generation.
   *
   * @return array
   *   Data to be processed: one entry per custom link, keyed like the custom
   *   link list, each holding 'path', 'priority', 'changefreq' and 'lastmod'.
   *   Missing 'priority'/'changefreq' values are NULL; 'lastmod' is always
   *   NULL.
   */
  protected function getCustomUrlsData() {
    $paths = [];
    foreach ($this->generator->getCustomLinks() as $i => $custom_path) {
      $paths[$i]['path'] = $custom_path['path'];
      $paths[$i]['priority'] = isset($custom_path['priority']) ? $custom_path['priority'] : NULL;
      $paths[$i]['changefreq'] = isset($custom_path['changefreq']) ? $custom_path['changefreq'] : NULL;
      // todo: implement lastmod.
      $paths[$i]['lastmod'] = NULL;
    }
    return $paths;
  }

  /**
   * Collects entity metadata for entities that are set to be indexed
   * and returns an array of batch-ready data sets for entity link generation.
   *
   * @return array
   *   One data set per indexed bundle, each holding 'bundle_settings',
   *   'bundle_name', 'entity_type_name' and 'keys'.
   */
  protected function getEntityTypeData() {
    $data_sets = [];
    $sitemap_entity_types = $this->entityHelper->getSupportedEntityTypes();
    $entity_types = $this->generator->getBundleSettings();

    foreach ($entity_types as $entity_type_name => $bundles) {
      // Skip settings of entity types that are not supported.
      if (!isset($sitemap_entity_types[$entity_type_name])) {
        continue;
      }
      $keys = $sitemap_entity_types[$entity_type_name]->getKeys();

      // Menu fix.
      if ($entity_type_name === 'menu_link_content') {
        $keys['bundle'] = 'menu_name';
      }

      foreach ($bundles as $bundle_name => $bundle_settings) {
        // Bundles lacking an 'index' setting are treated as not indexed
        // instead of raising an undefined index notice.
        if (empty($bundle_settings['index'])) {
          continue;
        }
        $data_sets[] = [
          'bundle_settings' => $bundle_settings,
          'bundle_name' => $bundle_name,
          'entity_type_name' => $entity_type_name,
          'keys' => $keys,
        ];
      }
    }
    return $data_sets;
  }

  /**
   * Wrapper method which takes links along with their options, lets other
   * modules alter the links and then generates and saves the sitemap.
   *
   * @param array $links
   *   All links with their multilingual versions and settings.
   * @param bool $remove_sitemap
   *   Remove old sitemap from database before inserting the new one.
   */
  public function generateSitemap(array $links, $remove_sitemap = FALSE) {
    // Invoke alter hook.
    $this->moduleHandler->alter('simple_sitemap_links', $links);

    if ($remove_sitemap) {
      $chunk_id = self::FIRST_CHUNK_INDEX;
    }
    else {
      // MAX(id) is NULL for an empty table; the cast makes the resulting
      // first ID of 1 explicit.
      $chunk_id = (int) $this->db->query('SELECT MAX(id) FROM {simple_sitemap}')
        ->fetchField() + 1;
    }

    $values = [
      'id' => $chunk_id,
      'sitemap_string' => $this->generateSitemapChunk($links),
      'sitemap_created' => REQUEST_TIME,
    ];

    // The new chunk is rendered before the old sitemap is dropped.
    if ($remove_sitemap) {
      $this->db->truncate('simple_sitemap')->execute();
    }
    $this->db->insert('simple_sitemap')->fields($values)->execute();
  }

  /**
   * Generates and returns the sitemap index for all sitemap chunks.
   *
   * @param array $chunk_info
   *   Array containing chunk creation timestamps keyed by chunk ID. Each value
   *   is an object exposing a 'sitemap_created' property.
   *
   * @return string sitemap index
   */
  public function generateSitemapIndex(array $chunk_info) {
    $writer = $this->createWriter('sitemapindex');
    $base_url = $this->getCustomBaseUrl();

    foreach ($chunk_info as $chunk_id => $chunk_data) {
      $writer->startElement('sitemap');
      $writer->writeElement('loc', $base_url . '/sitemaps/' . $chunk_id . '/sitemap.xml');
      $writer->writeElement('lastmod', date_iso8601($chunk_data->sitemap_created));
      $writer->endElement();
    }
    $writer->endElement();
    $writer->endDocument();
    return $writer->outputMemory();
  }

  /**
   * Returns the base URL to prefix sitemap URLs with.
   *
   * Trailing slashes of the configured base URL are stripped, as the paths
   * appended to it start with a slash themselves.
   *
   * @return string
   *   The 'base_url' setting if it is not empty, otherwise the global base
   *   URL.
   */
  public function getCustomBaseUrl() {
    $customBaseUrl = rtrim((string) $this->generator->getSetting('base_url', ''), '/');
    return !empty($customBaseUrl) ? $customBaseUrl : $GLOBALS['base_url'];
  }

  /**
   * Generates and returns a sitemap chunk.
   *
   * @param array $links
   *   All links with their multilingual versions and settings. Each link
   *   needs a 'url' and may provide 'alternate_urls' (keyed by language ID),
   *   'lastmod', 'changefreq', 'priority' and 'images' (list of image URLs).
   *
   * @return string
   *   Sitemap chunk
   */
  protected function generateSitemapChunk(array $links) {
    $writer = $this->createWriter('urlset');
    $is_hreflang = $this->isHreflangSitemap();

    if ($is_hreflang) {
      $writer->writeAttribute('xmlns:xhtml', self::XMLNS_XHTML);
    }

    foreach ($links as $link) {

      // Add each translation variant URL as location to the sitemap.
      $writer->startElement('url');
      $writer->writeElement('loc', $link['url']);

      // If more than one language is enabled, add all translation variant URLs
      // as alternate links to this location turning the sitemap into a hreflang
      // sitemap. Links may have been added by the alter hook without any
      // alternate URLs, so their absence is tolerated.
      if ($is_hreflang && !empty($link['alternate_urls'])) {
        foreach ($link['alternate_urls'] as $language_id => $alternate_url) {
          $writer->startElement('xhtml:link');
          $writer->writeAttribute('rel', 'alternate');
          $writer->writeAttribute('hreflang', $language_id);
          $writer->writeAttribute('href', $alternate_url);
          $writer->endElement();
        }
      }

      // Add lastmod, changefreq and priority if any, in this order.
      foreach (['lastmod', 'changefreq', 'priority'] as $element) {
        if (isset($link[$element])) {
          $writer->writeElement($element, $link[$element]);
        }
      }

      // Add images if any.
      if (!empty($link['images'])) {
        foreach ($link['images'] as $image_url) {
          $writer->startElement('image:image');
          $writer->writeElement('image:loc', $image_url);
          $writer->endElement();
        }
      }

      $writer->endElement();
    }
    $writer->endElement();
    $writer->endDocument();
    return $writer->outputMemory();
  }

  /**
   * Starts an in-memory XML document and opens its root element.
   *
   * The document gets the XML declaration, the "generated by" comment and a
   * root element carrying the sitemap and image namespaces. The root element
   * is left open, so callers may add further attributes before writing child
   * elements and are responsible for closing it.
   *
   * @param string $root_element
   *   Name of the root element, e.g. 'urlset' or 'sitemapindex'.
   *
   * @return \XMLWriter
   *   Writer positioned inside the opened root element.
   */
  protected function createWriter($root_element) {
    $writer = new XMLWriter();
    $writer->openMemory();
    $writer->setIndent(TRUE);
    $writer->startDocument(self::XML_VERSION, self::ENCODING);
    $writer->writeComment(self::GENERATED_BY);
    $writer->startElement($root_element);
    $writer->writeAttribute('xmlns', self::XMLNS);
    $writer->writeAttribute('xmlns:image', self::XMLNS_IMAGE);
    return $writer;
  }

}