Commit d0e208c0 authored by thejimbirch's avatar thejimbirch Committed by gbyte.co

Search engine submission WIP

parent 9c657c65
This module adds additional functionality to the Simple XML Sitemap module
(https://www.drupal.org/project/simple_sitemap), providing the ability to
automatically submit generated sitemaps to search engines. Similar submission
functionality exists in the XML Sitemap module, but this module provides the
ability to take advantage of the multilingual capabilities of Simple XML Sitemap
and still be able to submit sitemaps automatically.
The settings that control which search engines receive the sitemap, and how
often it is submitted, are found at admin/config/search/simplesitemap/engines.
# Default search engine definition for Bing, enabled out of the box.
# '[sitemap]' in the URL is replaced with the sitemap URL on submission.
id: bing
label: 'Bing'
url: http://www.bing.com/ping?sitemap=[sitemap]
status: true
# Default search engine definition for Google, enabled out of the box.
# '[sitemap]' in the URL is replaced with the sitemap URL on submission.
id: google
label: 'Google'
url: http://www.google.com/ping?sitemap=[sitemap]
status: true
# Schema for search engine config entities. The keys mirror the public
# properties of the SearchEngine entity class.
simple_sitemap_engines.simple_sitemap_engine.*:
type: config_entity
label: 'Search engine'
mapping:
id:
type: string
label: 'Search engine ID'
label:
type: label
label: 'Label'
# Submission URL; '[sitemap]' is substituted with the sitemap URL.
url:
type: string
label: 'Submission URL'
status:
type: boolean
label: 'Enabled'
# Timestamp of the last successful submission to this engine.
last_submitted:
type: integer
label: 'Last submitted'
# Schema for the module-wide submission settings read by hook_cron().
simple_sitemap_engines.settings:
type: config_object
label: 'Sitemap search engine submission settings'
mapping:
enabled:
type: boolean
label: 'Sitemap submission enabled'
# Interval between submissions, in seconds (it is added to a timestamp and
# compared against time() in hook_cron()).
frequency:
type: integer
label: 'Sitemap submission frequency'
# Module metadata. Declares a dependency on the simple_sitemap module.
name: 'Simple XML Sitemap - Search engines'
type: module
description: 'Submits sitemaps to search engines.'
package: SEO
core: 8.x
dependencies:
- simple_sitemap:simple_sitemap
# NOTE(review): presumably the module's local task (tab) definition; it links
# the simple_sitemap_engines.settings route under the simple_sitemap.settings
# base route - confirm against the file name.
simple_sitemap_engines.settings:
route_name: simple_sitemap_engines.settings
title: 'Search engines'
base_route: simple_sitemap.settings
weight: 5
<?php
/**
* @file
* Submits sitemaps to search engines.
*
* (c) 2019 Brad Greco.
* This module may be distributed under the terms of GPL version 2.0 or later.
*/
/**
 * Implements hook_cron().
 *
 * Once the configured submission interval has passed since the last queueing,
 * every enabled search engine is placed on the submission queue and the time
 * of queueing is recorded in state.
 *
 * @see Drupal\simple_sitemap_engines\Plugin\QueueWorker\SitemapSubmitter
 */
function simple_sitemap_engines_cron() {
  $settings = \Drupal::config('simple_sitemap_engines.settings');
  if (!$settings->get('enabled')) {
    // Automatic submission is switched off.
    return;
  }

  $interval = $settings->get('frequency');
  $previous_run = \Drupal::state()->get('simple_sitemap_engines_last_submitted', 0);
  if ($interval <= 0 || $previous_run + $interval >= time()) {
    // Either no usable interval is configured or it has not elapsed yet.
    return;
  }

  // Only engines flagged as enabled are queued.
  $enabled_engines = \Drupal::entityTypeManager()
    ->getStorage('simple_sitemap_engine')
    ->loadByProperties(['status' => TRUE]);

  // The queue items carry just the engine ID; the worker loads the entity.
  foreach (array_keys($enabled_engines) as $engine_id) {
    \Drupal::queue('simple_sitemap_engine_submit')->createItem($engine_id);
  }

  \Drupal::state()->set('simple_sitemap_engines_last_submitted', time());
}
# Settings form for search engine submission (SimplesitemapEnginesForm).
simple_sitemap_engines.settings:
path: '/admin/config/search/simplesitemap/engines'
defaults:
_form: '\Drupal\simple_sitemap_engines\Form\SimplesitemapEnginesForm'
_title: 'Simple XML Sitemap Settings'
requirements:
_permission: 'administer sitemap settings'
# Listing of all search engine config entities; linked to from the settings
# form description.
entity.simple_sitemap_engine.list:
path: '/admin/config/search/simplesitemap/engines/list'
defaults:
_entity_list: simple_sitemap_engine
_title: 'Search engines'
requirements:
_permission: 'administer sitemap settings'
<?php
namespace Drupal\simple_sitemap_engines\Controller;
use Drupal\Core\Config\Entity\ConfigEntityListBuilder;
use Drupal\Core\Datetime\DateFormatterInterface;
use Drupal\Core\Entity\EntityInterface;
use Drupal\Core\Entity\EntityStorageInterface;
use Drupal\Core\Entity\EntityTypeInterface;
use Symfony\Component\DependencyInjection\ContainerInterface;
/**
 * Builds the listing table of search engine config entities.
 */
class SearchEngineListBuilder extends ConfigEntityListBuilder {

  /**
   * Service used to render the last submission timestamp.
   *
   * @var \Drupal\Core\Datetime\DateFormatterInterface
   */
  protected $dateFormatter;

  /**
   * Constructs a SearchEngineListBuilder.
   *
   * @param \Drupal\Core\Entity\EntityTypeInterface $entity_type
   *   Definition of the entity type being listed.
   * @param \Drupal\Core\Entity\EntityStorageInterface $storage
   *   Storage handler for the entity type being listed.
   * @param \Drupal\Core\Datetime\DateFormatterInterface $date_formatter
   *   Service used to render the last submission timestamp.
   */
  public function __construct(EntityTypeInterface $entity_type, EntityStorageInterface $storage, DateFormatterInterface $date_formatter) {
    parent::__construct($entity_type, $storage);
    $this->dateFormatter = $date_formatter;
  }

  /**
   * {@inheritdoc}
   */
  public static function createInstance(ContainerInterface $container, EntityTypeInterface $entity_type) {
    $storage = $container->get('entity_type.manager')->getStorage($entity_type->id());
    $date_formatter = $container->get('date.formatter');

    return new static($entity_type, $storage, $date_formatter);
  }

  /**
   * {@inheritdoc}
   */
  public function buildHeader() {
    // The parent implementation is not called here; the table consists of
    // exactly these four columns, matching buildRow().
    return [
      'label' => $this->t('Name'),
      'url' => $this->t('Submission URL'),
      'status' => $this->t('Status'),
      'last_submitted' => $this->t('Last submitted'),
    ];
  }

  /**
   * {@inheritdoc}
   */
  public function buildRow(EntityInterface $entity) {
    /** @var \Drupal\simple_sitemap_engines\Entity\SearchEngine $entity */
    // An empty timestamp means the engine has not been submitted to yet.
    if ($entity->last_submitted) {
      $submitted = $this->dateFormatter->format($entity->last_submitted, 'short');
    }
    else {
      $submitted = $this->t('Never');
    }

    if ($entity->status) {
      $state = $this->t('Enabled');
    }
    else {
      $state = $this->t('Disabled');
    }

    return [
      'label' => $entity->label(),
      'url' => $entity->url,
      'status' => $state,
      'last_submitted' => $submitted,
    ];
  }

}
<?php
namespace Drupal\simple_sitemap_engines\Entity;
use Drupal\Core\Config\Entity\ConfigEntityBase;
/**
 * Defines the search engine entity class.
 *
 * @ConfigEntityType(
 *   id = "simple_sitemap_engine",
 *   label = @Translation("Search engine"),
 *   admin_permission = "administer sitemap settings",
 *   entity_keys = {
 *     "id" = "id",
 *     "label" = "label",
 *   },
 *   handlers = {
 *     "list_builder" = "Drupal\simple_sitemap_engines\Controller\SearchEngineListBuilder",
 *   },
 *   links = {
 *     "collection" = "/admin/config/search/simplesitemap/engines/list",
 *   },
 *   config_export = {
 *     "id",
 *     "label",
 *     "url",
 *     "status",
 *     "last_submitted",
 *   }
 * )
 */
class SearchEngine extends ConfigEntityBase {

  /**
   * The search engine ID.
   *
   * @var string
   */
  public $id;

  /**
   * The search engine label.
   *
   * @var string
   */
  public $label;

  /**
   * The search engine submission URL.
   *
   * When submitting to search engines, '[sitemap]' will be replaced with the
   * full URL to the sitemap.xml.
   *
   * @var string
   */
  public $url;

  /**
   * The search engine enabled state.
   *
   * @var bool
   */
  public $status;

  /**
   * Timestamp when the sitemap was last submitted to this search engine.
   *
   * @var int
   */
  public $last_submitted;

  /**
   * Implements magic __toString() to simplify checkbox list building.
   *
   * The label is cast explicitly: label() yields NULL for an entity that has
   * no label yet, and a __toString() that returns a non-string is a fatal
   * error in PHP.
   *
   * @return string
   *   The search engine label, or an empty string when no label is set.
   */
  public function __toString() {
    return (string) $this->label();
  }

}
<?php
namespace Drupal\simple_sitemap_engines\Form;
use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Datetime\DateFormatter;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Form\ConfigFormBase;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Url;
use Symfony\Component\DependencyInjection\ContainerInterface;
/**
 * Form for managing search engine submission settings.
 *
 * Lets an administrator choose which search engine entities are enabled and
 * configure whether, and how often, sitemaps are submitted on cron.
 */
class SimplesitemapEnginesForm extends ConfigFormBase {

  /**
   * The entity type manager service.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected $entityTypeManager;

  /**
   * The date formatter service.
   *
   * @var \Drupal\Core\Datetime\DateFormatter
   */
  protected $dateFormatter;

  /**
   * SimplesitemapEnginesForm constructor.
   *
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   The config factory service.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager
   *   The entity type manager service.
   * @param \Drupal\Core\Datetime\DateFormatter $date_formatter
   *   The date formatter service.
   */
  public function __construct(ConfigFactoryInterface $config_factory, EntityTypeManagerInterface $entity_type_manager, DateFormatter $date_formatter) {
    parent::__construct($config_factory);
    $this->entityTypeManager = $entity_type_manager;
    $this->dateFormatter = $date_formatter;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container) {
    return new static(
      $container->get('config.factory'),
      $container->get('entity_type.manager'),
      $container->get('date.formatter')
    );
  }

  /**
   * {@inheritdoc}
   */
  public function getFormId() {
    return 'simple_sitemap_engines_settings_form';
  }

  /**
   * {@inheritdoc}
   */
  protected function getEditableConfigNames() {
    return ['simple_sitemap_engines.settings'];
  }

  /**
   * {@inheritdoc}
   */
  public function buildForm(array $form, FormStateInterface $form_state) {
    $config = $this->config('simple_sitemap_engines.settings');
    $engines = $this->entityTypeManager->getStorage('simple_sitemap_engine')->loadMultiple();

    // Build plain [id => label] checkbox options and the list of currently
    // enabled engine IDs, instead of handing entity objects to the render
    // system and relying on their __toString().
    $engine_options = [];
    $enabled_engines = [];
    foreach ($engines as $engine_id => $engine) {
      $engine_options[$engine_id] = $engine->label();
      if ($engine->status) {
        $enabled_engines[] = $engine_id;
      }
    }

    // Build the frequency options, in the form [interval => label], where
    // the interval is a number of seconds.
    $frequency_options = [];
    foreach ([10800, 21600, 43200, 86400, 259200, 604800] as $interval) {
      $frequency_options[$interval] = $this->dateFormatter->formatInterval($interval);
    }

    $form['engine_group'] = [
      '#type' => 'fieldset',
      '#title' => $this->t('Search engines'),
    ];
    $form['engine_group']['engines'] = [
      '#type' => 'checkboxes',
      '#title' => $this->t('Submit the sitemap to the following engines'),
      '#options' => $engine_options,
      '#default_value' => $enabled_engines,
      // The ':' placeholder prefix is the one intended for URLs placed in an
      // HTML attribute.
      '#description' => $this->t('Details about each search engine can be seen <a href=":url">here</a>.', [
        ':url' => Url::fromRoute('entity.simple_sitemap_engine.list')->toString(),
      ]),
    ];

    $form['settings'] = [
      '#type' => 'fieldset',
      '#title' => $this->t('Submission settings'),
    ];
    $form['settings']['enabled'] = [
      '#type' => 'checkbox',
      '#title' => $this->t('Submit the sitemap to search engines every:'),
      '#default_value' => $config->get('enabled'),
    ];
    $form['settings']['frequency'] = [
      '#type' => 'select',
      '#options' => $frequency_options,
      '#default_value' => $config->get('frequency'),
      // The interval can only be chosen while submission is switched on.
      '#states' => [
        'enabled' => [':input[name="enabled"]' => ['checked' => TRUE]],
      ],
    ];

    return parent::buildForm($form, $form_state);
  }

  /**
   * {@inheritdoc}
   */
  public function submitForm(array &$form, FormStateInterface $form_state) {
    $engines = $this->entityTypeManager->getStorage('simple_sitemap_engine')->loadMultiple();

    foreach ((array) $form_state->getValue('engines') as $engine_id => $checked) {
      // The engine may have been removed since the form was built.
      if (!isset($engines[$engine_id])) {
        continue;
      }
      // Only write engines whose status actually changed.
      $enabled = (bool) $checked;
      if ((bool) $engines[$engine_id]->status !== $enabled) {
        $engines[$engine_id]->status = $enabled;
        $engines[$engine_id]->save();
      }
    }

    // Store values in the types declared by the config schema.
    $this->config('simple_sitemap_engines.settings')
      ->set('enabled', (bool) $form_state->getValue('enabled'))
      ->set('frequency', (int) $form_state->getValue('frequency'))
      ->save();

    // Let the base class show the standard "saved" confirmation message.
    parent::submitForm($form, $form_state);
  }

}
<?php
namespace Drupal\simple_sitemap_engines\Plugin\QueueWorker;
use Drupal\Core\Entity\EntityStorageInterface;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\Core\Queue\QueueWorkerBase;
use Drupal\simple_sitemap\SimplesitemapManager;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\Exception\RequestException;
use Psr\Log\LoggerInterface;
use Symfony\Component\DependencyInjection\ContainerInterface;
/**
 * Process a queue of search engines to submit sitemaps.
 *
 * Each queue item is a search engine ID. Every sitemap variant URL is
 * submitted to that engine's submission URL.
 *
 * @QueueWorker(
 *   id = "simple_sitemap_engine_submit",
 *   title = @Translation("Sitemap search engine submission"),
 *   cron = {"time" = 30}
 * )
 *
 * @see simple_sitemap_engines_cron()
 */
class SitemapSubmitter extends QueueWorkerBase implements ContainerFactoryPluginInterface {

  /**
   * The search engine entity storage.
   *
   * @var \Drupal\Core\Entity\EntityStorageInterface
   */
  protected $engineStorage;

  /**
   * The HTTP client service.
   *
   * @var \GuzzleHttp\ClientInterface
   */
  protected $httpClient;

  /**
   * The sitemap manager service.
   *
   * @var \Drupal\simple_sitemap\SimplesitemapManager
   */
  protected $sitemapManager;

  /**
   * The simple sitemap logger.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected $logger;

  /**
   * Constructs a new class instance.
   *
   * @param array $configuration
   *   A configuration array containing information about the plugin instance.
   * @param string $plugin_id
   *   The plugin_id for the plugin instance.
   * @param mixed $plugin_definition
   *   The plugin implementation definition.
   * @param \Drupal\Core\Entity\EntityStorageInterface $engine_storage
   *   The search engine entity storage.
   * @param \GuzzleHttp\ClientInterface $http_client
   *   The HTTP client service.
   * @param \Drupal\simple_sitemap\SimplesitemapManager $sitemap_manager
   *   The sitemap manager service.
   * @param \Psr\Log\LoggerInterface $logger
   *   The simple sitemap logger.
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, EntityStorageInterface $engine_storage, ClientInterface $http_client, SimplesitemapManager $sitemap_manager, LoggerInterface $logger) {
    parent::__construct($configuration, $plugin_id, $plugin_definition);
    $this->engineStorage = $engine_storage;
    $this->httpClient = $http_client;
    $this->sitemapManager = $sitemap_manager;
    $this->logger = $logger;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $container->get('entity_type.manager')->getStorage('simple_sitemap_engine'),
      $container->get('http_client'),
      $container->get('simple_sitemap.manager'),
      $container->get('logger.factory')->get('simple_sitemap')
    );
  }

  /**
   * {@inheritdoc}
   */
  public function processItem($id) {
    /** @var \Drupal\simple_sitemap_engines\Entity\SearchEngine $engine */
    $engine = $this->engineStorage->load($id);
    if (!$engine) {
      // The engine no longer exists; nothing to submit.
      return;
    }

    $submitted = FALSE;
    foreach ($this->collectSitemapUrls() as $sitemap) {
      // NOTE(review): the sitemap URL is inserted without URL-encoding; the
      // module's kernel test expects the unencoded form.
      $submit_url = str_replace('[sitemap]', $sitemap['url'], $engine->url);
      try {
        $this->httpClient->request('GET', $submit_url);
        // Log if submission was successful.
        $this->logger->info('Sitemap %sitemap submitted to @url', ['%sitemap' => $sitemap['label'], '@url' => $submit_url]);
        // Record last submission time. This is purely informational; the
        // variable that determines when the next submission should be run is
        // stored in the global state.
        $engine->last_submitted = time();
        $submitted = TRUE;
      }
      catch (RequestException $e) {
        // Catch and log exceptions so this submission gets removed from the
        // queue whether or not it succeeded.
        // If the error was caused by network failure, it's fine to just wait
        // until next time the submission is queued to try again.
        // If the error was caused by a malformed URL, keeping the submission
        // in the queue to retry is pointless since it will always fail.
        $this->logger->error('%type: sitemap %sitemap could not be submitted to @url: @message', [
          '%type' => get_class($e),
          '%sitemap' => $sitemap['label'],
          '@url' => $submit_url,
          '@message' => $e->getMessage(),
        ]);
      }
    }

    // Only write the entity when a submission time was actually recorded.
    if ($submitted) {
      $engine->save();
    }
  }

  /**
   * Gathers the URLs of all sitemap variants.
   *
   * Entries are keyed by variant ID rather than by label, so that two
   * variants sharing a label do not overwrite each other.
   *
   * @return array[]
   *   An array keyed by variant ID. Each entry is an array with the keys
   *   'label' (the variant label) and 'url' (the sitemap URL).
   */
  protected function collectSitemapUrls() {
    $sitemaps = [];
    foreach ($this->sitemapManager->getSitemapTypes() as $type_name => $type_definition) {
      $sitemap_generator = $this->sitemapManager->getSitemapGenerator($type_definition['sitemapGenerator']);
      $variants = $this->sitemapManager->getSitemapVariants($type_name, FALSE);
      if (empty($variants)) {
        continue;
      }
      foreach ($variants as $variant_id => $variant) {
        $sitemaps[$variant_id] = [
          'label' => $variant['label'],
          'url' => $sitemap_generator->setSitemapVariant($variant_id)->getSitemapUrl(),
        ];
      }
    }
    return $sitemaps;
  }

}
This module uses PHPUnit kernel tests to follow drupal.org standards.
To run tests, execute the command below in the web root of your Drupal site,
where 'www-data' is the name of the user your web server runs as, and
'http://localhost/drupal8/web' is the URL to the root of your site.
sudo -u www-data php core/scripts/run-tests.sh --verbose --sqlite /tmp/test.sqlite --url http://localhost/drupal8/web simple_sitemap_engines
If you receive an error that sqlite is missing, install the php-sqlite3 Debian
package.
<?php
namespace Drupal\Tests\simple_sitemap_engines\Kernel;
use Drupal\KernelTests\KernelTestBase;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\Exception\RequestException;
use Prophecy\Argument;
// phpcs:disable Drupal.Arrays.Array.LongLineDeclaration
/**
* Tests search engine sitemap submission.
*
* @group simple_sitemap_engines
*/
class SubmitSitemapTest extends KernelTestBase {
/**
* The modules to enable.
*
* @var array
*/
public static $modules = ['system', 'simple_sitemap', 'simple_sitemap_engines'];
/**
* The cron service.
*
* @var \Drupal\Core\Cron
*/
protected $cron;
/**
 * The search engine entity storage.
 *
 * @var \Drupal\Core\Entity\EntityStorageInterface
 */
protected $engineStorage;

/**
 * The sitemap submission queue.
 *
 * Declared explicitly because setUp() assigns it; creating it as an
 * undeclared dynamic property is deprecated as of PHP 8.2.
 *
 * @var \Drupal\Core\Queue\QueueInterface
 */
protected $queue;
/**
 * {@inheritdoc}
 */
protected function setUp() {
  parent::setUp();
  $this->installEntitySchema('simple_sitemap_engine');
  $this->installConfig('simple_sitemap');
  $this->installConfig('simple_sitemap_engines');

  $this->cron = \Drupal::service('cron');
  $this->engineStorage = \Drupal::entityTypeManager()->getStorage('simple_sitemap_engine');
  $this->queue = \Drupal::queue('simple_sitemap_engine_submit');

  // Disable all search engines but one so tests will not fail if additional
  // search engines are added in the future.
  foreach ($this->engineStorage->loadMultiple() as $id => $engine) {
    // Strict comparison: with a loose '!=', an integer key 0 compares equal
    // to a non-numeric string under PHP < 8.
    if ($id !== 'google') {
      $engine->status = FALSE;
      $engine->save();
    }
  }
}
/**
 * Verifies the submission URL and the per-engine last submitted values.
 */
public function testSubmission() {
  // Swap the HTTP client service for a prophecy on which every GET request
  // succeeds.
  $client = $this->prophesize(ClientInterface::class);
  $client->request('GET', Argument::any())->willReturn(TRUE);
  $this->container->set('http_client', $client->reveal());

  // Submission is triggered by a cron run.
  $this->cron->run();

  // setUp() leaves only Google enabled, so Google alone may carry a
  // submission time.
  $google_engine = $this->engineStorage->load('google');
  $bing_engine = $this->engineStorage->load('bing');
  $this->assertNotEmpty($google_engine->last_submitted);
  $this->assertEmpty($bing_engine->last_submitted);

  // Exactly one request must have gone out, and it must target the expected
  // submission URL.
  $expected_url = 'http://www.google.com/ping?sitemap=http://localhost/default/sitemap.xml';
  $client->request('GET', $expected_url)->shouldBeCalled();
  $client->request('GET', Argument::any())->shouldBeCalledTimes(1);
}
/**
* Tests that sitemaps are not submitted every time cron runs.
*/
public function testNoDoubleSubmission() {
// Create a mock HTTP client.
$http_client = $this->prophesize(ClientInterface::class);
// Make mock HTTP requests always succeed.
$http_client->request('GET', Argument::any())->willReturn(TRUE);
// Replace the default HTTP client service with the mock.
$this->container->set('http_client', $http_client->reveal());
// Run cron to trigger submission.
$this->cron->run();
// Check that Google was submitted and store its last submitted time.
$google = $this->engineStorage->load('google');
$http_client->request('GET', 'http://www.google.com/ping?sitemap=http://localhost/default/sitemap.xml')->shouldBeCalledTimes(1);
$this->assertNotEmpty($google->last_submitted);
$google_last_submitted = $google->last_submitted;
// Make sure enough time passes between cron runs to guarantee that they
// do not run within the same second, since timestamps are compared below.