From d0e208c01875aee391c4222ffa37bb465e8baee2 Mon Sep 17 00:00:00 2001 From: Caffeinated Date: Sat, 1 Jun 2019 03:51:48 +0200 Subject: [PATCH] Search engine submission WIP --- modules/simple_sitemap_engines/README.txt | 9 ++ .../simple_sitemap_engines.settings.yml | 2 + ...map_engines.simple_sitemap_engine.bing.yml | 4 + ...p_engines.simple_sitemap_engine.google.yml | 4 + .../schema/simple_sitemap_engines.schema.yml | 30 ++++ .../simple_sitemap_engines.info.yml | 7 + .../simple_sitemap_engines.links.task.yml | 5 + .../simple_sitemap_engines.module | 37 +++++ .../simple_sitemap_engines.routing.yml | 15 ++ .../Controller/SearchEngineListBuilder.php | 77 +++++++++ .../src/Entity/SearchEngine.php | 83 ++++++++++ .../src/Form/SimplesitemapEnginesForm.php | 146 +++++++++++++++++ .../Plugin/QueueWorker/SitemapSubmitter.php | 140 +++++++++++++++++ .../simple_sitemap_engines/tests/README.txt | 10 ++ .../tests/src/Kernel/SubmitSitemapTest.php | 148 ++++++++++++++++++ 15 files changed, 717 insertions(+) create mode 100644 modules/simple_sitemap_engines/README.txt create mode 100644 modules/simple_sitemap_engines/config/install/simple_sitemap_engines.settings.yml create mode 100644 modules/simple_sitemap_engines/config/install/simple_sitemap_engines.simple_sitemap_engine.bing.yml create mode 100644 modules/simple_sitemap_engines/config/install/simple_sitemap_engines.simple_sitemap_engine.google.yml create mode 100644 modules/simple_sitemap_engines/config/schema/simple_sitemap_engines.schema.yml create mode 100644 modules/simple_sitemap_engines/simple_sitemap_engines.info.yml create mode 100644 modules/simple_sitemap_engines/simple_sitemap_engines.links.task.yml create mode 100644 modules/simple_sitemap_engines/simple_sitemap_engines.module create mode 100644 modules/simple_sitemap_engines/simple_sitemap_engines.routing.yml create mode 100644 modules/simple_sitemap_engines/src/Controller/SearchEngineListBuilder.php create mode 100644 
modules/simple_sitemap_engines/src/Entity/SearchEngine.php create mode 100644 modules/simple_sitemap_engines/src/Form/SimplesitemapEnginesForm.php create mode 100644 modules/simple_sitemap_engines/src/Plugin/QueueWorker/SitemapSubmitter.php create mode 100644 modules/simple_sitemap_engines/tests/README.txt create mode 100644 modules/simple_sitemap_engines/tests/src/Kernel/SubmitSitemapTest.php diff --git a/modules/simple_sitemap_engines/README.txt b/modules/simple_sitemap_engines/README.txt new file mode 100644 index 0000000..6af2c52 --- /dev/null +++ b/modules/simple_sitemap_engines/README.txt @@ -0,0 +1,9 @@ +This module adds additional functionality to the Simple XML Sitemap module +(https://www.drupal.org/project/simple_sitemap), providing the ability to +automatically submit generated sitemaps to search engines. Similar submission +functionality exists in the XML Sitemap module, but this module provides the +ability to take advantage of the multilingual capabilities of Simple XML Sitemap +and still be able to submit sitemaps automatically. + +Settings for which search engines to submit to and submission frequency may be +found at admin/config/search/simplesitemap/engines. 
diff --git a/modules/simple_sitemap_engines/config/install/simple_sitemap_engines.settings.yml b/modules/simple_sitemap_engines/config/install/simple_sitemap_engines.settings.yml new file mode 100644 index 0000000..ed43cbd --- /dev/null +++ b/modules/simple_sitemap_engines/config/install/simple_sitemap_engines.settings.yml @@ -0,0 +1,2 @@ +enabled: true +frequency: 86400 diff --git a/modules/simple_sitemap_engines/config/install/simple_sitemap_engines.simple_sitemap_engine.bing.yml b/modules/simple_sitemap_engines/config/install/simple_sitemap_engines.simple_sitemap_engine.bing.yml new file mode 100644 index 0000000..c57fe4b --- /dev/null +++ b/modules/simple_sitemap_engines/config/install/simple_sitemap_engines.simple_sitemap_engine.bing.yml @@ -0,0 +1,4 @@ +id: bing +label: 'Bing' +url: http://www.bing.com/ping?sitemap=[sitemap] +status: true diff --git a/modules/simple_sitemap_engines/config/install/simple_sitemap_engines.simple_sitemap_engine.google.yml b/modules/simple_sitemap_engines/config/install/simple_sitemap_engines.simple_sitemap_engine.google.yml new file mode 100644 index 0000000..99622df --- /dev/null +++ b/modules/simple_sitemap_engines/config/install/simple_sitemap_engines.simple_sitemap_engine.google.yml @@ -0,0 +1,4 @@ +id: google +label: 'Google' +url: http://www.google.com/ping?sitemap=[sitemap] +status: true diff --git a/modules/simple_sitemap_engines/config/schema/simple_sitemap_engines.schema.yml b/modules/simple_sitemap_engines/config/schema/simple_sitemap_engines.schema.yml new file mode 100644 index 0000000..d66f301 --- /dev/null +++ b/modules/simple_sitemap_engines/config/schema/simple_sitemap_engines.schema.yml @@ -0,0 +1,30 @@ +simple_sitemap_engines.simple_sitemap_engine.*: + type: config_entity + label: 'Search engine' + mapping: + id: + type: string + label: 'Search engine ID' + label: + type: label + label: 'Label' + url: + type: string + label: 'Submission URL' + status: + type: boolean + label: 'Enabled' + last_submitted: + type: 
integer + label: 'Last submitted' + +simple_sitemap_engines.settings: + type: config_object + label: 'Sitemap search engine submission settings' + mapping: + enabled: + type: boolean + label: 'Sitemap submission enabled' + frequency: + type: integer + label: 'Sitemap submission frequency' diff --git a/modules/simple_sitemap_engines/simple_sitemap_engines.info.yml b/modules/simple_sitemap_engines/simple_sitemap_engines.info.yml new file mode 100644 index 0000000..febfffb --- /dev/null +++ b/modules/simple_sitemap_engines/simple_sitemap_engines.info.yml @@ -0,0 +1,7 @@ +name: 'Simple XML Sitemap - Search engines' +type: module +description: 'Submits sitemaps to search engines.' +package: SEO +core: 8.x +dependencies: + - simple_sitemap:simple_sitemap diff --git a/modules/simple_sitemap_engines/simple_sitemap_engines.links.task.yml b/modules/simple_sitemap_engines/simple_sitemap_engines.links.task.yml new file mode 100644 index 0000000..a4a7f79 --- /dev/null +++ b/modules/simple_sitemap_engines/simple_sitemap_engines.links.task.yml @@ -0,0 +1,5 @@ +simple_sitemap_engines.settings: + route_name: simple_sitemap_engines.settings + title: 'Search engines' + base_route: simple_sitemap.settings + weight: 5 diff --git a/modules/simple_sitemap_engines/simple_sitemap_engines.module b/modules/simple_sitemap_engines/simple_sitemap_engines.module new file mode 100644 index 0000000..db93432 --- /dev/null +++ b/modules/simple_sitemap_engines/simple_sitemap_engines.module @@ -0,0 +1,37 @@ +get('enabled')) { + $submit_frequency = $config->get('frequency'); + $last_submitted = \Drupal::state()->get('simple_sitemap_engines_last_submitted', 0); + + if ($submit_frequency > 0 && $last_submitted + $submit_frequency < time()) { + // Load only the enabled search engines. + $engines = \Drupal::entityTypeManager() + ->getStorage('simple_sitemap_engine') + ->loadByProperties(['status' => TRUE]); + // Add them to the submission queue. 
+ foreach ($engines as $id => $engine) { + \Drupal::queue('simple_sitemap_engine_submit')->createItem($id); + } + \Drupal::state()->set('simple_sitemap_engines_last_submitted', time()); + } + } +} diff --git a/modules/simple_sitemap_engines/simple_sitemap_engines.routing.yml b/modules/simple_sitemap_engines/simple_sitemap_engines.routing.yml new file mode 100644 index 0000000..b0df615 --- /dev/null +++ b/modules/simple_sitemap_engines/simple_sitemap_engines.routing.yml @@ -0,0 +1,15 @@ +simple_sitemap_engines.settings: + path: '/admin/config/search/simplesitemap/engines' + defaults: + _form: '\Drupal\simple_sitemap_engines\Form\SimplesitemapEnginesForm' + _title: 'Simple XML Sitemap Settings' + requirements: + _permission: 'administer sitemap settings' + +entity.simple_sitemap_engine.list: + path: '/admin/config/search/simplesitemap/engines/list' + defaults: + _entity_list: simple_sitemap_engine + _title: 'Search engines' + requirements: + _permission: 'administer sitemap settings' diff --git a/modules/simple_sitemap_engines/src/Controller/SearchEngineListBuilder.php b/modules/simple_sitemap_engines/src/Controller/SearchEngineListBuilder.php new file mode 100644 index 0000000..a1264d9 --- /dev/null +++ b/modules/simple_sitemap_engines/src/Controller/SearchEngineListBuilder.php @@ -0,0 +1,77 @@ +dateFormatter = $date_formatter; + } + + /** + * {@inheritdoc} + */ + public static function createInstance(ContainerInterface $container, EntityTypeInterface $entity_type) { + return new static( + $entity_type, + $container->get('entity_type.manager')->getStorage($entity_type->id()), + $container->get('date.formatter') + ); + } + + /** + * {@inheritdoc} + */ + public function buildHeader() { + $header['label'] = $this->t('Name'); + $header['url'] = $this->t('Submission URL'); + $header['status'] = $this->t('Status'); + $header['last_submitted'] = $this->t('Last submitted'); + return $header; + } + + /** + * {@inheritdoc} + */ + public function buildRow(EntityInterface 
$entity) { + /** @var \Drupal\simple_sitemap_engines\Entity\SearchEngine $entity */ + + $row['label'] = $entity->label(); + $row['url'] = $entity->url; + $row['status'] = $entity->status ? $this->t('Enabled') : $this->t('Disabled'); + $row['last_submitted'] = $entity->last_submitted + ? $this->dateFormatter->format($entity->last_submitted, 'short') + : $this->t('Never'); + + return $row; + } + +} diff --git a/modules/simple_sitemap_engines/src/Entity/SearchEngine.php b/modules/simple_sitemap_engines/src/Entity/SearchEngine.php new file mode 100644 index 0000000..e81abe0 --- /dev/null +++ b/modules/simple_sitemap_engines/src/Entity/SearchEngine.php @@ -0,0 +1,83 @@ +label(); + } + +} diff --git a/modules/simple_sitemap_engines/src/Form/SimplesitemapEnginesForm.php b/modules/simple_sitemap_engines/src/Form/SimplesitemapEnginesForm.php new file mode 100644 index 0000000..780c164 --- /dev/null +++ b/modules/simple_sitemap_engines/src/Form/SimplesitemapEnginesForm.php @@ -0,0 +1,146 @@ +entityTypeManager = $entity_type_manager; + $this->dateFormatter = $date_formatter; + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container) { + return new static( + $container->get('config.factory'), + $container->get('entity_type.manager'), + $container->get('date.formatter') + ); + } + + /** + * {@inheritdoc} + */ + public function getFormId() { + return 'simple_sitemap_engines_settings_form'; + } + + /** + * {@inheritdoc} + */ + protected function getEditableConfigNames() { + return ['simple_sitemap_engines.settings']; + } + + /** + * {@inheritdoc} + */ + public function buildForm(array $form, FormStateInterface $form_state) { + $config = $this->config('simple_sitemap_engines.settings'); + $engines = $this->entityTypeManager->getStorage('simple_sitemap_engine')->loadMultiple(); + // Construct a non-associative array containing the enabled search engines. 
+ $engine_statuses = array_column($engines, 'status', 'id'); + $enabled_engines = array_keys(array_filter($engine_statuses)); + + // Build the frequency options, in the form [interval => label]. + $frequency_options = array_flip([ + 10800, + 21600, + 43200, + 86400, + 259200, + 604800, + ]); + foreach (array_keys($frequency_options) as $frequency) { + $frequency_options[$frequency] = $this->dateFormatter->formatInterval($frequency); + } + + $form['engine_group'] = [ + '#type' => 'fieldset', + '#title' => $this->t('Search engines'), + ]; + $form['engine_group']['engines'] = [ + '#type' => 'checkboxes', + '#title' => $this->t('Submit the sitemap to the following engines'), + '#options' => $engines, + '#default_value' => $enabled_engines, + '#description' => $this->t('Details about each search engine can be seen <a href="@url">here</a>.', [ + '@url' => Url::fromRoute('entity.simple_sitemap_engine.list')->toString(), + ]), + ]; + + $form['settings'] = [ + '#type' => 'fieldset', + '#title' => $this->t('Submission settings'), + ]; + $form['settings']['enabled'] = [ + '#type' => 'checkbox', + '#title' => $this->t('Submit the sitemap to search engines every:'), + '#default_value' => $config->get('enabled'), + ]; + $form['settings']['frequency'] = [ + '#type' => 'select', + '#options' => $frequency_options, + '#default_value' => $config->get('frequency'), + '#states' => [ + 'enabled' => [':input[name="enabled"]' => ['checked' => TRUE]], + ], + ]; + + return parent::buildForm($form, $form_state); + } + + /** + * {@inheritdoc} + */ + public function submitForm(array &$form, FormStateInterface $form_state) { + $engines = $this->entityTypeManager->getStorage('simple_sitemap_engine')->loadMultiple(); + foreach ($form_state->getValue('engines') as $engine => $enabled) { + $engines[$engine]->status = (bool) $enabled; + $engines[$engine]->save(); + } + $config = $this->config('simple_sitemap_engines.settings'); + $config->set('enabled', $form_state->getValue('enabled')); + $config->set('frequency', 
$form_state->getValue('frequency')); + $config->save(); + } + +} diff --git a/modules/simple_sitemap_engines/src/Plugin/QueueWorker/SitemapSubmitter.php b/modules/simple_sitemap_engines/src/Plugin/QueueWorker/SitemapSubmitter.php new file mode 100644 index 0000000..bee75da --- /dev/null +++ b/modules/simple_sitemap_engines/src/Plugin/QueueWorker/SitemapSubmitter.php @@ -0,0 +1,140 @@ +engineStorage = $engine_storage; + $this->httpClient = $http_client; + $this->sitemapManager = $sitemap_manager; + $this->logger = $logger; + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) { + return new static( + $configuration, + $plugin_id, + $plugin_definition, + $container->get('entity_type.manager')->getStorage('simple_sitemap_engine'), + $container->get('http_client'), + $container->get('simple_sitemap.manager'), + $container->get('logger.factory')->get('simple_sitemap') + ); + } + + /** + * {@inheritdoc} + */ + public function processItem($id) { + /** @var \Drupal\simple_sitemap_engines\Entity\SearchEngine $engine */ + if ($engine = $this->engineStorage->load($id)) { + // Gather URLs for all sitemap variants. + $sitemap_urls = []; + foreach ($this->sitemapManager->getSitemapTypes() as $type_name => $type_definition) { + $sitemap_generator = $this->sitemapManager->getSitemapGenerator($type_definition['sitemapGenerator']); + $variants = $this->sitemapManager->getSitemapVariants($type_name, FALSE); + if (!empty($variants)) { + foreach ($variants as $variant_id => $variant) { + $sitemap_urls[$variant['label']] = $sitemap_generator->setSitemapVariant($variant_id)->getSitemapUrl(); + } + } + } + + // Submit all URLs. + foreach ($sitemap_urls as $variant => $sitemap_url) { + $submit_url = str_replace('[sitemap]', $sitemap_url, $engine->url); + try { + $this->httpClient->request('GET', $submit_url); + // Log if submission was successful. 
+ $this->logger->info('Sitemap %sitemap submitted to @url', ['%sitemap' => $variant, '@url' => $submit_url]); + // Record last submission time. This is purely informational; the + // variable that determines when the next submission should be run is + // stored in the global state. + $engine->last_submitted = time(); + } + catch (RequestException $e) { + // Catch and log exceptions so this submission gets removed from the + // queue whether or not it succeeded. + // If the error was caused by network failure, it's fine to just wait + // until next time the submission is queued to try again. + // If the error was caused by a malformed URL, keeping the submission + // in the queue to retry is pointless since it will always fail. + watchdog_exception('simple_sitemap', $e); + } + } + $engine->save(); + } + } + +} diff --git a/modules/simple_sitemap_engines/tests/README.txt b/modules/simple_sitemap_engines/tests/README.txt new file mode 100644 index 0000000..1e2821a --- /dev/null +++ b/modules/simple_sitemap_engines/tests/README.txt @@ -0,0 +1,10 @@ +This module uses PHPUnit kernel tests to follow drupal.org standards. + +To run tests, execute the command below in the web root of your Drupal site, +where 'www-data' is the name of the user your web server runs as, and +'http://localhost/drupal8/web' is the URL to the root of your site. + +sudo -u www-data php core/scripts/run-tests.sh --verbose --sqlite /tmp/test.sqlite --url http://localhost/drupal8/web simple_sitemap_engines + +If you receive an error that sqlite is missing, install the php-sqlite3 Debian +package. 
diff --git a/modules/simple_sitemap_engines/tests/src/Kernel/SubmitSitemapTest.php b/modules/simple_sitemap_engines/tests/src/Kernel/SubmitSitemapTest.php new file mode 100644 index 0000000..c67de12 --- /dev/null +++ b/modules/simple_sitemap_engines/tests/src/Kernel/SubmitSitemapTest.php @@ -0,0 +1,148 @@ +installEntitySchema('simple_sitemap_engine'); + $this->installConfig('simple_sitemap'); + $this->installConfig('simple_sitemap_engines'); + + $this->cron = \Drupal::service('cron'); + $this->engineStorage = \Drupal::entityTypeManager()->getStorage('simple_sitemap_engine'); + $this->queue = \Drupal::queue('simple_sitemap_engine_submit'); + + // Disable all search engines but one so tests will not fail if additional + // search engines are added in the future. + $engines = $this->engineStorage->loadMultiple(); + foreach ($engines as $id => $engine) { + if ($id != 'google') { + $engine->status = FALSE; + $engine->save(); + } + } + } + + /** + * Tests sitemap submission URLs and last submission status. + */ + public function testSubmission() { + // Create a mock HTTP client. + $http_client = $this->prophesize(ClientInterface::class); + // Make mock HTTP requests always succeed. + $http_client->request('GET', Argument::any())->willReturn(TRUE); + // Replace the default HTTP client service with the mock. + $this->container->set('http_client', $http_client->reveal()); + + // Run cron to trigger submission. + $this->cron->run(); + + $google = $this->engineStorage->load('google'); + $bing = $this->engineStorage->load('bing'); + + // Check that Google was marked as submitted and Bing was not. + $this->assertNotEmpty($google->last_submitted); + $this->assertEmpty($bing->last_submitted); + + // Check that exactly 1 HTTP request was sent to the correct URL. 
+ $http_client->request('GET', 'http://www.google.com/ping?sitemap=http://localhost/default/sitemap.xml')->shouldBeCalled(); + $http_client->request('GET', Argument::any())->shouldBeCalledTimes(1); + } + + /** + * Tests that sitemaps are not submitted every time cron runs. + */ + public function testNoDoubleSubmission() { + // Create a mock HTTP client. + $http_client = $this->prophesize(ClientInterface::class); + // Make mock HTTP requests always succeed. + $http_client->request('GET', Argument::any())->willReturn(TRUE); + // Replace the default HTTP client service with the mock. + $this->container->set('http_client', $http_client->reveal()); + + // Run cron to trigger submission. + $this->cron->run(); + + // Check that Google was submitted and store its last submitted time. + $google = $this->engineStorage->load('google'); + $http_client->request('GET', 'http://www.google.com/ping?sitemap=http://localhost/default/sitemap.xml')->shouldBeCalledTimes(1); + $this->assertNotEmpty($google->last_submitted); + $google_last_submitted = $google->last_submitted; + + // Make sure enough time passes between cron runs to guarantee that they + // do not run within the same second, since timestamps are compared below. + sleep(2); + $this->cron->run(); + $google = $this->engineStorage->load('google'); + + // Check that the last submitted time was not updated on the second cron + // run. + $this->assertEquals($google_last_submitted, $google->last_submitted); + // Check that no duplicate request was sent. + $http_client->request('GET', 'http://www.google.com/ping?sitemap=http://localhost/default/sitemap.xml')->shouldBeCalledTimes(1); + } + + /** + * Tests that failed sitemap submissions are handled properly. + */ + public function testFailedSubmission() { + // Create a mock HTTP client. + $http_client = $this->prophesize(ClientInterface::class); + // Make mock HTTP requests always fail. 
+ $http_client->request('GET', Argument::any())->willThrow(RequestException::class); + // Replace the default HTTP client service with the mock. + $this->container->set('http_client', $http_client->reveal()); + + // Run cron to trigger submission. + $this->cron->run(); + + $google = $this->engineStorage->load('google'); + + // Check that one request was attempted. + $http_client->request('GET', Argument::any())->shouldBeCalledTimes(1); + // Check the last submission time is still empty. + $this->assertEmpty($google->last_submitted); + // Check that the submission was removed from the queue despite failure. + $this->assertEquals(0, $this->queue->numberOfItems()); + } + +} -- GitLab