From 8ff4d48c9ab124356439b7f18ae21f457204736d Mon Sep 17 00:00:00 2001 From: Alex Pott <alex.a.pott@googlemail.com> Date: Sun, 12 Sep 2021 18:07:29 +0100 Subject: [PATCH] Issue #2016739 by richardbporter, larowlan, aalamaki, afox, wroxbox, mark_fullmer, mohit_aghera, ayushmishra206, rakesh.gectcr, NikolaAt, rteijeiro, tanubansal, amietpatial, jibran, alexpott, Wim Leers: Links with "@" are converted into email addresses even if there is no domain suffix present --- core/modules/filter/filter.module | 9 ++++++--- core/modules/filter/tests/filter.url-input.txt | 2 ++ core/modules/filter/tests/filter.url-output.txt | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/core/modules/filter/filter.module b/core/modules/filter/filter.module index 15aad887308d..77e5ff0a189f 100644 --- a/core/modules/filter/filter.module +++ b/core/modules/filter/filter.module @@ -490,9 +490,12 @@ function _filter_url($text, $filter) { // Prepare domain name pattern. // The ICANN seems to be on track towards accepting more diverse top level - // domains, so this pattern has been "future-proofed" to allow for TLDs - // of length 2-64. + // domains (TLDs), so this pattern has been "future-proofed" to allow for + // TLDs of length 2-64. $domain = '(?:[\p{L}\p{M}\p{N}._+-]+\.)?[\p{L}\p{M}]{2,64}\b'; + // Mail domains differ from the generic domain pattern, specifically: + // A . character must be present in the string that follows the @ character. + $email_domain = '(?:[\p{L}\p{M}\p{N}._+-]+\.)+[\p{L}\p{M}]{2,64}\b'; $ip = '(?:[0-9]{1,3}\.){3}[0-9]{1,3}'; $auth = '[\p{L}\p{M}\p{N}:%_+*~#?&=.,/;-]+@'; $trail = '(' . $valid_url_path . '*)?(\\?' . $valid_url_query_chars . '*' . $valid_url_query_ending_chars . ')?'; @@ -503,7 +506,7 @@ function _filter_url($text, $filter) { $tasks['_filter_url_parse_full_links'] = $pattern; // Match email addresses. - $url_pattern = "[\p{L}\p{M}\p{N}._+-]{1,254}@(?:$domain)"; + $url_pattern = "[\p{L}\p{M}\p{N}._+-]{1,254}@(?:$email_domain)"; $pattern = "`($url_pattern)`u"; $tasks['_filter_url_parse_email_links'] = $pattern; diff --git a/core/modules/filter/tests/filter.url-input.txt b/core/modules/filter/tests/filter.url-input.txt index 92289dcfd169..b41adb3f0392 100644 --- a/core/modules/filter/tests/filter.url-input.txt +++ b/core/modules/filter/tests/filter.url-input.txt @@ -9,6 +9,7 @@ This is just a www.test.com. paragraph with person@test.com. some http://www.tes http://www.test.com www.test.com person@test.com +person@test <code>www.test.com</code> http://test.com/?search=test http://test.com/?search=Test @@ -28,6 +29,7 @@ The old URL filter has problems with <a title="kind of link www.example.com with <dt>www.test.com</dt> <dd>http://www.test.com</dd> <dd>person@test.com</dd> +<dd>person@test</dd> <dt>check www.test.com</dt> <dd>this with some text around: http://www.test.com not so easy person@test.com now?</dd> </dl> diff --git a/core/modules/filter/tests/filter.url-output.txt b/core/modules/filter/tests/filter.url-output.txt index 814a4ed71766..19acee24a7fb 100644 --- a/core/modules/filter/tests/filter.url-output.txt +++ b/core/modules/filter/tests/filter.url-output.txt @@ -9,6 +9,7 @@ This is just a <a href="http://www.test.com">www.test.com</a>. paragraph with <a <a href="http://www.test.com">http://www.test.com</a> <a href="http://www.test.com">www.test.com</a> <a href="mailto:person@test.com">person@test.com</a> +person@test <code>www.test.com</code> <a href="http://test.com/?search=test">http://test.com/?search=test</a> <a href="http://test.com/?search=Test">http://test.com/?search=Test</a> @@ -28,6 +29,7 @@ The old URL filter has problems with <a title="kind of link www.example.com with <dt><a href="http://www.test.com">www.test.com</a></dt> <dd><a href="http://www.test.com">http://www.test.com</a></dd> <dd><a href="mailto:person@test.com">person@test.com</a></dd> +<dd>person@test</dd> <dt>check <a href="http://www.test.com">www.test.com</a></dt> <dd>this with some text around: <a href="http://www.test.com">http://www.test.com</a> not so easy <a href="mailto:person@test.com">person@test.com</a> now?</dd> </dl> -- GitLab