aboutsummaryrefslogtreecommitdiff
path: root/engine
diff options
context:
space:
mode:
Diffstat (limited to 'engine')
-rw-r--r--engine/lib/output.php25
-rw-r--r--engine/tests/regression/trac_bugs.php52
2 files changed, 67 insertions, 10 deletions
diff --git a/engine/lib/output.php b/engine/lib/output.php
index c5a04989b..6905b9b71 100644
--- a/engine/lib/output.php
+++ b/engine/lib/output.php
@@ -13,28 +13,33 @@
* @param string $text The input string
*
* @return string The output string with formatted links
- **/
+ */
function parse_urls($text) {
+
+ // URI specification: http://www.ietf.org/rfc/rfc3986.txt
+ // This varies from the specification in the following ways:
+ // * Supports non-ascii characters
+ // * Does not allow parentheses and single quotes
+ // * Cuts off commas, exclamation points, and periods off as last character
+
// @todo this causes problems with <attr = "val">
// must be in <attr="val"> format (no space).
// By default htmlawed rewrites tags to this format.
// if PHP supported conditional negative lookbehinds we could use this:
// $r = preg_replace_callback('/(?<!=)(?<![ ])?(?<!["\'])((ht|f)tps?:\/\/[^\s\r\n\t<>"\'\!\(\),]+)/i',
- //
- // we can put , in the list of excluded char but need to keep . because of domain names.
- // it is removed in the callback.
- $r = preg_replace_callback('/(?<!=)(?<!["\'])((ht|f)tps?:\/\/[^\s\r\n\t<>"\'\!\(\),]+)/i',
+ $r = preg_replace_callback('/(?<!=)(?<!["\'])((ht|f)tps?:\/\/[^\s\r\n\t<>"\'\(\)]+)/i',
create_function(
'$matches',
'
$url = $matches[1];
- $period = \'\';
- if (substr($url, -1, 1) == \'.\') {
- $period = \'.\';
- $url = trim($url, \'.\');
+ $punc = \'\';
+ $last = substr($url, -1, 1);
+ if (in_array($last, array(".", "!", ","))) {
+ $punc = $last;
+ $url = rtrim($url, ".!,");
}
$urltext = str_replace("/", "/<wbr />", $url);
- return "<a href=\"$url\">$urltext</a>$period";
+ return "<a href=\"$url\" rel=\"nofollow\">$urltext</a>$punc";
'
), $text);
diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php
index 58444dd39..4de9c306b 100644
--- a/engine/tests/regression/trac_bugs.php
+++ b/engine/tests/regression/trac_bugs.php
@@ -236,4 +236,56 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest {
$this->assertIdentical($expected, $friendly_title);
}
}
+
+ /**
+ * Test #5369 -- parse_urls()
+ * https://github.com/Elgg/Elgg/issues/5369
+ */
+ public function test_parse_urls() {
+
+ $cases = array(
+ 'no.link.here' =>
+ 'no.link.here',
+ 'simple link http://example.org test' =>
+ 'simple link <a href="http://example.org" rel="nofollow">http:/<wbr />/<wbr />example.org</a> test',
+ 'non-ascii http://ñew.org/ test' =>
+ 'non-ascii <a href="http://ñew.org/" rel="nofollow">http:/<wbr />/<wbr />ñew.org/<wbr /></a> test',
+
+ // section 2.1
+ 'percent encoded http://example.org/a%20b test' =>
+ 'percent encoded <a href="http://example.org/a%20b" rel="nofollow">http:/<wbr />/<wbr />example.org/<wbr />a%20b</a> test',
+ // section 2.2: skipping single quote and parenthese
+ 'reserved characters http://example.org/:/?#[]@!$&*+,;= test' =>
+ 'reserved characters <a href="http://example.org/:/?#[]@!$&*+,;=" rel="nofollow">http:/<wbr />/<wbr />example.org/<wbr />:/<wbr />?#[]@!$&*+,;=</a> test',
+ // section 2.3
+ 'unreserved characters http://example.org/a1-._~ test' =>
+ 'unreserved characters <a href="http://example.org/a1-._~" rel="nofollow">http:/<wbr />/<wbr />example.org/<wbr />a1-._~</a> test',
+
+ 'parameters http://example.org/?val[]=1&val[]=2 test' =>
+ 'parameters <a href="http://example.org/?val[]=1&val[]=2" rel="nofollow">http:/<wbr />/<wbr />example.org/<wbr />?val[]=1&val[]=2</a> test',
+ 'port http://example.org:80/ test' =>
+ 'port <a href="http://example.org:80/" rel="nofollow">http:/<wbr />/<wbr />example.org:80/<wbr /></a> test',
+
+ 'parentheses (http://www.google.com) test' =>
+ 'parentheses (<a href="http://www.google.com" rel="nofollow">http:/<wbr />/<wbr />www.google.com</a>) test',
+ 'comma http://elgg.org, test' =>
+ 'comma <a href="http://elgg.org" rel="nofollow">http:/<wbr />/<wbr />elgg.org</a>, test',
+ 'period http://elgg.org. test' =>
+ 'period <a href="http://elgg.org" rel="nofollow">http:/<wbr />/<wbr />elgg.org</a>. test',
+ 'exclamation http://elgg.org! test' =>
+ 'exclamation <a href="http://elgg.org" rel="nofollow">http:/<wbr />/<wbr />elgg.org</a>! test',
+
+ 'already anchor <a href="http://twitter.com/">twitter</a> test' =>
+ 'already anchor <a href="http://twitter.com/">twitter</a> test',
+
+ 'ssl https://example.org/ test' =>
+ 'ssl <a href="https://example.org/" rel="nofollow">https:/<wbr />/<wbr />example.org/<wbr /></a> test',
+ 'ftp ftp://example.org/ test' =>
+ 'ftp <a href="ftp://example.org/" rel="nofollow">ftp:/<wbr />/<wbr />example.org/<wbr /></a> test',
+
+ );
+ foreach ($cases as $input => $output) {
+ $this->assertEqual($output, parse_urls($input));
+ }
+ }
}