diff options
-rw-r--r-- | engine/lib/output.php | 25 | ||||
-rw-r--r-- | engine/tests/regression/trac_bugs.php | 52 |
2 files changed, 67 insertions, 10 deletions
diff --git a/engine/lib/output.php b/engine/lib/output.php index c5a04989b..6905b9b71 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -13,28 +13,33 @@ * @param string $text The input string * * @return string The output string with formatted links - **/ + */ function parse_urls($text) { + + // URI specification: http://www.ietf.org/rfc/rfc3986.txt + // This varies from the specification in the following ways: + // * Supports non-ascii characters + // * Does not allow parentheses and single quotes + // * Cuts off commas, exclamation points, and periods as the last character + // @todo this causes problems with <attr = "val"> // must be in <attr="val"> format (no space). // By default htmlawed rewrites tags to this format. // if PHP supported conditional negative lookbehinds we could use this: // $r = preg_replace_callback('/(?<!=)(?<![ ])?(?<!["\'])((ht|f)tps?:\/\/[^\s\r\n\t<>"\'\!\(\),]+)/i', - // - // we can put , in the list of excluded char but need to keep . because of domain names. - // it is removed in the callback. 
- $r = preg_replace_callback('/(?<!=)(?<!["\'])((ht|f)tps?:\/\/[^\s\r\n\t<>"\'\!\(\),]+)/i', + $r = preg_replace_callback('/(?<!=)(?<!["\'])((ht|f)tps?:\/\/[^\s\r\n\t<>"\'\(\)]+)/i', create_function( '$matches', ' $url = $matches[1]; - $period = \'\'; - if (substr($url, -1, 1) == \'.\') { - $period = \'.\'; - $url = trim($url, \'.\'); + $punc = \'\'; + $last = substr($url, -1, 1); + if (in_array($last, array(".", "!", ","))) { + $punc = $last; + $url = rtrim($url, ".!,"); } $urltext = str_replace("/", "/<wbr />", $url); - return "<a href=\"$url\">$urltext</a>$period"; + return "<a href=\"$url\" rel=\"nofollow\">$urltext</a>$punc"; ' ), $text); diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 58444dd39..4de9c306b 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -236,4 +236,56 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { $this->assertIdentical($expected, $friendly_title); } } + + /** + * Test #5369 -- parse_urls() + * https://github.com/Elgg/Elgg/issues/5369 + */ + public function test_parse_urls() { + + $cases = array( + 'no.link.here' => + 'no.link.here', + 'simple link http://example.org test' => + 'simple link <a href="http://example.org" rel="nofollow">http:/<wbr />/<wbr />example.org</a> test', + 'non-ascii http://ñew.org/ test' => + 'non-ascii <a href="http://ñew.org/" rel="nofollow">http:/<wbr />/<wbr />ñew.org/<wbr /></a> test', + + // section 2.1 + 'percent encoded http://example.org/a%20b test' => + 'percent encoded <a href="http://example.org/a%20b" rel="nofollow">http:/<wbr />/<wbr />example.org/<wbr />a%20b</a> test', + // section 2.2: skipping single quote and parentheses + 'reserved characters http://example.org/:/?#[]@!$&*+,;= test' => + 'reserved characters <a href="http://example.org/:/?#[]@!$&*+,;=" rel="nofollow">http:/<wbr />/<wbr />example.org/<wbr />:/<wbr />?#[]@!$&*+,;=</a> test', + // section 2.3 + 'unreserved characters 
http://example.org/a1-._~ test' => + 'unreserved characters <a href="http://example.org/a1-._~" rel="nofollow">http:/<wbr />/<wbr />example.org/<wbr />a1-._~</a> test', + + 'parameters http://example.org/?val[]=1&val[]=2 test' => + 'parameters <a href="http://example.org/?val[]=1&val[]=2" rel="nofollow">http:/<wbr />/<wbr />example.org/<wbr />?val[]=1&val[]=2</a> test', + 'port http://example.org:80/ test' => + 'port <a href="http://example.org:80/" rel="nofollow">http:/<wbr />/<wbr />example.org:80/<wbr /></a> test', + + 'parentheses (http://www.google.com) test' => + 'parentheses (<a href="http://www.google.com" rel="nofollow">http:/<wbr />/<wbr />www.google.com</a>) test', + 'comma http://elgg.org, test' => + 'comma <a href="http://elgg.org" rel="nofollow">http:/<wbr />/<wbr />elgg.org</a>, test', + 'period http://elgg.org. test' => + 'period <a href="http://elgg.org" rel="nofollow">http:/<wbr />/<wbr />elgg.org</a>. test', + 'exclamation http://elgg.org! test' => + 'exclamation <a href="http://elgg.org" rel="nofollow">http:/<wbr />/<wbr />elgg.org</a>! test', + + 'already anchor <a href="http://twitter.com/">twitter</a> test' => + 'already anchor <a href="http://twitter.com/">twitter</a> test', + + 'ssl https://example.org/ test' => + 'ssl <a href="https://example.org/" rel="nofollow">https:/<wbr />/<wbr />example.org/<wbr /></a> test', + 'ftp ftp://example.org/ test' => + 'ftp <a href="ftp://example.org/" rel="nofollow">ftp:/<wbr />/<wbr />example.org/<wbr /></a> test', + + ); + foreach ($cases as $input => $output) { + $this->assertEqual($output, parse_urls($input)); + } + } } |