From 834c4ad0bf82f28949b108eb6c957fde3c18e1ce Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Sat, 20 Apr 2013 11:07:44 -0400 Subject: Fixes #5369 allows ! in urls and adds unit tests --- engine/lib/output.php | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'engine/lib') diff --git a/engine/lib/output.php b/engine/lib/output.php index c5a04989b..fe5bbcaaf 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -13,28 +13,33 @@ * @param string $text The input string * * @return string The output string with formatted links - **/ + */ function parse_urls($text) { + + // URI specification: http://www.ietf.org/rfc/rfc3986.txt + // This varies from the specification in the following ways: + // * Supports non-ascii characters + // * Does not allow parentheses and single quotes + // * Cuts off commas, exclamation points, and periods off as last character + // @todo this causes problems with // must be in format (no space). // By default htmlawed rewrites tags to this format. // if PHP supported conditional negative lookbehinds we could use this: // $r = preg_replace_callback('/(?"\'\!\(\),]+)/i', - // - // we can put , in the list of excluded char but need to keep . because of domain names. - // it is removed in the callback. - $r = preg_replace_callback('/(?"\'\!\(\),]+)/i', + $r = preg_replace_callback('/(?"\'\(\)]+)/i', create_function( '$matches', ' $url = $matches[1]; - $period = \'\'; - if (substr($url, -1, 1) == \'.\') { - $period = \'.\'; - $url = trim($url, \'.\'); + $punc = \'\'; + $last = substr($url, -1, 1); + if (in_array($last, array(".", "!", ","))) { + $punc = $last; + $url = rtrim($url, ".!,"); } $urltext = str_replace("/", "/", $url); - return "$urltext$period"; + return "$urltext$punc"; ' ), $text); -- cgit v1.2.3 From 164ff10b46d5917c7ab6ad068abf10e492464691 Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 20 Apr 2013 12:08:27 -0400 Subject: Fixes #5244 adds nofollow to anchor tags created by parse_urls() --- engine/lib/output.php | 2 +- engine/tests/regression/trac_bugs.php | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'engine/lib') diff --git a/engine/lib/output.php b/engine/lib/output.php index fe5bbcaaf..6905b9b71 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -39,7 +39,7 @@ function parse_urls($text) { $url = rtrim($url, ".!,"); } $urltext = str_replace("/", "/", $url); - return "$urltext$punc"; + return "$urltext$punc"; ' ), $text); diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 83b78bc6b..4de9c306b 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -247,41 +247,41 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { 'no.link.here' => 'no.link.here', 'simple link http://example.org test' => - 'simple link http://example.org test', + 'simple link http://example.org test', 'non-ascii http://ñew.org/ test' => - 'non-ascii http://ñew.org/ test', + 'non-ascii http://ñew.org/ test', // section 2.1 'percent encoded http://example.org/a%20b test' => - 'percent encoded http://example.org/a%20b test', + 'percent encoded http://example.org/a%20b test', // section 2.2: skipping single quote and parenthese 'reserved characters http://example.org/:/?#[]@!$&*+,;= test' => - 'reserved characters http://example.org/:/?#[]@!$&*+,;= test', + 'reserved characters http://example.org/:/?#[]@!$&*+,;= test', // section 2.3 'unreserved characters http://example.org/a1-._~ test' => - 'unreserved characters http://example.org/a1-._~ test', + 'unreserved characters http://example.org/a1-._~ test', 'parameters http://example.org/?val[]=1&val[]=2 test' => - 'parameters http://example.org/?val[]=1&val[]=2 test', + 'parameters http://example.org/?val[]=1&val[]=2 test', 'port http://example.org:80/ test' => - 'port http://example.org:80/ test', + 'port http://example.org:80/ test', 'parentheses (http://www.google.com) test' => - 'parentheses (http://www.google.com) test', + 'parentheses (http://www.google.com) test', 'comma http://elgg.org, test' => - 'comma http://elgg.org, test', + 'comma http://elgg.org, test', 'period http://elgg.org. test' => - 'period http://elgg.org. test', + 'period http://elgg.org. test', 'exclamation http://elgg.org! test' => - 'exclamation http://elgg.org! test', + 'exclamation http://elgg.org! test', 'already anchor twitter test' => 'already anchor twitter test', 'ssl https://example.org/ test' => - 'ssl https://example.org/ test', + 'ssl https://example.org/ test', 'ftp ftp://example.org/ test' => - 'ftp ftp://example.org/ test', + 'ftp ftp://example.org/ test', ); foreach ($cases as $input => $output) { -- cgit v1.2.3