From 5ccc0540fd28cc1620ffca10e3aed92319e78794 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sun, 31 Mar 2013 21:09:07 -0400 Subject: Fixes #4972: More robust friendly titles implementation --- engine/lib/output.php | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'engine/lib/output.php') diff --git a/engine/lib/output.php b/engine/lib/output.php index da8e1ab86..c5a04989b 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -284,11 +284,9 @@ function elgg_get_friendly_title($title) { return $result; } - // handle some special cases - $title = str_replace('&', 'and', $title); - // quotes and angle brackets stored in the database as html encoded - $title = htmlspecialchars_decode($title); - + // titles are often stored HTML encoded + $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8'); + $title = ElggTranslit::urlize($title); return $title; -- cgit v1.2.3 From 834c4ad0bf82f28949b108eb6c957fde3c18e1ce Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Sat, 20 Apr 2013 11:07:44 -0400 Subject: Fixes #5369 allows ! in urls and adds unit tests --- engine/lib/output.php | 25 ++++++++++------- engine/tests/regression/trac_bugs.php | 52 +++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 10 deletions(-) (limited to 'engine/lib/output.php') diff --git a/engine/lib/output.php b/engine/lib/output.php index c5a04989b..fe5bbcaaf 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -13,28 +13,33 @@ * @param string $text The input string * * @return string The output string with formatted links - **/ + */ function parse_urls($text) { + + // URI specification: http://www.ietf.org/rfc/rfc3986.txt + // This varies from the specification in the following ways: + // * Supports non-ascii characters + // * Does not allow parentheses and single quotes + // * Cuts off commas, exclamation points, and periods off as last character + // @todo this causes problems with // must be in format (no space). // By default htmlawed rewrites tags to this format. // if PHP supported conditional negative lookbehinds we could use this: // $r = preg_replace_callback('/(?"\'\!\(\),]+)/i', - // - // we can put , in the list of excluded char but need to keep . because of domain names. - // it is removed in the callback. - $r = preg_replace_callback('/(?"\'\!\(\),]+)/i', + $r = preg_replace_callback('/(?"\'\(\)]+)/i', create_function( '$matches', ' $url = $matches[1]; - $period = \'\'; - if (substr($url, -1, 1) == \'.\') { - $period = \'.\'; - $url = trim($url, \'.\'); + $punc = \'\'; + $last = substr($url, -1, 1); + if (in_array($last, array(".", "!", ","))) { + $punc = $last; + $url = rtrim($url, ".!,"); } $urltext = str_replace("/", "/", $url); - return "$urltext$period"; + return "$urltext$punc"; ' ), $text); diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 58444dd39..83b78bc6b 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -236,4 +236,56 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { $this->assertIdentical($expected, $friendly_title); } } + + /** + * Test #5369 -- parse_urls() + * https://github.com/Elgg/Elgg/issues/5369 + */ + public function test_parse_urls() { + + $cases = array( + 'no.link.here' => + 'no.link.here', + 'simple link http://example.org test' => + 'simple link http://example.org test', + 'non-ascii http://ñew.org/ test' => + 'non-ascii http://ñew.org/ test', + + // section 2.1 + 'percent encoded http://example.org/a%20b test' => + 'percent encoded http://example.org/a%20b test', + // section 2.2: skipping single quote and parenthese + 'reserved characters http://example.org/:/?#[]@!$&*+,;= test' => + 'reserved characters http://example.org/:/?#[]@!$&*+,;= test', + // section 2.3 + 'unreserved characters http://example.org/a1-._~ test' => + 'unreserved characters http://example.org/a1-._~ test', + + 'parameters http://example.org/?val[]=1&val[]=2 test' => + 'parameters http://example.org/?val[]=1&val[]=2 test', + 'port http://example.org:80/ test' => + 'port http://example.org:80/ test', + + 'parentheses (http://www.google.com) test' => + 'parentheses (http://www.google.com) test', + 'comma http://elgg.org, test' => + 'comma http://elgg.org, test', + 'period http://elgg.org. test' => + 'period http://elgg.org. test', + 'exclamation http://elgg.org! test' => + 'exclamation http://elgg.org! test', + + 'already anchor twitter test' => + 'already anchor twitter test', + + 'ssl https://example.org/ test' => + 'ssl https://example.org/ test', + 'ftp ftp://example.org/ test' => + 'ftp ftp://example.org/ test', + + ); + foreach ($cases as $input => $output) { + $this->assertEqual($output, parse_urls($input)); + } + } } -- cgit v1.2.3 From 164ff10b46d5917c7ab6ad068abf10e492464691 Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 20 Apr 2013 12:08:27 -0400 Subject: Fixes #5244 adds nofollow to anchor tags created by parse_urls() --- engine/lib/output.php | 2 +- engine/tests/regression/trac_bugs.php | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'engine/lib/output.php') diff --git a/engine/lib/output.php b/engine/lib/output.php index fe5bbcaaf..6905b9b71 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -39,7 +39,7 @@ function parse_urls($text) { $url = rtrim($url, ".!,"); } $urltext = str_replace("/", "/", $url); - return "$urltext$punc"; + return "$urltext$punc"; ' ), $text); diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 83b78bc6b..4de9c306b 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -247,41 +247,41 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { 'no.link.here' => 'no.link.here', 'simple link http://example.org test' => - 'simple link http://example.org test', + 'simple link http://example.org test', 'non-ascii http://ñew.org/ test' => - 'non-ascii http://ñew.org/ test', + 'non-ascii http://ñew.org/ test', // section 2.1 'percent encoded http://example.org/a%20b test' => - 'percent encoded http://example.org/a%20b test', + 'percent encoded http://example.org/a%20b test', // section 2.2: skipping single quote and parenthese 'reserved characters http://example.org/:/?#[]@!$&*+,;= test' => - 'reserved characters http://example.org/:/?#[]@!$&*+,;= test', + 'reserved characters http://example.org/:/?#[]@!$&*+,;= test', // section 2.3 'unreserved characters http://example.org/a1-._~ test' => - 'unreserved characters http://example.org/a1-._~ test', + 'unreserved characters http://example.org/a1-._~ test', 'parameters http://example.org/?val[]=1&val[]=2 test' => - 'parameters http://example.org/?val[]=1&val[]=2 test', + 'parameters http://example.org/?val[]=1&val[]=2 test', 'port http://example.org:80/ test' => - 'port http://example.org:80/ test', + 'port http://example.org:80/ test', 'parentheses (http://www.google.com) test' => - 'parentheses (http://www.google.com) test', + 'parentheses (http://www.google.com) test', 'comma http://elgg.org, test' => - 'comma http://elgg.org, test', + 'comma http://elgg.org, test', 'period http://elgg.org. test' => - 'period http://elgg.org. test', + 'period http://elgg.org. test', 'exclamation http://elgg.org! test' => - 'exclamation http://elgg.org! test', + 'exclamation http://elgg.org! test', 'already anchor twitter test' => 'already anchor twitter test', 'ssl https://example.org/ test' => - 'ssl https://example.org/ test', + 'ssl https://example.org/ test', 'ftp ftp://example.org/ test' => - 'ftp ftp://example.org/ test', + 'ftp ftp://example.org/ test', ); foreach ($cases as $input => $output) { -- cgit v1.2.3 From cd7922c58f3f2d04b8ca4bcf336ecda2787c821e Mon Sep 17 00:00:00 2001 From: Jeff Tilson Date: Mon, 29 Apr 2013 13:38:42 -0400 Subject: Fixes #2057 (broken internet archive links) --- engine/lib/output.php | 2 +- engine/tests/regression/trac_bugs.php | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'engine/lib/output.php') diff --git a/engine/lib/output.php b/engine/lib/output.php index 6905b9b71..5adc01053 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -27,7 +27,7 @@ function parse_urls($text) { // By default htmlawed rewrites tags to this format. // if PHP supported conditional negative lookbehinds we could use this: // $r = preg_replace_callback('/(?"\'\!\(\),]+)/i', - $r = preg_replace_callback('/(?"\'\(\)]+)/i', + $r = preg_replace_callback('/(?"\'\(\)]+)/i', create_function( '$matches', ' diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 4de9c306b..b791dcad3 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -288,4 +288,13 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { $this->assertEqual($output, parse_urls($input)); } } + + /** + * Test #2057 -- parse_urls() + * https://github.com/Elgg/Elgg/issues/2057 + */ + public function test_archive_url() { + $input = 'google'; + $this->assertEqual($input, parse_urls($input)); + } } -- cgit v1.2.3 From 9892692deefdb06d9e7176c72fc5780ab79e3a7d Mon Sep 17 00:00:00 2001 From: Brett Profitt Date: Tue, 9 Jul 2013 12:13:17 -0400 Subject: Fixes #5706. Allowing parens in URIs if not last character. --- engine/lib/output.php | 8 ++++---- engine/tests/regression/trac_bugs.php | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'engine/lib/output.php') diff --git a/engine/lib/output.php b/engine/lib/output.php index 5adc01053..6172a5c8d 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -27,16 +27,16 @@ function parse_urls($text) { // By default htmlawed rewrites tags to this format. // if PHP supported conditional negative lookbehinds we could use this: // $r = preg_replace_callback('/(?"\'\!\(\),]+)/i', - $r = preg_replace_callback('/(?"\'\(\)]+)/i', + $r = preg_replace_callback('/(?"\']+)/i', create_function( '$matches', ' $url = $matches[1]; - $punc = \'\'; + $punc = ""; $last = substr($url, -1, 1); - if (in_array($last, array(".", "!", ","))) { + if (in_array($last, array(".", "!", ",", "(", ")"))) { $punc = $last; - $url = rtrim($url, ".!,"); + $url = rtrim($url, ".!,()"); } $urltext = str_replace("/", "/", $url); return "$urltext$punc"; diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 5730830bb..f173b5b9f 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -291,6 +291,9 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { 'unquoted already anchor yahoo' => 'unquoted already anchor yahoo', + + 'parens in uri http://thedailywtf.com/Articles/A-(Long-Overdue)-BuildMaster-Introduction.aspx' => + 'parens in uri http://thedailywtf.com/Articles/A-(Long-Overdue)-BuildMaster-Introduction.aspx' ); foreach ($cases as $input => $output) { $this->assertEqual($output, parse_urls($input)); -- cgit v1.2.3 From c1ea910e3b3b0bcc27a214383c9f6355a05dd495 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Thu, 12 Sep 2013 05:59:18 +0200 Subject: Added function for escaping query strings and fixed several XSRF vulnerabilities. --- engine/lib/output.php | 19 +++++++++++++++++++ mod/groups/lib/groups.php | 3 ++- mod/members/pages/members/search.php | 8 ++++++-- mod/search/pages/search/index.php | 10 +--------- 4 files changed, 28 insertions(+), 12 deletions(-) (limited to 'engine/lib/output.php') diff --git a/engine/lib/output.php b/engine/lib/output.php index 6172a5c8d..de4f911fb 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -420,6 +420,25 @@ function _elgg_html_decode($string) { return $string; } +/** + * Prepares query string for output to prevent CSRF attacks. + * + * @param string $string + * @return string + * + * @access private + */ +function _elgg_get_display_query($string) { + //encode <,>,&, quotes and characters above 127 + if (function_exists('mb_convert_encoding')) { + $display_query = mb_convert_encoding($string, 'HTML-ENTITIES', 'UTF-8'); + } else { + // if no mbstring extension, we just strip characters + $display_query = preg_replace("/[^\x01-\x7F]/", "", $string); + } + return htmlspecialchars($display_query, ENT_QUOTES, 'UTF-8', false); +} + /** * Unit tests for Output * diff --git a/mod/groups/lib/groups.php b/mod/groups/lib/groups.php index 77d7c09cc..aa8766e06 100644 --- a/mod/groups/lib/groups.php +++ b/mod/groups/lib/groups.php @@ -73,7 +73,8 @@ function groups_search_page() { elgg_push_breadcrumb(elgg_echo('search')); $tag = get_input("tag"); - $title = elgg_echo('groups:search:title', array($tag)); + $display_query = _elgg_get_display_query($tag); + $title = elgg_echo('groups:search:title', array($display_query)); // groups plugin saves tags as "interests" - see groups_fields_setup() in start.php $params = array( diff --git a/mod/members/pages/members/search.php b/mod/members/pages/members/search.php index 1f0444d67..5466a8246 100644 --- a/mod/members/pages/members/search.php +++ b/mod/members/pages/members/search.php @@ -7,7 +7,9 @@ if ($vars['search_type'] == 'tag') { $tag = get_input('tag'); - $title = elgg_echo('members:title:searchtag', array($tag)); + $display_query = _elgg_get_display_query($tag); + + $title = elgg_echo('members:title:searchtag', array($display_query)); $options = array(); $options['query'] = $tag; @@ -28,7 +30,9 @@ if ($vars['search_type'] == 'tag') { } else { $name = sanitize_string(get_input('name')); - $title = elgg_echo('members:title:searchname', array($name)); + $display_query = _elgg_get_display_query($name); + + $title = elgg_echo('members:title:searchname', array($display_query)); $db_prefix = elgg_get_config('dbprefix'); $params = array( diff --git a/mod/search/pages/search/index.php b/mod/search/pages/search/index.php index ede09329b..9542e0751 100644 --- a/mod/search/pages/search/index.php +++ b/mod/search/pages/search/index.php @@ -17,15 +17,7 @@ $search_type = get_input('search_type', 'all'); // XSS protection is more important that searching for HTML. $query = stripslashes(get_input('q', get_input('tag', ''))); -// @todo - create function for sanitization of strings for display in 1.8 -// encode <,>,&, quotes and characters above 127 -if (function_exists('mb_convert_encoding')) { - $display_query = mb_convert_encoding($query, 'HTML-ENTITIES', 'UTF-8'); -} else { - // if no mbstring extension, we just strip characters - $display_query = preg_replace("/[^\x01-\x7F]/", "", $query); -} -$display_query = htmlspecialchars($display_query, ENT_QUOTES, 'UTF-8', false); +$display_query = _elgg_get_display_query($query); // check that we have an actual query if (!$query) { -- cgit v1.2.3