From 5ccc0540fd28cc1620ffca10e3aed92319e78794 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sun, 31 Mar 2013 21:09:07 -0400 Subject: Fixes #4972: More robust friendly titles implementation --- engine/classes/ElggTranslit.php | 15 ++++++++++++--- engine/lib/output.php | 8 +++----- engine/tests/regression/trac_bugs.php | 16 +++++++++------- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/engine/classes/ElggTranslit.php b/engine/classes/ElggTranslit.php index 601965c11..4ae1d2479 100644 --- a/engine/classes/ElggTranslit.php +++ b/engine/classes/ElggTranslit.php @@ -49,10 +49,19 @@ class ElggTranslit { // Internationalization, AND 日本語! $string = self::transliterateAscii($string); - // more translation + // allow HTML tags in titles + $string = preg_replace('~<([a-zA-Z][^>]*)>~', ' $1 ', $string); + + // more substitutions + // @todo put these somewhere else $string = strtr($string, array( - // Euro/GBP - "\xE2\x82\xAC" /* € */ => 'E', "\xC2\xA3" /* £ */ => 'GBP', + // currency + "\xE2\x82\xAC" /* € */ => ' E ', + "\xC2\xA3" /* £ */ => ' GBP ', + + "&" => ' and ', + ">" => ' greater than ', + "<" => ' less than ', )); // remove all ASCII except 0-9a-zA-Z, hyphen, underscore, and whitespace diff --git a/engine/lib/output.php b/engine/lib/output.php index da8e1ab86..c5a04989b 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -284,11 +284,9 @@ function elgg_get_friendly_title($title) { return $result; } - // handle some special cases - $title = str_replace('&', 'and', $title); - // quotes and angle brackets stored in the database as html encoded - $title = htmlspecialchars_decode($title); - + // titles are often stored HTML encoded + $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8'); + $title = ElggTranslit::urlize($title); return $title; diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 691433a41..58444dd39 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -206,21 +206,23 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { */ public function test_friendly_title() { $cases = array( + // acid test + "B&N > Amazon, OK? 'hey!' $34" + => "b-and-n-greater-than-amazon-ok-bold-hey-34", + // hyphen, underscore and ASCII whitespace replaced by separator, // other non-alphanumeric ASCII removed - "a-a_a a\na\ra\ta\va!a\"a#a\$a%a&a'a(a)a*a+a,a.a/a:a;aa?a@a[a\\a]a^a`a{a|a}a~a" - => "a-a-a-a-a-a-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - + "a-a_a a\na\ra\ta\va!a\"a#a\$a%a&a'a(a)a*a+a,a.a/a:a;a=a?a@a[a\\a]a^a`a{a|a}a~a" + => "a-a-a-a-a-a-aaaaaaa-and-aaaaaaaaaaaaaaaaaaaaaaa", + // separators trimmed - "-_ hello _-" => "hello", + "-_ hello _-" + => "hello", // accents removed, lower case, other multibyte chars are URL encoded "I\xC3\xB1t\xC3\xABrn\xC3\xA2ti\xC3\xB4n\xC3\xA0liz\xC3\xA6ti\xC3\xB8n, AND \xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E" // Iñtërnâtiônàlizætiøn, AND 日本語 => 'internationalizaetion-and-%E6%97%A5%E6%9C%AC%E8%AA%9E', - - // some HTML entity replacements - "Me & You" => 'me-and-you', ); // where available, string is converted to NFC before transliteration -- cgit v1.2.3