aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cash Costello <cash.costello@gmail.com> 2013-04-13 05:59:29 -0700
committer Cash Costello <cash.costello@gmail.com> 2013-04-13 05:59:29 -0700
commit 46f4c707dc302979bf142c41b65546966287aa84 (patch)
tree 8c7d50d67178bd8bce659cf3a73f989c33fea361
parent c4a5b13569f9e3d41eb03ebf8e9b031719680453 (diff)
parent 5ccc0540fd28cc1620ffca10e3aed92319e78794 (diff)
download elgg-46f4c707dc302979bf142c41b65546966287aa84.tar.gz
elgg-46f4c707dc302979bf142c41b65546966287aa84.tar.bz2
Merge pull request #5310 from mrclay/4972
Fixes #4972: More robust friendly titles implementation
-rw-r--r-- engine/classes/ElggTranslit.php 15
-rw-r--r-- engine/lib/output.php 8
-rw-r--r-- engine/tests/regression/trac_bugs.php 16
3 files changed, 24 insertions, 15 deletions
diff --git a/engine/classes/ElggTranslit.php b/engine/classes/ElggTranslit.php
index 601965c11..4ae1d2479 100644
--- a/engine/classes/ElggTranslit.php
+++ b/engine/classes/ElggTranslit.php
@@ -49,10 +49,19 @@ class ElggTranslit {
// Internationalization, AND 日本語!
$string = self::transliterateAscii($string);
- // more translation
+ // allow HTML tags in titles
+ $string = preg_replace('~<([a-zA-Z][^>]*)>~', ' $1 ', $string);
+
+ // more substitutions
+ // @todo put these somewhere else
$string = strtr($string, array(
- // Euro/GBP
- "\xE2\x82\xAC" /* € */ => 'E', "\xC2\xA3" /* £ */ => 'GBP',
+ // currency
+ "\xE2\x82\xAC" /* € */ => ' E ',
+ "\xC2\xA3" /* £ */ => ' GBP ',
+
+ "&" => ' and ',
+ ">" => ' greater than ',
+ "<" => ' less than ',
));
// remove all ASCII except 0-9a-zA-Z, hyphen, underscore, and whitespace
diff --git a/engine/lib/output.php b/engine/lib/output.php
index da8e1ab86..c5a04989b 100644
--- a/engine/lib/output.php
+++ b/engine/lib/output.php
@@ -284,11 +284,9 @@ function elgg_get_friendly_title($title) {
return $result;
}
- // handle some special cases
- $title = str_replace('&amp;', 'and', $title);
- // quotes and angle brackets stored in the database as html encoded
- $title = htmlspecialchars_decode($title);
-
+ // titles are often stored HTML encoded
+ $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
+
$title = ElggTranslit::urlize($title);
return $title;
diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php
index 691433a41..58444dd39 100644
--- a/engine/tests/regression/trac_bugs.php
+++ b/engine/tests/regression/trac_bugs.php
@@ -206,21 +206,23 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest {
*/
public function test_friendly_title() {
$cases = array(
+ // acid test
+ "B&N > Amazon, OK? <bold> 'hey!' $34"
+ => "b-and-n-greater-than-amazon-ok-bold-hey-34",
+
// hyphen, underscore and ASCII whitespace replaced by separator,
// other non-alphanumeric ASCII removed
- "a-a_a a\na\ra\ta\va!a\"a#a\$a%a&a'a(a)a*a+a,a.a/a:a;a<a=a>a?a@a[a\\a]a^a`a{a|a}a~a"
- => "a-a-a-a-a-a-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
-
+ "a-a_a a\na\ra\ta\va!a\"a#a\$a%a&a'a(a)a*a+a,a.a/a:a;a=a?a@a[a\\a]a^a`a{a|a}a~a"
+ => "a-a-a-a-a-a-aaaaaaa-and-aaaaaaaaaaaaaaaaaaaaaaa",
+
// separators trimmed
- "-_ hello _-" => "hello",
+ "-_ hello _-"
+ => "hello",
// accents removed, lower case, other multibyte chars are URL encoded
"I\xC3\xB1t\xC3\xABrn\xC3\xA2ti\xC3\xB4n\xC3\xA0liz\xC3\xA6ti\xC3\xB8n, AND \xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E"
// Iñtërnâtiônàlizætiøn, AND 日本語
=> 'internationalizaetion-and-%E6%97%A5%E6%9C%AC%E8%AA%9E',
-
- // some HTML entity replacements
- "Me &amp; You" => 'me-and-you',
);
// where available, string is converted to NFC before transliteration