aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Clay <steve@mrclay.org>2013-03-31 21:09:07 -0400
committerSteve Clay <steve@mrclay.org>2013-03-31 21:09:07 -0400
commit5ccc0540fd28cc1620ffca10e3aed92319e78794 (patch)
tree9f63ec4480036bdc988b66aaf479f9978c74dbe8
parent835c7fe5eb77343081b9bd33ec465f9ce8929570 (diff)
downloadelgg-5ccc0540fd28cc1620ffca10e3aed92319e78794.tar.gz
elgg-5ccc0540fd28cc1620ffca10e3aed92319e78794.tar.bz2
Fixes #4972: More robust friendly titles implementation
-rw-r--r--engine/classes/ElggTranslit.php15
-rw-r--r--engine/lib/output.php8
-rw-r--r--engine/tests/regression/trac_bugs.php16
3 files changed, 24 insertions, 15 deletions
diff --git a/engine/classes/ElggTranslit.php b/engine/classes/ElggTranslit.php
index 601965c11..4ae1d2479 100644
--- a/engine/classes/ElggTranslit.php
+++ b/engine/classes/ElggTranslit.php
@@ -49,10 +49,19 @@ class ElggTranslit {
// Internationalization, AND 日本語!
$string = self::transliterateAscii($string);
- // more translation
+ // allow HTML tags in titles
+ $string = preg_replace('~<([a-zA-Z][^>]*)>~', ' $1 ', $string);
+
+ // more substitutions
+ // @todo put these somewhere else
$string = strtr($string, array(
- // Euro/GBP
- "\xE2\x82\xAC" /* € */ => 'E', "\xC2\xA3" /* £ */ => 'GBP',
+ // currency
+ "\xE2\x82\xAC" /* € */ => ' E ',
+ "\xC2\xA3" /* £ */ => ' GBP ',
+
+ "&" => ' and ',
+ ">" => ' greater than ',
+ "<" => ' less than ',
));
// remove all ASCII except 0-9a-zA-Z, hyphen, underscore, and whitespace
diff --git a/engine/lib/output.php b/engine/lib/output.php
index da8e1ab86..c5a04989b 100644
--- a/engine/lib/output.php
+++ b/engine/lib/output.php
@@ -284,11 +284,9 @@ function elgg_get_friendly_title($title) {
return $result;
}
- // handle some special cases
- $title = str_replace('&amp;', 'and', $title);
- // quotes and angle brackets stored in the database as html encoded
- $title = htmlspecialchars_decode($title);
-
+ // titles are often stored HTML encoded
+ $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
+
$title = ElggTranslit::urlize($title);
return $title;
diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php
index 691433a41..58444dd39 100644
--- a/engine/tests/regression/trac_bugs.php
+++ b/engine/tests/regression/trac_bugs.php
@@ -206,21 +206,23 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest {
*/
public function test_friendly_title() {
$cases = array(
+ // acid test
+ "B&N > Amazon, OK? <bold> 'hey!' $34"
+ => "b-and-n-greater-than-amazon-ok-bold-hey-34",
+
// hyphen, underscore and ASCII whitespace replaced by separator,
// other non-alphanumeric ASCII removed
- "a-a_a a\na\ra\ta\va!a\"a#a\$a%a&a'a(a)a*a+a,a.a/a:a;a<a=a>a?a@a[a\\a]a^a`a{a|a}a~a"
- => "a-a-a-a-a-a-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
-
+ "a-a_a a\na\ra\ta\va!a\"a#a\$a%a&a'a(a)a*a+a,a.a/a:a;a=a?a@a[a\\a]a^a`a{a|a}a~a"
+ => "a-a-a-a-a-a-aaaaaaa-and-aaaaaaaaaaaaaaaaaaaaaaa",
+
// separators trimmed
- "-_ hello _-" => "hello",
+ "-_ hello _-"
+ => "hello",
// accents removed, lower case, other multibyte chars are URL encoded
"I\xC3\xB1t\xC3\xABrn\xC3\xA2ti\xC3\xB4n\xC3\xA0liz\xC3\xA6ti\xC3\xB8n, AND \xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E"
// Iñtërnâtiônàlizætiøn, AND 日本語
=> 'internationalizaetion-and-%E6%97%A5%E6%9C%AC%E8%AA%9E',
-
- // some HTML entity replacements
- "Me &amp; You" => 'me-and-you',
);
// where available, string is converted to NFC before transliteration