From 4fff20a33467a7318956412d4dabfcab1ce6daba Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Mon, 25 Jun 2012 01:25:46 -0400 Subject: Fixes #2276: Better friendly titles, portable ElggTranslit class, better units --- engine/tests/regression/trac_bugs.php | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'engine/tests/regression/trac_bugs.php') diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 26a45ab6a..e81bd6936 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -202,14 +202,25 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { /** * http://trac.elgg.org/ticket/3210 - Don't remove -s in friendly titles - * @todo: http://trac.elgg.org/ticket/2276 - improve char encoding + * http://trac.elgg.org/ticket/2276 - improve char encoding */ public function test_friendly_title() { $cases = array( - 'Simple Test' => 'simple-test', - 'Test top-level page' => 'test-top-level-page', -// 'éclair' => 'éclair', -// 'English, Español, and 日本語' => 'english-español-and-日本語' + // hyphen, underscore and ASCII whitespace replaced by separator, + // other non-alphanumeric ASCII removed + "a-a_a a\na\ra\ta\va!a\"a#a\$a%a&a'a(a)a*a+a,a.a/a:a;a<a=a>a?a@a[a\\a]a^a`a{a|a}a~a" + => "a-a-a-a-a-a-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + + // separators trimmed + "-_ hello _-" => "hello", + + // accents removed, lower case, other multibyte chars are URL encoded + "I\xC3\xB1t\xC3\xABrn\xC3\xA2ti\xC3\xB4n\xC3\xA0liz\xC3\xA6ti\xC3\xB8n, AND \xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E" + // Iñtërnâtiônàlizætiøn, AND 日本語 + => 'internationalizaetion-and-%E6%97%A5%E6%9C%AC%E8%AA%9E', + + // some HTML entity replacements + "Me &amp; You" => 'me-and-you', ); foreach ($cases as $case => $expected) { -- cgit v1.2.3 From 8a5ddacfa2598b8d984c2cdc6142d41936f38c48 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Tue, 26 Jun 2012 11:28:58 -0400 Subject: Added NFC conversion where available 
--- engine/classes/ElggTranslit.php | 22 ++++++++++++++++++++++ engine/tests/regression/trac_bugs.php | 6 ++++++ 2 files changed, 28 insertions(+) (limited to 'engine/tests/regression/trac_bugs.php') diff --git a/engine/classes/ElggTranslit.php b/engine/classes/ElggTranslit.php index 704c17f6a..809302276 100644 --- a/engine/classes/ElggTranslit.php +++ b/engine/classes/ElggTranslit.php @@ -37,6 +37,13 @@ class ElggTranslit { static public function urlize($string, $separator = '-') { // Iñtërnâtiônàlizætiøn, AND 日本語! + // try to force combined chars because the translit map and others expect it + if (self::hasNormalizerSupport()) { + $nfc = normalizer_normalize($string); + if (is_string($nfc)) { + $string = $nfc; + } + } // Internationalization, AND 日本語! $string = self::transliterateAscii($string); @@ -235,4 +242,19 @@ class ElggTranslit { "\xE1\xBB\xB4" /* Ỵ */ => 'Y', "\xE1\xBB\xB5" /* ỵ */ => 'y', ); } + + /** + * Tests that "normalizer_normalize" exists and works + * @return bool + */ + static public function hasNormalizerSupport() { + static $ret = null; + if (null === $ret) { + $form_c = "\xC3\x85"; // 'LATIN CAPITAL LETTER A WITH RING ABOVE' (U+00C5) + $form_d = "A\xCC\x8A"; // A followed by 'COMBINING RING ABOVE' (U+030A) + $ret = (function_exists('normalizer_normalize') + && $form_c === normalizer_normalize($form_d)); + } + return $ret; + } } diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index e81bd6936..691433a41 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -223,6 +223,12 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { "Me & You" => 'me-and-you', ); + // where available, string is converted to NFC before transliteration + if (ElggTranslit::hasNormalizerSupport()) { + $form_d = "A\xCC\x8A"; // A followed by 'COMBINING RING ABOVE' (U+030A) + $cases[$form_d] = "a"; + } + foreach ($cases as $case => $expected) { $friendly_title = 
elgg_get_friendly_title($case); $this->assertIdentical($expected, $friendly_title); -- cgit v1.2.3