diff options
Diffstat (limited to 'engine/classes/ElggTranslit.php')
-rw-r--r-- | engine/classes/ElggTranslit.php | 55 |
1 files changed, 31 insertions, 24 deletions
diff --git a/engine/classes/ElggTranslit.php b/engine/classes/ElggTranslit.php index 676c59fc8..b4bf87797 100644 --- a/engine/classes/ElggTranslit.php +++ b/engine/classes/ElggTranslit.php @@ -20,11 +20,10 @@ * and is licensed under the LGPL. For more information, see * <http://www.doctrine-project.org>. * - * @author Konsta Vesterinen <kvesteri@cc.hut.fi> - * @author Jonathan H. Wage <jonwage@gmail.com> - * - * @author Steve Clay <steve@mrclay.org> - * @package Elgg.Core + * @package Elgg.Core + * @author Konsta Vesterinen <kvesteri@cc.hut.fi> + * @author Jonathan H. Wage <jonwage@gmail.com> + * @author Steve Clay <steve@mrclay.org> * * @access private Plugin authors should not use this directly */ @@ -32,8 +31,9 @@ class ElggTranslit { /** * Create a version of a string for embedding in a URL - * @param string $string a UTF-8 string - * @param string $separator + * + * @param string $string A UTF-8 string + * @param string $separator The character to separate words with * @return string */ static public function urlize($string, $separator = '-') { @@ -49,24 +49,29 @@ class ElggTranslit { // Internationalization, AND 日本語! $string = self::transliterateAscii($string); - // more translation + // allow HTML tags in titles + $string = preg_replace('~<([a-zA-Z][^>]*)>~', ' $1 ', $string); + + // more substitutions + // @todo put these somewhere else $string = strtr($string, array( - // Euro/GBP - "\xE2\x82\xAC" /* € */ => 'E', "\xC2\xA3" /* £ */ => 'GBP', + // currency + "\xE2\x82\xAC" /* € */ => ' E ', + "\xC2\xA3" /* £ */ => ' GBP ', )); // remove all ASCII except 0-9a-zA-Z, hyphen, underscore, and whitespace // note: "x" modifier did not work with this pattern. $string = preg_replace('~[' - . '\x00-\x08' # control chars - . '\x0b\x0c' # vert tab, form feed - . '\x0e-\x1f' # control chars - . '\x21-\x2c' # ! ... , - . '\x2e\x2f' # . slash - . '\x3a-\x40' # : ... @ - . '\x5b-\x5e' # [ ... ^ - . '\x60' # ` - . '\x7b-\x7f' # { ... DEL + . '\x00-\x08' // control chars + . '\x0b\x0c' // vert tab, form feed + . '\x0e-\x1f' // control chars + . '\x21-\x2c' // ! ... , + . '\x2e\x2f' // . slash + . '\x3a-\x40' // : ... @ + . '\x5b-\x5e' // [ ... ^ + . '\x60' // ` + . '\x7b-\x7f' // { ... DEL . ']~', '', $string); $string = strtr($string, '', ''); @@ -80,10 +85,10 @@ class ElggTranslit { // note: we cannot use [^0-9a-zA-Z] because that matches multibyte chars. // note: "x" modifier did not work with this pattern. $pattern = '~[' - . '\x00-\x2f' # controls ... slash - . '\x3a-\x40' # : ... @ - . '\x5b-\x60' # [ ... ` - . '\x7b-\x7f' # { ... DEL + . '\x00-\x2f' // controls ... slash + . '\x3a-\x40' // : ... @ + . '\x5b-\x60' // [ ... ` + . '\x7b-\x7f' // { ... DEL . ']+~x'; // ['internationalization', 'and', '日本語'] @@ -98,6 +103,7 @@ class ElggTranslit { /** * Transliterate Western multibyte chars to ASCII + * * @param string $utf8 a UTF-8 string * @return string */ @@ -247,6 +253,7 @@ class ElggTranslit { /** * Tests that "normalizer_normalize" exists and works + * * @return bool */ static public function hasNormalizerSupport() { @@ -255,7 +262,7 @@ class ElggTranslit { $form_c = "\xC3\x85"; // 'LATIN CAPITAL LETTER A WITH RING ABOVE' (U+00C5) $form_d = "A\xCC\x8A"; // A followed by 'COMBINING RING ABOVE' (U+030A) $ret = (function_exists('normalizer_normalize') - && $form_c === normalizer_normalize($form_d)); + && $form_c === normalizer_normalize($form_d)); } return $ret; } |