diff options
Diffstat (limited to 'engine/lib')
| -rw-r--r-- | engine/lib/output.php | 46 | ||||
| -rw-r--r-- | engine/lib/upgrades/2010052601.php | 12 | 
2 files changed, 50 insertions, 8 deletions
| diff --git a/engine/lib/output.php b/engine/lib/output.php index 7bfc4be6e..352de863b 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -271,8 +271,8 @@ function elgg_normalize_url($url) {  		// '?query=test', #target  		return $url; -	} elseif (stripos($url, 'javascript:') === 0) { -		// 'javascript:' +	} elseif (stripos($url, 'javascript:') === 0 || stripos($url, 'mailto:') === 0) { +		// 'javascript:' and 'mailto:'  		// Not covered in FILTER_VALIDATE_URL  		return $url; @@ -398,3 +398,45 @@ function elgg_strip_tags($string) {  	return $string;  } + +/** + * Apply html_entity_decode() to a string while re-entitising HTML + * special char entities to prevent them from being decoded back to their + * unsafe original forms. + * + * This relies on html_entity_decode() not translating entities when + * doing so leaves behind another entity, e.g. &amp;gt; if decoded would + * create &gt; which is another entity itself. This seems to escape the + * usual behaviour where any two paired entities creating a HTML tag are + * usually decoded, i.e. a lone &gt; is not decoded, but &lt;foo&gt; would + * be decoded to <foo> since it creates a full tag. + * + * Note: This function is poorly explained in the manual - which is really + * bad given its potential for misuse on user input already escaped elsewhere. + * Stackoverflow is littered with advice to use this function in the precise + * way that would lead to user input being capable of injecting arbitrary HTML. 
+ * + * @param string $string + * + * @return string + * + * @author Pádraic Brady + * @copyright Copyright (c) 2010 Pádraic Brady (http://blog.astrumfutura.com) + * @license Released under dual-license GPL2/MIT by explicit permission of Pádraic Brady + * + * @access private + */ +function _elgg_html_decode($string) { +	$string = str_replace( +		array('&gt;', '&lt;', '&amp;', '&quot;', '&#039;'), +		array('&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;'), +		$string +	); +	$string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8'); +	$string = str_replace( +		array('&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;'), +		array('&gt;', '&lt;', '&amp;', '&quot;', '&#039;'), +		$string +	); +	return $string; +} diff --git a/engine/lib/upgrades/2010052601.php b/engine/lib/upgrades/2010052601.php index 5b477910f..a9cca6dc5 100644 --- a/engine/lib/upgrades/2010052601.php +++ b/engine/lib/upgrades/2010052601.php @@ -9,14 +9,14 @@ $params = array('type' => 'group',  $groups = elgg_get_entities($params);  if ($groups) {  	foreach ($groups as $group) { -		$group->name = html_entity_decode($group->name, ENT_COMPAT, 'UTF-8'); -		$group->description = html_entity_decode($group->description, ENT_COMPAT, 'UTF-8'); -		$group->briefdescription = html_entity_decode($group->briefdescription, ENT_COMPAT, 'UTF-8'); -		$group->website = html_entity_decode($group->website, ENT_COMPAT, 'UTF-8'); +		$group->name = _elgg_html_decode($group->name); +		$group->description = _elgg_html_decode($group->description); +		$group->briefdescription = _elgg_html_decode($group->briefdescription); +		$group->website = _elgg_html_decode($group->website);  		if ($group->interests) {  			$tags = $group->interests; -			foreach ($tags as $index=>$tag) { -				$tags[$index] = html_entity_decode($tag, ENT_COMPAT, 'UTF-8'); +			foreach ($tags as $index => $tag) { +				$tags[$index] = _elgg_html_decode($tag);  			}  			$group->interests = $tags;  		} | 
