From 6648304aa71067a05b0d4166396f5f93c0f66628 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Thu, 11 Oct 2012 15:49:02 -0400 Subject: Better HTML decoding and output/email encoding --- actions/profile/edit.php | 4 +-- engine/lib/output.php | 42 ++++++++++++++++++++++++++++++ engine/lib/upgrades/2010052601.php | 12 ++++----- mod/blog/views/default/forms/blog/save.php | 2 +- mod/groups/actions/groups/edit.php | 8 +++--- views/default/output/email.php | 4 ++- 6 files changed, 58 insertions(+), 14 deletions(-) diff --git a/actions/profile/edit.php b/actions/profile/edit.php index 8ca60f246..baf3ecaa6 100644 --- a/actions/profile/edit.php +++ b/actions/profile/edit.php @@ -25,7 +25,7 @@ if (!is_array($accesslevel)) { * wrapper for recursive array walk decoding */ function profile_array_decoder(&$v) { - $v = html_entity_decode($v, ENT_COMPAT, 'UTF-8'); + $v = _elgg_html_decode($v); } $profile_fields = elgg_get_config('profile_fields'); @@ -37,7 +37,7 @@ foreach ($profile_fields as $shortname => $valuetype) { if (is_array($value)) { array_walk_recursive($value, 'profile_array_decoder'); } else { - $value = html_entity_decode($value, ENT_COMPAT, 'UTF-8'); + $value = _elgg_html_decode($value); } // limit to reasonable sizes diff --git a/engine/lib/output.php b/engine/lib/output.php index 7bfc4be6e..ea28b6ef4 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -398,3 +398,45 @@ function elgg_strip_tags($string) { return $string; } + +/** + * Apply html_entity_decode() to a string while re-entitising HTML + * special char entities to prevent them from being decoded back to their + * unsafe original forms. + * + * This relies on html_entity_decode() not translating entities when + * doing so leaves behind another entity, e.g. &amp;gt; if decoded would + * create &gt; which is another entity itself. This seems to escape the + * usual behaviour where any two paired entities creating a HTML tag are + * usually decoded, i.e.
a lone &gt; is not decoded, but &lt;foo&gt; would + * be decoded to <foo> since it creates a full tag. + * + * Note: This function is poorly explained in the manual - which is really + * bad given its potential for misuse on user input already escaped elsewhere. + * Stackoverflow is littered with advice to use this function in the precise + * way that would lead to user input being capable of injecting arbitrary HTML. + * + * @param string $string + * + * @return string + * + * @author Pádraic Brady + * @copyright Copyright (c) 2010 Pádraic Brady (http://blog.astrumfutura.com) + * @license Released under dual-license GPL2/MIT by explicit permission of Pádraic Brady + * + * @access private + */ +function _elgg_html_decode($string) { + $string = str_replace( + array('&gt;', '&lt;', '&amp;', '&quot;', '&#039;'), + array('&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;'), + $string + ); + $string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8'); + $string = str_replace( + array('&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;'), + array('&gt;', '&lt;', '&amp;', '&quot;', '&#039;'), + $string + ); + return $string; +} diff --git a/engine/lib/upgrades/2010052601.php b/engine/lib/upgrades/2010052601.php index 5b477910f..a9cca6dc5 100644 --- a/engine/lib/upgrades/2010052601.php +++ b/engine/lib/upgrades/2010052601.php @@ -9,14 +9,14 @@ $params = array('type' => 'group', $groups = elgg_get_entities($params); if ($groups) { foreach ($groups as $group) { - $group->name = html_entity_decode($group->name, ENT_COMPAT, 'UTF-8'); - $group->description = html_entity_decode($group->description, ENT_COMPAT, 'UTF-8'); - $group->briefdescription = html_entity_decode($group->briefdescription, ENT_COMPAT, 'UTF-8'); - $group->website = html_entity_decode($group->website, ENT_COMPAT, 'UTF-8'); + $group->name = _elgg_html_decode($group->name); + $group->description = _elgg_html_decode($group->description); + $group->briefdescription = _elgg_html_decode($group->briefdescription); + $group->website = _elgg_html_decode($group->website); if
($group->interests) { $tags = $group->interests; - foreach ($tags as $index=>$tag) { - $tags[$index] = html_entity_decode($tag, ENT_COMPAT, 'UTF-8'); + foreach ($tags as $index => $tag) { + $tags[$index] = _elgg_html_decode($tag); } $group->interests = $tags; } diff --git a/mod/blog/views/default/forms/blog/save.php b/mod/blog/views/default/forms/blog/save.php index a805541bd..be6adac0a 100644 --- a/mod/blog/views/default/forms/blog/save.php +++ b/mod/blog/views/default/forms/blog/save.php @@ -53,7 +53,7 @@ $excerpt_label = elgg_echo('blog:excerpt'); $excerpt_input = elgg_view('input/text', array( 'name' => 'excerpt', 'id' => 'blog_excerpt', - 'value' => html_entity_decode($vars['excerpt'], ENT_COMPAT, 'UTF-8') + 'value' => _elgg_html_decode($vars['excerpt']) )); $body_label = elgg_echo('blog:body'); diff --git a/mod/groups/actions/groups/edit.php b/mod/groups/actions/groups/edit.php index a4169461a..2d7e1f023 100644 --- a/mod/groups/actions/groups/edit.php +++ b/mod/groups/actions/groups/edit.php @@ -8,15 +8,15 @@ // Load configuration global $CONFIG; +elgg_make_sticky_form('groups'); + /** * wrapper for recursive array walk decoding */ function profile_array_decoder(&$v) { - $v = html_entity_decode($v, ENT_COMPAT, 'UTF-8'); + $v = _elgg_html_decode($v); } -elgg_make_sticky_form('groups'); - // Get group fields $input = array(); foreach ($CONFIG->group as $shortname => $valuetype) { @@ -25,7 +25,7 @@ foreach ($CONFIG->group as $shortname => $valuetype) { if (is_array($input[$shortname])) { array_walk_recursive($input[$shortname], 'profile_array_decoder'); } else { - $input[$shortname] = html_entity_decode($input[$shortname], ENT_COMPAT, 'UTF-8'); + $input[$shortname] = _elgg_html_decode($input[$shortname]); } if ($valuetype == 'tags') { diff --git a/views/default/output/email.php b/views/default/output/email.php index 00eefad1f..f5a8bc4b8 100644 --- a/views/default/output/email.php +++ b/views/default/output/email.php @@ -10,6 +10,8 @@ * */ +$encoded_value = 
htmlspecialchars($vars['value'], ENT_QUOTES, 'UTF-8'); + + if (!empty($vars['value'])) { - echo "<a href=\"mailto:" . $vars['value'] . "\">". htmlspecialchars($vars['value'], ENT_QUOTES, 'UTF-8', false) ."</a>"; + echo "<a href=\"mailto:$encoded_value\">$encoded_value</a>"; } \ No newline at end of file -- cgit v1.2.3