aboutsummaryrefslogtreecommitdiff
path: root/engine/lib
diff options
context:
space:
mode:
authorSteve Clay <steve@mrclay.org>2012-10-11 15:49:02 -0400
committerSteve Clay <steve@mrclay.org>2012-11-07 16:55:38 -0500
commit6648304aa71067a05b0d4166396f5f93c0f66628 (patch)
treeacc332d05de9199b59dc2b7320a530d801ea95f4 /engine/lib
parentd134beadb79fcc90a75bda8bbcbfb9987b27470c (diff)
downloadelgg-6648304aa71067a05b0d4166396f5f93c0f66628.tar.gz
elgg-6648304aa71067a05b0d4166396f5f93c0f66628.tar.bz2
Better HTML decoding and output/email encoding
Diffstat (limited to 'engine/lib')
-rw-r--r--engine/lib/output.php42
-rw-r--r--engine/lib/upgrades/2010052601.php12
2 files changed, 48 insertions, 6 deletions
diff --git a/engine/lib/output.php b/engine/lib/output.php
index 7bfc4be6e..ea28b6ef4 100644
--- a/engine/lib/output.php
+++ b/engine/lib/output.php
@@ -398,3 +398,45 @@ function elgg_strip_tags($string) {
return $string;
}
+
+/**
+ * Apply html_entity_decode() to a string while re-entitising HTML
+ * special char entities to prevent them from being decoded back to their
+ * unsafe original forms.
+ *
+ * This relies on html_entity_decode() not translating entities when
+ * doing so leaves behind another entity, e.g. &amp;gt; if decoded would
+ * create &gt; which is another entity itself. This seems to escape the
+ * usual behaviour where any two paired entities creating a HTML tag are
+ * usually decoded, i.e. a lone &gt; is not decoded, but &lt;foo&gt; would
+ * be decoded to <foo> since it creates a full tag.
+ *
+ * Note: This function is poorly explained in the manual - which is really
+ * bad given its potential for misuse on user input already escaped elsewhere.
+ * Stackoverflow is littered with advice to use this function in the precise
+ * way that would lead to user input being capable of injecting arbitrary HTML.
+ *
+ * @param string $string
+ *
+ * @return string
+ *
+ * @author Pádraic Brady
+ * @copyright Copyright (c) 2010 Pádraic Brady (http://blog.astrumfutura.com)
+ * @license Released under dual-license GPL2/MIT by explicit permission of Pádraic Brady
+ *
+ * @access private
+ */
+function _elgg_html_decode($string) {
+ $string = str_replace(
+ array('&gt;', '&lt;', '&amp;', '&quot;', '&#039;'),
+ array('&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;'),
+ $string
+ );
+ $string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8');
+ $string = str_replace(
+ array('&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;'),
+ array('&gt;', '&lt;', '&amp;', '&quot;', '&#039;'),
+ $string
+ );
+ return $string;
+}
diff --git a/engine/lib/upgrades/2010052601.php b/engine/lib/upgrades/2010052601.php
index 5b477910f..a9cca6dc5 100644
--- a/engine/lib/upgrades/2010052601.php
+++ b/engine/lib/upgrades/2010052601.php
@@ -9,14 +9,14 @@ $params = array('type' => 'group',
$groups = elgg_get_entities($params);
if ($groups) {
foreach ($groups as $group) {
- $group->name = html_entity_decode($group->name, ENT_COMPAT, 'UTF-8');
- $group->description = html_entity_decode($group->description, ENT_COMPAT, 'UTF-8');
- $group->briefdescription = html_entity_decode($group->briefdescription, ENT_COMPAT, 'UTF-8');
- $group->website = html_entity_decode($group->website, ENT_COMPAT, 'UTF-8');
+ $group->name = _elgg_html_decode($group->name);
+ $group->description = _elgg_html_decode($group->description);
+ $group->briefdescription = _elgg_html_decode($group->briefdescription);
+ $group->website = _elgg_html_decode($group->website);
if ($group->interests) {
$tags = $group->interests;
- foreach ($tags as $index=>$tag) {
- $tags[$index] = html_entity_decode($tag, ENT_COMPAT, 'UTF-8');
+ foreach ($tags as $index => $tag) {
+ $tags[$index] = _elgg_html_decode($tag);
}
$group->interests = $tags;
}