aboutsummaryrefslogtreecommitdiff
path: root/engine/classes/ElggAutoP.php
diff options
context:
space:
mode:
Diffstat (limited to 'engine/classes/ElggAutoP.php')
-rw-r--r--engine/classes/ElggAutoP.php59
1 files changed, 43 insertions, 16 deletions
diff --git a/engine/classes/ElggAutoP.php b/engine/classes/ElggAutoP.php
index 89d77e583..05842d1b2 100644
--- a/engine/classes/ElggAutoP.php
+++ b/engine/classes/ElggAutoP.php
@@ -7,6 +7,9 @@
*
* In DIV elements, Ps are only added when there would be at
* least two of them.
+ *
+ * @package Elgg.Core
+ * @subpackage Output
*/
class ElggAutoP {
@@ -51,8 +54,12 @@ class ElggAutoP {
protected $_alterList = 'article aside blockquote body details div footer header
section';
+ /** @var string */
protected $_unique = '';
+ /**
+ * Constructor
+ */
public function __construct() {
$this->_blocks = preg_split('@\\s+@', $this->_blocks);
$this->_descendList = preg_split('@\\s+@', $this->_descendList);
@@ -98,25 +105,34 @@ class ElggAutoP {
$html = str_replace('&', $this->_unique . 'AMP', $html);
$this->_doc = new DOMDocument();
-
+
// parse to DOM, suppressing loadHTML warnings
// http://www.php.net/manual/en/domdocument.loadhtml.php#95463
libxml_use_internal_errors(true);
+ // Do not load entities. May be unnecessary, better safe than sorry
+ $disable_load_entities = libxml_disable_entity_loader(true);
+
if (!$this->_doc->loadHTML("<html><meta http-equiv='content-type' "
. "content='text/html; charset={$this->encoding}'><body>{$html}</body>"
. "</html>")) {
+
+ libxml_disable_entity_loader($disable_load_entities);
return false;
}
+ libxml_disable_entity_loader($disable_load_entities);
+
$this->_xpath = new DOMXPath($this->_doc);
// start processing recursively at the BODY element
$nodeList = $this->_xpath->query('//body[1]');
- $this->_addParagraphs($nodeList->item(0));
+ $this->addParagraphs($nodeList->item(0));
// serialize back to HTML
$html = $this->_doc->saveHTML();
+ // Note: we create <autop> elements, which will later be converted to paragraphs
+
// split AUTOPs into multiples at /\n\n+/
$html = preg_replace('/(' . $this->_unique . 'NL){2,}/', '</autop><autop>', $html);
$html = str_replace(array($this->_unique . 'BR', $this->_unique . 'NL', '<br>'),
@@ -126,14 +142,22 @@ class ElggAutoP {
// re-parse so we can handle new AUTOP elements
+ // Do not load entities. May be unnecessary, better safe than sorry
+ $disable_load_entities = libxml_disable_entity_loader(true);
+
if (!$this->_doc->loadHTML($html)) {
+ libxml_disable_entity_loader($disable_load_entities);
return false;
}
+
+ libxml_disable_entity_loader($disable_load_entities);
+
// must re-create XPath object after DOM load
$this->_xpath = new DOMXPath($this->_doc);
// strip AUTOPs that only have comments/whitespace
foreach ($this->_xpath->query('//autop') as $autop) {
+ /* @var DOMElement $autop */
$hasContent = false;
if (trim($autop->textContent) !== '') {
$hasContent = true;
@@ -146,17 +170,19 @@ class ElggAutoP {
}
}
if (!$hasContent) {
- // strip w/ preg_replace later (faster than moving nodes out)
+ // mark to be later replaced w/ preg_replace (faster than moving nodes out)
$autop->setAttribute("r", "1");
}
}
- // remove a single AUTOP inside certain elements
+ // If a DIV contains a single AUTOP, remove it
foreach ($this->_xpath->query('//div') as $el) {
+ /* @var DOMElement $el */
$autops = $this->_xpath->query('./autop', $el);
if ($autops->length === 1) {
- // strip w/ preg_replace later (faster than moving nodes out)
- $autops->item(0)->setAttribute("r", "1");
+ $firstAutop = $autops->item(0);
+ /* @var DOMElement $firstAutop */
+ $firstAutop->setAttribute("r", "1");
}
}
@@ -182,15 +208,16 @@ class ElggAutoP {
/**
* Add P and BR elements as necessary
*
- * @param DOMElement $el
+ * @param DOMElement $el DOM element
+ * @return void
*/
- protected function _addParagraphs(DOMElement $el) {
- // no need to recurse, just queue up
+ protected function addParagraphs(DOMElement $el) {
+ // no need to call recursively, just queue up
$elsToProcess = array($el);
$inlinesToProcess = array();
while ($el = array_shift($elsToProcess)) {
// if true, we can alter all child nodes, if not, we'll just call
- // _addParagraphs on each element in the descendInto list
+ // addParagraphs on each element in the descendInto list
$alterInline = in_array($el->nodeName, $this->_alterList);
// inside affected elements, we want to trim leading whitespace from
@@ -216,16 +243,16 @@ class ElggAutoP {
$isElement = ($node->nodeType === XML_ELEMENT_NODE);
if ($isElement) {
- $elName = $node->nodeName;
+ $isBlock = in_array($node->nodeName, $this->_blocks);
+ } else {
+ $isBlock = false;
}
- $isBlock = ($isElement && in_array($elName, $this->_blocks));
if ($alterInline) {
- $isInline = $isElement && ! $isBlock;
$isText = ($node->nodeType === XML_TEXT_NODE);
$isLastInline = (! $node->nextSibling
- || ($node->nextSibling->nodeType === XML_ELEMENT_NODE
- && in_array($node->nextSibling->nodeName, $this->_blocks)));
+ || ($node->nextSibling->nodeType === XML_ELEMENT_NODE
+ && in_array($node->nextSibling->nodeName, $this->_blocks)));
if ($isElement) {
$isFollowingBr = ($node->nodeName === 'br');
}
@@ -258,7 +285,7 @@ class ElggAutoP {
if ($isBlock) {
if (in_array($node->nodeName, $this->_descendList)) {
$elsToProcess[] = $node;
- //$this->_addParagraphs($node);
+ //$this->addParagraphs($node);
}
}
$openP = true;