diff options
Diffstat (limited to 'engine/classes/ElggAutoP.php')
-rw-r--r-- | engine/classes/ElggAutoP.php | 38 |
1 files changed, 30 insertions, 8 deletions
diff --git a/engine/classes/ElggAutoP.php b/engine/classes/ElggAutoP.php index f3c7cc972..05842d1b2 100644 --- a/engine/classes/ElggAutoP.php +++ b/engine/classes/ElggAutoP.php @@ -7,6 +7,9 @@ * * In DIV elements, Ps are only added when there would be at * least two of them. + * + * @package Elgg.Core + * @subpackage Output */ class ElggAutoP { @@ -51,8 +54,12 @@ class ElggAutoP { protected $_alterList = 'article aside blockquote body details div footer header section'; + /** @var string */ protected $_unique = ''; + /** + * Constructor + */ public function __construct() { $this->_blocks = preg_split('@\\s+@', $this->_blocks); $this->_descendList = preg_split('@\\s+@', $this->_descendList); @@ -98,21 +105,28 @@ class ElggAutoP { $html = str_replace('&', $this->_unique . 'AMP', $html); $this->_doc = new DOMDocument(); - + // parse to DOM, suppressing loadHTML warnings // http://www.php.net/manual/en/domdocument.loadhtml.php#95463 libxml_use_internal_errors(true); + // Do not load entities. May be unnecessary, better safe than sorry + $disable_load_entities = libxml_disable_entity_loader(true); + if (!$this->_doc->loadHTML("<html><meta http-equiv='content-type' " . "content='text/html; charset={$this->encoding}'><body>{$html}</body>" . "</html>")) { + + libxml_disable_entity_loader($disable_load_entities); return false; } + libxml_disable_entity_loader($disable_load_entities); + $this->_xpath = new DOMXPath($this->_doc); // start processing recursively at the BODY element $nodeList = $this->_xpath->query('//body[1]'); - $this->_addParagraphs($nodeList->item(0)); + $this->addParagraphs($nodeList->item(0)); // serialize back to HTML $html = $this->_doc->saveHTML(); @@ -128,9 +142,16 @@ class ElggAutoP { // re-parse so we can handle new AUTOP elements + // Do not load entities. May be unnecessary, better safe than sorry + $disable_load_entities = libxml_disable_entity_loader(true); + if (!$this->_doc->loadHTML($html)) { + libxml_disable_entity_loader($disable_load_entities); return false; } + + libxml_disable_entity_loader($disable_load_entities); + // must re-create XPath object after DOM load $this->_xpath = new DOMXPath($this->_doc); @@ -187,15 +208,16 @@ class ElggAutoP { /** * Add P and BR elements as necessary * - * @param DOMElement $el + * @param DOMElement $el DOM element + * @return void */ - protected function _addParagraphs(DOMElement $el) { + protected function addParagraphs(DOMElement $el) { // no need to call recursively, just queue up $elsToProcess = array($el); $inlinesToProcess = array(); while ($el = array_shift($elsToProcess)) { // if true, we can alter all child nodes, if not, we'll just call - // _addParagraphs on each element in the descendInto list + // addParagraphs on each element in the descendInto list $alterInline = in_array($el->nodeName, $this->_alterList); // inside affected elements, we want to trim leading whitespace from @@ -229,8 +251,8 @@ class ElggAutoP { if ($alterInline) { $isText = ($node->nodeType === XML_TEXT_NODE); $isLastInline = (! $node->nextSibling - || ($node->nextSibling->nodeType === XML_ELEMENT_NODE - && in_array($node->nextSibling->nodeName, $this->_blocks))); + || ($node->nextSibling->nodeType === XML_ELEMENT_NODE + && in_array($node->nextSibling->nodeName, $this->_blocks))); if ($isElement) { $isFollowingBr = ($node->nodeName === 'br'); } @@ -263,7 +285,7 @@ class ElggAutoP { if ($isBlock) { if (in_array($node->nodeName, $this->_descendList)) { $elsToProcess[] = $node; - //$this->_addParagraphs($node); + //$this->addParagraphs($node); } } $openP = true; |