diff options
Diffstat (limited to 'engine/classes/ElggAutoP.php')
| -rw-r--r-- | engine/classes/ElggAutoP.php | 59 | 
1 files changed, 43 insertions, 16 deletions
| diff --git a/engine/classes/ElggAutoP.php b/engine/classes/ElggAutoP.php index 89d77e583..05842d1b2 100644 --- a/engine/classes/ElggAutoP.php +++ b/engine/classes/ElggAutoP.php @@ -7,6 +7,9 @@   *   * In DIV elements, Ps are only added when there would be at   * least two of them. + *  + * @package    Elgg.Core + * @subpackage Output   */  class ElggAutoP { @@ -51,8 +54,12 @@ class ElggAutoP {  	protected $_alterList = 'article aside blockquote body details div footer header  		section'; +	/** @var string */  	protected $_unique = ''; +	/** +	 * Constructor +	 */  	public function __construct() {  		$this->_blocks = preg_split('@\\s+@', $this->_blocks);  		$this->_descendList = preg_split('@\\s+@', $this->_descendList); @@ -98,25 +105,34 @@ class ElggAutoP {  		$html = str_replace('&', $this->_unique . 'AMP', $html);  		$this->_doc = new DOMDocument(); -	    +  		// parse to DOM, suppressing loadHTML warnings  		// http://www.php.net/manual/en/domdocument.loadhtml.php#95463  		libxml_use_internal_errors(true); +		// Do not load entities. May be unnecessary, better safe than sorry +		$disable_load_entities = libxml_disable_entity_loader(true); +  		if (!$this->_doc->loadHTML("<html><meta http-equiv='content-type' "   				. "content='text/html; charset={$this->encoding}'><body>{$html}</body>"  				. "</html>")) { + +			libxml_disable_entity_loader($disable_load_entities);  			return false;  		} +		libxml_disable_entity_loader($disable_load_entities); +  		$this->_xpath = new DOMXPath($this->_doc);  		// start processing recursively at the BODY element  		$nodeList = $this->_xpath->query('//body[1]'); -		$this->_addParagraphs($nodeList->item(0)); +		$this->addParagraphs($nodeList->item(0));  		// serialize back to HTML  		$html = $this->_doc->saveHTML(); +		// Note: we create <autop> elements, which will later be converted to paragraphs +  		// split AUTOPs into multiples at /\n\n+/  		$html = preg_replace('/(' . $this->_unique . 'NL){2,}/', '</autop><autop>', $html);  		$html = str_replace(array($this->_unique . 'BR', $this->_unique . 'NL', '<br>'),  @@ -126,14 +142,22 @@ class ElggAutoP {  		// re-parse so we can handle new AUTOP elements +		// Do not load entities. May be unnecessary, better safe than sorry +		$disable_load_entities = libxml_disable_entity_loader(true); +  		if (!$this->_doc->loadHTML($html)) { +			libxml_disable_entity_loader($disable_load_entities);  			return false;  		} + +		libxml_disable_entity_loader($disable_load_entities); +  		// must re-create XPath object after DOM load  		$this->_xpath = new DOMXPath($this->_doc);  		// strip AUTOPs that only have comments/whitespace  		foreach ($this->_xpath->query('//autop') as $autop) { +			/* @var DOMElement $autop */  			$hasContent = false;  			if (trim($autop->textContent) !== '') {  				$hasContent = true; @@ -146,17 +170,19 @@ class ElggAutoP {  				}  			}  			if (!$hasContent) { -				// strip w/ preg_replace later (faster than moving nodes out) +				// mark to be later replaced w/ preg_replace (faster than moving nodes out)  				$autop->setAttribute("r", "1");  			}  		} -		// remove a single AUTOP inside certain elements +		// If a DIV contains a single AUTOP, remove it  		foreach ($this->_xpath->query('//div') as $el) { +			/* @var DOMElement $el */  			$autops = $this->_xpath->query('./autop', $el);  			if ($autops->length === 1) { -				// strip w/ preg_replace later (faster than moving nodes out) -				$autops->item(0)->setAttribute("r", "1"); +				$firstAutop = $autops->item(0); +				/* @var DOMElement $firstAutop */ +				$firstAutop->setAttribute("r", "1");  			}  		} @@ -182,15 +208,16 @@ class ElggAutoP {  	/**  	 * Add P and BR elements as necessary  	 * -	 * @param DOMElement $el +	 * @param DOMElement $el DOM element +	 * @return void  	 */ -	protected function _addParagraphs(DOMElement $el) { -		// no need to recurse, just queue up +	protected function addParagraphs(DOMElement $el) { +		// no need to call recursively, just queue up  		$elsToProcess = array($el);  		$inlinesToProcess = array();  		while ($el = array_shift($elsToProcess)) {  			// if true, we can alter all child nodes, if not, we'll just call -			// _addParagraphs on each element in the descendInto list +			// addParagraphs on each element in the descendInto list  			$alterInline = in_array($el->nodeName, $this->_alterList);  			// inside affected elements, we want to trim leading whitespace from @@ -216,16 +243,16 @@ class ElggAutoP {  				$isElement = ($node->nodeType === XML_ELEMENT_NODE);  				if ($isElement) { -					$elName = $node->nodeName; +					$isBlock = in_array($node->nodeName, $this->_blocks); +				} else { +					$isBlock = false;  				} -				$isBlock = ($isElement && in_array($elName, $this->_blocks));  				if ($alterInline) { -					$isInline = $isElement && ! $isBlock;  					$isText = ($node->nodeType === XML_TEXT_NODE);  					$isLastInline = (! $node->nextSibling -								   || ($node->nextSibling->nodeType === XML_ELEMENT_NODE -									   && in_array($node->nextSibling->nodeName, $this->_blocks))); +							|| ($node->nextSibling->nodeType === XML_ELEMENT_NODE +								&& in_array($node->nextSibling->nodeName, $this->_blocks)));  					if ($isElement) {  						$isFollowingBr = ($node->nodeName === 'br');  					} @@ -258,7 +285,7 @@ class ElggAutoP {  					if ($isBlock) {  						if (in_array($node->nodeName, $this->_descendList)) {  							$elsToProcess[] = $node; -							//$this->_addParagraphs($node); +							//$this->addParagraphs($node);  						}  					}  					$openP = true; | 
