From 43a395ae735777bfb5474c4f6a37dc1cd0818a37 Mon Sep 17 00:00:00 2001 From: Brett Profitt Date: Mon, 10 Dec 2012 15:50:25 -0500 Subject: Fixes #1479. Added ElggAutoP. Removing [\n\r] from test strings before compare to deal with differing whitespace between tags among PHP versions. --- engine/classes/ElggAutoP.php | 309 ++++++++++++++++++++++++++++++++++++++++++ engine/classes/ElggAutop.php | 315 ------------------------------------------- 2 files changed, 309 insertions(+), 315 deletions(-) create mode 100644 engine/classes/ElggAutoP.php delete mode 100644 engine/classes/ElggAutop.php (limited to 'engine/classes') diff --git a/engine/classes/ElggAutoP.php b/engine/classes/ElggAutoP.php new file mode 100644 index 000000000..89d77e583 --- /dev/null +++ b/engine/classes/ElggAutoP.php @@ -0,0 +1,309 @@ +_blocks = preg_split('@\\s+@', $this->_blocks); + $this->_descendList = preg_split('@\\s+@', $this->_descendList); + $this->_alterList = preg_split('@\\s+@', $this->_alterList); + $this->_inlines = preg_split('@\\s+@', $this->_inlines); + $this->_unique = md5(__FILE__); + } + + /** + * Intance of class for singleton pattern. + * @var ElggAutoP + */ + private static $instance; + + /** + * Singleton pattern. + * @return ElggAutoP + */ + public static function getInstance() { + $className = __CLASS__; + if (!(self::$instance instanceof $className)) { + self::$instance = new $className(); + } + return self::$instance; + } + + /** + * Create wrapper P and BR elements in HTML depending on newlines. Useful when + * users use newlines to signal line and paragraph breaks. In all cases output + * should be well-formed markup. + * + * In DIV, LI, TD, and TH elements, Ps are only added when their would be at + * least two of them. + * + * @param string $html snippet + * @return string|false output or false if parse error occurred + */ + public function process($html) { + // normalize whitespace + $html = str_replace(array("\r\n", "\r"), "\n", $html); + + // allows preserving entities untouched + $html = str_replace('&', $this->_unique . 'AMP', $html); + + $this->_doc = new DOMDocument(); + + // parse to DOM, suppressing loadHTML warnings + // http://www.php.net/manual/en/domdocument.loadhtml.php#95463 + libxml_use_internal_errors(true); + + if (!$this->_doc->loadHTML("{$html}" + . "")) { + return false; + } + + $this->_xpath = new DOMXPath($this->_doc); + // start processing recursively at the BODY element + $nodeList = $this->_xpath->query('//body[1]'); + $this->_addParagraphs($nodeList->item(0)); + + // serialize back to HTML + $html = $this->_doc->saveHTML(); + + // split AUTOPs into multiples at /\n\n+/ + $html = preg_replace('/(' . $this->_unique . 'NL){2,}/', '', $html); + $html = str_replace(array($this->_unique . 'BR', $this->_unique . 'NL', '
'), + '
', + $html); + $html = str_replace('
', '', $html); + + // re-parse so we can handle new AUTOP elements + + if (!$this->_doc->loadHTML($html)) { + return false; + } + // must re-create XPath object after DOM load + $this->_xpath = new DOMXPath($this->_doc); + + // strip AUTOPs that only have comments/whitespace + foreach ($this->_xpath->query('//autop') as $autop) { + $hasContent = false; + if (trim($autop->textContent) !== '') { + $hasContent = true; + } else { + foreach ($autop->childNodes as $node) { + if ($node->nodeType === XML_ELEMENT_NODE) { + $hasContent = true; + break; + } + } + } + if (!$hasContent) { + // strip w/ preg_replace later (faster than moving nodes out) + $autop->setAttribute("r", "1"); + } + } + + // remove a single AUTOP inside certain elements + foreach ($this->_xpath->query('//div') as $el) { + $autops = $this->_xpath->query('./autop', $el); + if ($autops->length === 1) { + // strip w/ preg_replace later (faster than moving nodes out) + $autops->item(0)->setAttribute("r", "1"); + } + } + + $html = $this->_doc->saveHTML(); + + // trim to the contents of BODY + $bodyStart = strpos($html, ''); + $bodyEnd = strpos($html, '', $bodyStart + 6); + $html = substr($html, $bodyStart + 6, $bodyEnd - $bodyStart - 6); + + // strip AUTOPs that should be removed + $html = preg_replace('@(.*?)@', '\\1', $html); + + // commit to converting AUTOPs to Ps + $html = str_replace('', "\n

", $html); + $html = str_replace('', "

\n", $html); + + $html = str_replace('
', '
', $html); + $html = str_replace($this->_unique . 'AMP', '&', $html); + return $html; + } + + /** + * Add P and BR elements as necessary + * + * @param DOMElement $el + */ + protected function _addParagraphs(DOMElement $el) { + // no need to recurse, just queue up + $elsToProcess = array($el); + $inlinesToProcess = array(); + while ($el = array_shift($elsToProcess)) { + // if true, we can alter all child nodes, if not, we'll just call + // _addParagraphs on each element in the descendInto list + $alterInline = in_array($el->nodeName, $this->_alterList); + + // inside affected elements, we want to trim leading whitespace from + // the first text node + $ltrimFirstTextNode = true; + + // should we open a new AUTOP element to move inline elements into? + $openP = true; + $autop = null; + + // after BR, ignore a newline + $isFollowingBr = false; + + $node = $el->firstChild; + while (null !== $node) { + if ($alterInline) { + if ($openP) { + $openP = false; + // create a P to move inline content into (this may be removed later) + $autop = $el->insertBefore($this->_doc->createElement('autop'), $node); + } + } + + $isElement = ($node->nodeType === XML_ELEMENT_NODE); + if ($isElement) { + $elName = $node->nodeName; + } + $isBlock = ($isElement && in_array($elName, $this->_blocks)); + + if ($alterInline) { + $isInline = $isElement && ! $isBlock; + $isText = ($node->nodeType === XML_TEXT_NODE); + $isLastInline = (! $node->nextSibling + || ($node->nextSibling->nodeType === XML_ELEMENT_NODE + && in_array($node->nextSibling->nodeName, $this->_blocks))); + if ($isElement) { + $isFollowingBr = ($node->nodeName === 'br'); + } + + if ($isText) { + $nodeText = $node->nodeValue; + if ($ltrimFirstTextNode) { + $nodeText = ltrim($nodeText); + $ltrimFirstTextNode = false; + } + if ($isFollowingBr && preg_match('@^[ \\t]*\\n[ \\t]*@', $nodeText, $m)) { + // if a user ends a line with
, don't add a second BR + $nodeText = substr($nodeText, strlen($m[0])); + } + if ($isLastInline) { + $nodeText = rtrim($nodeText); + } + $nodeText = str_replace("\n", $this->_unique . 'NL', $nodeText); + $tmpNode = $node; + $node = $node->nextSibling; // move loop to next node + + // alter node in place, then move into AUTOP + $tmpNode->nodeValue = $nodeText; + $autop->appendChild($tmpNode); + + continue; + } + } + if ($isBlock || ! $node->nextSibling) { + if ($isBlock) { + if (in_array($node->nodeName, $this->_descendList)) { + $elsToProcess[] = $node; + //$this->_addParagraphs($node); + } + } + $openP = true; + $ltrimFirstTextNode = true; + } + if ($alterInline) { + if (! $isBlock) { + $tmpNode = $node; + if ($isElement && false !== strpos($tmpNode->textContent, "\n")) { + $inlinesToProcess[] = $tmpNode; + } + $node = $node->nextSibling; + $autop->appendChild($tmpNode); + continue; + } + } + + $node = $node->nextSibling; + } + } + + // handle inline nodes + // no need to recurse, just queue up + while ($el = array_shift($inlinesToProcess)) { + $ignoreLeadingNewline = false; + foreach ($el->childNodes as $node) { + if ($node->nodeType === XML_ELEMENT_NODE) { + if ($node->nodeValue === 'BR') { + $ignoreLeadingNewline = true; + } else { + $ignoreLeadingNewline = false; + if (false !== strpos($node->textContent, "\n")) { + $inlinesToProcess[] = $node; + } + } + continue; + } elseif ($node->nodeType === XML_TEXT_NODE) { + $text = $node->nodeValue; + if ($text[0] === "\n" && $ignoreLeadingNewline) { + $text = substr($text, 1); + $ignoreLeadingNewline = false; + } + $node->nodeValue = str_replace("\n", $this->_unique . 'BR', $text); + } + } + } + } +} diff --git a/engine/classes/ElggAutop.php b/engine/classes/ElggAutop.php deleted file mode 100644 index fa0c34225..000000000 --- a/engine/classes/ElggAutop.php +++ /dev/null @@ -1,315 +0,0 @@ - - * @license http://www.opensource.org/licenses/mit-license.php MIT License - */ -class ElggAutop { - - public $encoding = 'UTF-8'; - - /** - * @var DOMDocument - */ - protected $_doc = null; - - /** - * @var DOMXPath - */ - protected $_xpath = null; - - protected $_blocks = 'address article area aside blockquote caption col colgroup dd - details div dl dt fieldset figure figcaption footer form h1 h2 h3 h4 h5 h6 header - hr hgroup legend map math menu nav noscript p pre section select style summary - table tbody td tfoot th thead tr ul ol option li'; - - /** - * @var array - */ - protected $_inlines = 'a abbr audio b button canvas caption cite code command datalist - del dfn em embed i iframe img input ins kbd keygen label map mark meter object - output progress q rp rt ruby s samp script select small source span strong style - sub sup textarea time var video wbr'; - - /** - * Descend into these elements to add Ps - * - * @var array - */ - protected $_descendList = 'article aside blockquote body details div footer form - header section'; - - /** - * Add Ps inside these elements - * - * @var array - */ - protected $_alterList = 'article aside blockquote body details div footer header - section'; - - protected $_unique = ''; - - public function __construct() - { - $this->_blocks = preg_split('@\\s+@', $this->_blocks); - $this->_descendList = preg_split('@\\s+@', $this->_descendList); - $this->_alterList = preg_split('@\\s+@', $this->_alterList); - $this->_inlines = preg_split('@\\s+@', $this->_inlines); - $this->_unique = md5(__FILE__); - } - - /** - * Intance of class for singleton pattern. - * @var ElggAutop - */ - private static $instance; - - /** - * Singleton pattern. - * @return ElggAutop - */ - public static function getInstance() { - $className = __CLASS__; - if (!(self::$instance instanceof $className)) { - self::$instance = new $className(); - } - return self::$instance; - } - - /** - * Create wrapper P and BR elements in HTML depending on newlines. Useful when - * users use newlines to signal line and paragraph breaks. In all cases output - * should be well-formed markup. - * - * In DIV, LI, TD, and TH elements, Ps are only added when their would be at - * least two of them. - * - * @param string $html snippet - * @return string|false output or false if parse error occurred - */ - public function process($html) - { - // normalize whitespace - $html = str_replace(array("\r\n", "\r"), "\n", $html); - - // allows preserving entities untouched - $html = str_replace('&', $this->_unique . 'AMP', $html); - - $this->_doc = new DOMDocument(); - - // parse to DOM, suppressing loadHTML warnings - // http://www.php.net/manual/en/domdocument.loadhtml.php#95463 - libxml_use_internal_errors(true); - if (! @$this->_doc->loadHTML("{$html}" - . "")) { - return false; - } - - $this->_xpath = new DOMXPath($this->_doc); - // start processing recursively at the BODY element - $nodeList = $this->_xpath->query('//body[1]'); - $this->_addParagraphs($nodeList->item(0)); - - // serialize back to HTML - $html = $this->_doc->saveHTML(); - - // split AUTOPs into multiples at /\n\n+/ - $html = preg_replace('/(' . $this->_unique . 'NL){2,}/', '
', $html); - $html = str_replace(array($this->_unique . 'BR', $this->_unique . 'NL', '
'), - '
', - $html); - $html = str_replace('
', '', $html); - - // re-parse so we can handle new AUTOP elements - - if (! @$this->_doc->loadHTML($html)) { - return false; - } - // must re-create XPath object after DOM load - $this->_xpath = new DOMXPath($this->_doc); - - // strip AUTOPs that only have comments/whitespace - foreach ($this->_xpath->query('//autop') as $autop) { - $hasContent = false; - if (trim($autop->textContent) !== '') { - $hasContent = true; - } else { - foreach ($autop->childNodes as $node) { - if ($node->nodeType === XML_ELEMENT_NODE) { - $hasContent = true; - break; - } - } - } - if (! $hasContent) { - // strip w/ preg_replace later (faster than moving nodes out) - $autop->setAttribute("r", "1"); - } - } - - // remove a single AUTOP inside certain elements - - foreach ($this->_xpath->query('//div') as $el) { - $autops = $this->_xpath->query('./autop', $el); - if ($autops->length === 1) { - // strip w/ preg_replace later (faster than moving nodes out) - $autops->item(0)->setAttribute("r", "1"); - } - } - - $html = $this->_doc->saveHTML(); - - // trim to the contents of BODY - $bodyStart = strpos($html, ''); - $bodyEnd = strpos($html, '', $bodyStart + 6); - $html = substr($html, $bodyStart + 6, $bodyEnd - $bodyStart - 6); - - // strip AUTOPs that should be removed - $html = preg_replace('@(.*?)@', '\\1', $html); - - // commit to converting AUTOPs to Ps - $html = str_replace('', "\n

", $html); - $html = str_replace('', "

\n", $html); - - $html = str_replace('
', '
', $html); - $html = str_replace($this->_unique . 'AMP', '&', $html); - return $html; - } - - /** - * Add P and BR elements as necessary - * - * @param DOMElement $el - */ - protected function _addParagraphs(DOMElement $el) - { - // no need to recurse, just queue up - $elsToProcess = array($el); - $inlinesToProcess = array(); - while ($el = array_shift($elsToProcess)) { - // if true, we can alter all child nodes, if not, we'll just call - // _addParagraphs on each element in the descendInto list - $alterInline = in_array($el->nodeName, $this->_alterList); - - // inside affected elements, we want to trim leading whitespace from - // the first text node - $ltrimFirstTextNode = true; - - // should we open a new AUTOP element to move inline elements into? - $openP = true; - $autop = null; - - // after BR, ignore a newline - $isFollowingBr = false; - - $node = $el->firstChild; - while (null !== $node) { - if ($alterInline) { - if ($openP) { - $openP = false; - // create a P to move inline content into (this may be removed later) - $autop = $el->insertBefore($this->_doc->createElement('autop'), $node); - } - } - - $isElement = ($node->nodeType === XML_ELEMENT_NODE); - if ($isElement) { - $elName = $node->nodeName; - } - $isBlock = ($isElement && in_array($elName, $this->_blocks)); - - if ($alterInline) { - $isInline = $isElement && ! $isBlock; - $isText = ($node->nodeType === XML_TEXT_NODE); - $isLastInline = (! $node->nextSibling - || ($node->nextSibling->nodeType === XML_ELEMENT_NODE - && in_array($node->nextSibling->nodeName, $this->_blocks))); - if ($isElement) { - $isFollowingBr = ($node->nodeName === 'br'); - } - - if ($isText) { - $nodeText = $node->nodeValue; - if ($ltrimFirstTextNode) { - $nodeText = ltrim($nodeText); - $ltrimFirstTextNode = false; - } - if ($isFollowingBr && preg_match('@^[ \\t]*\\n[ \\t]*@', $nodeText, $m)) { - // if a user ends a line with
, don't add a second BR - $nodeText = substr($nodeText, strlen($m[0])); - } - if ($isLastInline) { - $nodeText = rtrim($nodeText); - } - $nodeText = str_replace("\n", $this->_unique . 'NL', $nodeText); - $tmpNode = $node; - $node = $node->nextSibling; // move loop to next node - - // alter node in place, then move into AUTOP - $tmpNode->nodeValue = $nodeText; - $autop->appendChild($tmpNode); - - continue; - } - } - if ($isBlock || ! $node->nextSibling) { - if ($isBlock) { - if (in_array($node->nodeName, $this->_descendList)) { - $elsToProcess[] = $node; - //$this->_addParagraphs($node); - } - } - $openP = true; - $ltrimFirstTextNode = true; - } - if ($alterInline) { - if (! $isBlock) { - $tmpNode = $node; - if ($isElement && false !== strpos($tmpNode->textContent, "\n")) { - $inlinesToProcess[] = $tmpNode; - } - $node = $node->nextSibling; - $autop->appendChild($tmpNode); - continue; - } - } - - $node = $node->nextSibling; - } - } - - // handle inline nodes - // no need to recurse, just queue up - while ($el = array_shift($inlinesToProcess)) { - $ignoreLeadingNewline = false; - foreach ($el->childNodes as $node) { - if ($node->nodeType === XML_ELEMENT_NODE) { - if ($node->nodeValue === 'BR') { - $ignoreLeadingNewline = true; - } else { - $ignoreLeadingNewline = false; - if (false !== strpos($node->textContent, "\n")) { - $inlinesToProcess[] = $node; - } - } - continue; - } elseif ($node->nodeType === XML_TEXT_NODE) { - $text = $node->nodeValue; - if ($text[0] === "\n" && $ignoreLeadingNewline) { - $text = substr($text, 1); - $ignoreLeadingNewline = false; - } - $node->nodeValue = str_replace("\n", $this->_unique . 'BR', $text); - } - } - } - } -} -- cgit v1.2.3