diff options
Diffstat (limited to 'vendors/simpletest/test/parser_test.php')
| -rwxr-xr-x | vendors/simpletest/test/parser_test.php | 551 | 
1 files changed, 551 insertions, 0 deletions
<?php
// $Id: parser_test.php 1608 2007-12-27 09:03:07Z pp11 $
require_once(dirname(__FILE__) . '/../autorun.php');
require_once(dirname(__FILE__) . '/../parser.php');
Mock::generate('SimpleHtmlSaxParser');
Mock::generate('SimpleSaxListener');

/**
 * Tests for ParallelRegex: matching a subject against zero, one or
 * several registered patterns, with and without labels.
 *
 * In these tests the constructor flag is true for case sensitive
 * matching and false for case insensitive matching.
 */
class TestOfParallelRegex extends UnitTestCase {

    /** With no patterns registered nothing matches and $match is empty. */
    function testNoPatterns() {
        $regex = new ParallelRegex(false);
        $this->assertFalse($regex->match("Hello", $match));
        $this->assertEqual($match, "");
    }

    /** A pattern that can match nothing still succeeds on an empty subject. */
    function testNoSubject() {
        $regex = new ParallelRegex(false);
        $regex->addPattern(".*");
        $this->assertTrue($regex->match("", $match));
        $this->assertEqual($match, "");
    }

    /** A catch-all pattern returns the whole subject as the match. */
    function testMatchAll() {
        $regex = new ParallelRegex(false);
        $regex->addPattern(".*");
        $this->assertTrue($regex->match("Hello", $match));
        $this->assertEqual($match, "Hello");
    }

    /** Case sensitive: the upper case "ABC" is skipped in favour of "abc". */
    function testCaseSensitive() {
        $regex = new ParallelRegex(true);
        $regex->addPattern("abc");
        $this->assertTrue($regex->match("abcdef", $match));
        $this->assertEqual($match, "abc");
        $this->assertTrue($regex->match("AAABCabcdef", $match));
        $this->assertEqual($match, "abc");
    }

    /** Case insensitive: the first occurrence, "ABC", is what matches. */
    function testCaseInsensitive() {
        $regex = new ParallelRegex(false);
        $regex->addPattern("abc");
        $this->assertTrue($regex->match("abcdef", $match));
        $this->assertEqual($match, "abc");
        $this->assertTrue($regex->match("AAABCabcdef", $match));
        $this->assertEqual($match, "ABC");
    }

    /** With several patterns the earliest match in the subject is reported. */
    function testMatchMultiple() {
        $regex = new ParallelRegex(true);
        $regex->addPattern("abc");
        $regex->addPattern("ABC");
        $this->assertTrue($regex->match("abcdef", $match));
        $this->assertEqual($match, "abc");
        $this->assertTrue($regex->match("AAABCabcdef", $match));
        $this->assertEqual($match, "ABC");
        $this->assertFalse($regex->match("Hello", $match));
    }

    /** When a pattern carries a label, match() returns that label. */
    function testPatternLabels() {
        $regex = new ParallelRegex(false);
        $regex->addPattern("abc", "letter");
        $regex->addPattern("123", "number");
        $this->assertIdentical($regex->match("abcdef", $match), "letter");
        $this->assertEqual($match, "abc");
        $this->assertIdentical($regex->match("0123456789", $match), "number");
        $this->assertEqual($match, "123");
    }
}

/**
 * Tests for SimpleStateStack: the current state, entering and leaving
 * states, and the refusal to leave the starting state.
 */
class TestOfStateStack extends UnitTestCase {

    /** The constructor argument is the initial current state. */
    function testStartState() {
        $stack = new SimpleStateStack("one");
        $this->assertEqual($stack->getCurrent(), "one");
    }

    /** Leaving the only state on the stack is reported as a failure. */
    function testExhaustion() {
        $stack = new SimpleStateStack("one");
        $this->assertFalse($stack->leave());
    }

    /** enter() pushes a state, leave() returns to the previous one. */
    function testStateMoves() {
        $stack = new SimpleStateStack("one");
        $stack->enter("two");
        $this->assertEqual($stack->getCurrent(), "two");
        $stack->enter("three");
        $this->assertEqual($stack->getCurrent(), "three");
        $this->assertTrue($stack->leave());
        $this->assertEqual($stack->getCurrent(), "two");
        $stack->enter("third");
        $this->assertEqual($stack->getCurrent(), "third");
        $this->assertTrue($stack->leave());
        $this->assertTrue($stack->leave());
        $this->assertEqual($stack->getCurrent(), "one");
    }
}

/**
 * Empty handler used only as the template for MockTestParser. Its
 * method names are the handler names the lexer tests below refer to.
 */
class TestParser {

    function accept() {
    }

    function a() {
    }

    function b() {
    }
}
Mock::generate('TestParser');

/**
 * Tests for SimpleLexer in its default mode, where every token is
 * passed to the handler's accept() method.
 */
class TestOfLexer extends UnitTestCase {

    /** Parsing an empty string succeeds without calling the handler. */
    function testEmptyPage() {
        $handler = new MockTestParser();
        $handler->expectNever("accept");
        $handler->setReturnValue("accept", true);
        $lexer = new SimpleLexer($handler);
        $lexer->addPattern("a+");
        $this->assertTrue($lexer->parse(""));
    }

    /** Matched and unmatched runs are reported alternately, in order. */
    function testSinglePattern() {
        $handler = new MockTestParser();
        $handler->expectArgumentsAt(0, "accept", array("aaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "accept", array("x", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "accept", array("a", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "accept", array("yyy", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "accept", array("a", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "accept", array("x", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(6, "accept", array("aaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(7, "accept", array("z", LEXER_UNMATCHED));
        $handler->expectCallCount("accept", 8);
        $handler->setReturnValue("accept", true);
        $lexer = new SimpleLexer($handler);
        $lexer->addPattern("a+");
        $this->assertTrue($lexer->parse("aaaxayyyaxaaaz"));
    }

    /**
     * Two patterns split the input into the expected token sequence.
     * Only the token text is pinned; '*' leaves the second argument
     * unconstrained.
     */
    function testMultiplePattern() {
        $handler = new MockTestParser();
        $target = array("a", "b", "a", "bb", "x", "b", "a", "xxxxxx", "a", "x");
        for ($i = 0; $i < count($target); $i++) {
            $handler->expectArgumentsAt($i, "accept", array($target[$i], '*'));
        }
        $handler->expectCallCount("accept", count($target));
        $handler->setReturnValue("accept", true);
        $lexer = new SimpleLexer($handler);
        $lexer->addPattern("a+");
        $lexer->addPattern("b+");
        $this->assertTrue($lexer->parse("ababbxbaxxxxxxax"));
    }
}

/**
 * Tests for SimpleLexer modes: patterns bound to a mode, entry and exit
 * patterns, special patterns, and leaving the outermost mode.
 * In these tests the mode name doubles as the handler method name.
 */
class TestOfLexerModes extends UnitTestCase {

    /** Patterns registered for mode "b" are ignored while in mode "a". */
    function testIsolatedPattern() {
        $handler = new MockTestParser();
        $handler->expectArgumentsAt(0, "a", array("a", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "a", array("bxb", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "a", array("aaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "a", array("x", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(6, "a", array("aaaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(7, "a", array("x", LEXER_UNMATCHED));
        $handler->expectCallCount("a", 8);
        $handler->setReturnValue("a", true);
        $lexer = new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addPattern("b+", "b");
        $this->assertTrue($lexer->parse("abaabxbaaaxaaaax"));
    }

    /** After the entry pattern ":" all tokens go to handler "b". */
    function testModeChange() {
        $handler = new MockTestParser();
        $handler->expectArgumentsAt(0, "a", array("a", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "a", array("aaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(0, "b", array(":", LEXER_ENTER));
        $handler->expectArgumentsAt(1, "b", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "b", array("b", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "b", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "b", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "b", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(6, "b", array("bbb", LEXER_MATCHED));
        $handler->expectArgumentsAt(7, "b", array("a", LEXER_UNMATCHED));
        $handler->expectCallCount("a", 5);
        $handler->expectCallCount("b", 8);
        $handler->setReturnValue("a", true);
        $handler->setReturnValue("b", true);
        $lexer = new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addEntryPattern(":", "a", "b");
        $lexer->addPattern("b+", "b");
        $this->assertTrue($lexer->parse("abaabaaa:ababbabbba"));
    }

    /** "(" enters mode "b", ")" exits it and parsing resumes in "a". */
    function testNesting() {
        $handler = new MockTestParser();
        $handler->setReturnValue("a", true);
        $handler->setReturnValue("b", true);
        $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(0, "b", array("(", LEXER_ENTER));
        $handler->expectArgumentsAt(1, "b", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(2, "b", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(3, "b", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(4, "b", array(")", LEXER_EXIT));
        $handler->expectArgumentsAt(4, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "a", array("b", LEXER_UNMATCHED));
        $handler->expectCallCount("a", 6);
        $handler->expectCallCount("b", 5);
        $lexer = new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addEntryPattern("(", "a", "b");
        $lexer->addPattern("b+", "b");
        $lexer->addExitPattern(")", "b");
        $this->assertTrue($lexer->parse("aabaab(bbabb)aab"));
    }

    /**
     * A special pattern hands its single match to handler "b" with
     * LEXER_SPECIAL; the surrounding text stays with handler "a".
     */
    function testSingular() {
        $handler = new MockTestParser();
        $handler->setReturnValue("a", true);
        $handler->setReturnValue("b", true);
        $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(2, "a", array("xx", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(3, "a", array("xx", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(0, "b", array("b", LEXER_SPECIAL));
        $handler->expectArgumentsAt(1, "b", array("bbb", LEXER_SPECIAL));
        $handler->expectCallCount("a", 4);
        $handler->expectCallCount("b", 2);
        $lexer = new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addSpecialPattern("b+", "a", "b");
        $this->assertTrue($lexer->parse("aabaaxxbbbxx"));
    }

    /** An exit pattern met in the starting mode makes parse() fail. */
    function testUnwindTooFar() {
        $handler = new MockTestParser();
        $handler->setReturnValue("a", true);
        $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array(")", LEXER_EXIT));
        $handler->expectCallCount("a", 2);
        $lexer = new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addExitPattern(")", "a");
        $this->assertFalse($lexer->parse("aa)aa"));
    }
}

/**
 * Tests for SimpleLexer::mapHandler(), which lets a mode use a handler
 * method whose name differs from the mode name.
 */
class TestOfLexerHandlers extends UnitTestCase {

    /** Both "mode_a" and "mode_b" are mapped onto handler method a(). */
    function testModeMapping() {
        $handler = new MockTestParser();
        $handler->setReturnValue("a", true);
        $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("(", LEXER_ENTER));
        $handler->expectArgumentsAt(2, "a", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "a", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "a", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "a", array(")", LEXER_EXIT));
        $handler->expectArgumentsAt(6, "a", array("b", LEXER_UNMATCHED));
        $handler->expectCallCount("a", 7);
        $lexer = new SimpleLexer($handler, "mode_a");
        $lexer->addPattern("a+", "mode_a");
        $lexer->addEntryPattern("(", "mode_a", "mode_b");
        $lexer->addPattern("b+", "mode_b");
        $lexer->addExitPattern(")", "mode_b");
        $lexer->mapHandler("mode_a", "a");
        $lexer->mapHandler("mode_b", "a");
        $this->assertTrue($lexer->parse("aa(bbabb)b"));
    }
}

/**
 * Tests for SimpleHtmlLexer: which parser callbacks receive which
 * tokens for plain text, skipped regions and tags.
 */
class TestOfSimpleHtmlLexer extends UnitTestCase {

    /**
     * Builds a mock SAX parser whose token callbacks all return true,
     * so that the lexer keeps going.
     *
     * @return MockSimpleHtmlSaxParser
     */
    function &createParser() {
        $parser = new MockSimpleHtmlSaxParser();
        $parser->setReturnValue('acceptStartToken', true);
        $parser->setReturnValue('acceptEndToken', true);
        $parser->setReturnValue('acceptAttributeToken', true);
        $parser->setReturnValue('acceptEntityToken', true);
        $parser->setReturnValue('acceptTextToken', true);
        $parser->setReturnValue('ignore', true);
        return $parser;
    }

    /** An empty document triggers no callbacks at all. */
    function testNoContent() {
        $parser = &$this->createParser();
        $parser->expectNever('acceptStartToken');
        $parser->expectNever('acceptEndToken');
        $parser->expectNever('acceptAttributeToken');
        $parser->expectNever('acceptEntityToken');
        $parser->expectNever('acceptTextToken');
        $lexer = new SimpleHtmlLexer($parser);
        $this->assertTrue($lexer->parse(''));
    }

    /** Tags the lexer does not recognise are passed through as text. */
    function testUninteresting() {
        $parser = &$this->createParser();
        $parser->expectOnce('acceptTextToken', array('<html></html>', '*'));
        $lexer = new SimpleHtmlLexer($parser);
        $this->assertTrue($lexer->parse('<html></html>'));
    }

    /** Style blocks go to ignore(), never to acceptTextToken(). */
    function testSkipCss() {
        $parser = &$this->createParser();
        $parser->expectNever('acceptTextToken');
        $parser->expectAtLeastOnce('ignore');
        $lexer = new SimpleHtmlLexer($parser);
        $this->assertTrue($lexer->parse("<style>Lot's of styles</style>"));
    }

    /** Script blocks go to ignore(), whatever punctuation they hold. */
    function testSkipJavaScript() {
        $parser = &$this->createParser();
        $parser->expectNever('acceptTextToken');
        $parser->expectAtLeastOnce('ignore');
        $lexer = new SimpleHtmlLexer($parser);
        $this->assertTrue($lexer->parse("<SCRIPT>Javascript code {';:^%^%£$'@\"*(}</SCRIPT>"));
    }

    /** HTML comments go to ignore(), including any tags inside them. */
    function testSkipHtmlComments() {
        $parser = &$this->createParser();
        $parser->expectNever('acceptTextToken');
        $parser->expectAtLeastOnce('ignore');
        $lexer = new SimpleHtmlLexer($parser);
        $this->assertTrue($lexer->parse("<!-- <title>title</title><style>styles</style> -->"));
    }

    /** A bare tag yields an opening token, a ">" token, text and an end. */
    function testTagWithNoAttributes() {
        $parser = &$this->createParser();
        $parser->expectAt(0, 'acceptStartToken', array('<title', '*'));
        $parser->expectAt(1, 'acceptStartToken', array('>', '*'));
        $parser->expectCallCount('acceptStartToken', 2);
        $parser->expectOnce('acceptTextToken', array('Hello', '*'));
        $parser->expectOnce('acceptEndToken', array('</title>', '*'));
        $lexer = new SimpleHtmlLexer($parser);
        $this->assertTrue($lexer->parse('<title>Hello</title>'));
    }

    /**
     * The attribute name arrives as a start token; the '= "' opener,
     * the value and the closing quote arrive as attribute tokens.
     */
    function testTagWithAttributes() {
        $parser = &$this->createParser();
        $parser->expectOnce('acceptTextToken', array('label', '*'));
        $parser->expectAt(0, 'acceptStartToken', array('<a', '*'));
        $parser->expectAt(1, 'acceptStartToken', array('href', '*'));
        $parser->expectAt(2, 'acceptStartToken', array('>', '*'));
        $parser->expectCallCount('acceptStartToken', 3);
        $parser->expectAt(0, 'acceptAttributeToken', array('= "', '*'));
        $parser->expectAt(1, 'acceptAttributeToken', array('here.html', '*'));
        $parser->expectAt(2, 'acceptAttributeToken', array('"', '*'));
        $parser->expectCallCount('acceptAttributeToken', 3);
        $parser->expectOnce('acceptEndToken', array('</a>', '*'));
        $lexer = new SimpleHtmlLexer($parser);
        $this->assertTrue($lexer->parse('<a href = "here.html">label</a>'));
    }
}

/**
 * Tests for SimpleHtmlSaxParser: the startElement / addContent /
 * endElement events delivered to a listener for various markup.
 */
class TestOfHtmlSaxParser extends UnitTestCase {

    /**
     * Builds a mock listener whose three event methods return true.
     *
     * @return MockSimpleSaxListener
     */
    function &createListener() {
        $listener = new MockSimpleSaxListener();
        $listener->setReturnValue('startElement', true);
        $listener->setReturnValue('addContent', true);
        $listener->setReturnValue('endElement', true);
        return $listener;
    }

    function testFramesetTag() {
        $listener = &$this->createListener();
        $listener->expectOnce('startElement', array('frameset', array()));
        $listener->expectOnce('addContent', array('Frames'));
        $listener->expectOnce('endElement', array('frameset'));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse('<frameset>Frames</frameset>'));
    }

    /** Attribute values without quotes are still picked up. */
    function testTagWithUnquotedAttributes() {
        $listener = &$this->createListener();
        $listener->expectOnce(
                'startElement',
                array('input', array('name' => 'a.b.c', 'value' => 'd')));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse('<input name=a.b.c value = d>'));
    }

    /** Unrecognised wrapper tags are delivered as content around the link. */
    function testTagInsideContent() {
        $listener = &$this->createListener();
        $listener->expectOnce('startElement', array('a', array()));
        $listener->expectAt(0, 'addContent', array('<html>'));
        $listener->expectAt(1, 'addContent', array('</html>'));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse('<html><a></a></html>'));
    }

    function testTagWithInternalContent() {
        $listener = &$this->createListener();
        $listener->expectOnce('startElement', array('a', array()));
        $listener->expectOnce('addContent', array('label'));
        $listener->expectOnce('endElement', array('a'));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse('<a>label</a>'));
    }

    /** Single-quoted attribute values have their quotes stripped. */
    function testLinkAddress() {
        $listener = &$this->createListener();
        $listener->expectOnce('startElement', array('a', array('href' => 'here.html')));
        $listener->expectOnce('addContent', array('label'));
        $listener->expectOnce('endElement', array('a'));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse("<a href = 'here.html'>label</a>"));
    }

    /**
     * An entity-encoded ampersand in the markup must reach the listener
     * decoded, as a plain "&" in the attribute value.
     */
    function testEncodedAttribute() {
        $listener = &$this->createListener();
        $listener->expectOnce('startElement', array('a', array('href' => 'here&there.html')));
        $listener->expectOnce('addContent', array('label'));
        $listener->expectOnce('endElement', array('a'));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse("<a href = 'here&amp;there.html'>label</a>"));
    }

    /** An id of "0" is kept as the string '0' rather than dropped. */
    function testTagWithId() {
        $listener = &$this->createListener();
        $listener->expectOnce('startElement', array('a', array('id' => '0')));
        $listener->expectOnce('addContent', array('label'));
        $listener->expectOnce('endElement', array('a'));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse('<a id="0">label</a>'));
    }

    /** Empty and valueless attributes both come through as ''. */
    function testTagWithEmptyAttributes() {
        $listener = &$this->createListener();
        $listener->expectOnce(
                'startElement',
                array('option', array('value' => '', 'selected' => '')));
        $listener->expectOnce('addContent', array('label'));
        $listener->expectOnce('endElement', array('option'));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse('<option value="" selected>label</option>'));
    }

    /** Tag and attribute names are reported in lower case; values keep theirs. */
    function testComplexTagWithLotsOfCaseVariations() {
        $listener = &$this->createListener();
        $listener->expectOnce(
                'startElement',
                array('a', array('href' => 'here.html', 'style' => "'cool'")));
        $listener->expectOnce('addContent', array('label'));
        $listener->expectOnce('endElement', array('a'));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse('<A HREF = \'here.html\' Style="\'cool\'">label</A>'));
    }

    /** The trailing " />" of an XHTML tag does not disturb the attributes. */
    function testXhtmlSelfClosingTag() {
        $listener = &$this->createListener();
        $listener->expectOnce(
                'startElement',
                array('input', array('type' => 'submit', 'name' => 'N', 'value' => 'V')));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse('<input type="submit" name="N" value="V" />'));
    }

    /** frameset and frame are elements; the noframes block is plain content. */
    function testNestedFrameInFrameset() {
        $listener = &$this->createListener();
        $listener->expectAt(0, 'startElement', array('frameset', array()));
        $listener->expectAt(1, 'startElement', array('frame', array('src' => 'frame.html')));
        $listener->expectCallCount('startElement', 2);
        $listener->expectOnce('addContent', array('<noframes>Hello</noframes>'));
        $listener->expectOnce('endElement', array('frameset'));
        $parser = new SimpleHtmlSaxParser($listener);
        $this->assertTrue($parser->parse(
                '<frameset><frame src="frame.html"><noframes>Hello</noframes></frameset>'));
    }
}

/**
 * Tests for SimpleHtmlSaxParser::normalise(), which reduces HTML to the
 * plain text these assertions expect.
 */
class TestOfTextExtraction extends UnitTestCase {

    /** Images vanish unless they have alt text; paragraph text is kept. */
    function testImageSuppressionWhileKeepingParagraphsAndAltText() {
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<img src="foo.png" /><p>some text</p><img src="bar.png" alt="bar" />'),
                'some text bar');
    }

    /** Runs of newlines, tabs and spaces collapse; the ends are trimmed. */
    function testSpaceNormalisation() {
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise("\nOne\tTwo   \nThree\t"),
                'One Two Three');
    }

    function testMultilinesCommentSuppression() {
        // NOTE(review): the literal is single-quoted, so "\n" here is a
        // backslash followed by "n", not a line break. Confirm whether
        // real newlines were intended before switching to double quotes.
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<!--\n Hello \n-->'),
                '');
    }

    function testCommentSuppression() {
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<!--Hello-->'),
                '');
    }

    /** Script elements are removed together with their contents. */
    function testJavascriptSuppression() {
        // NOTE(review): single-quoted again, so this first case holds a
        // literal backslash-n rather than a newline - confirm intent.
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<script attribute="test">\nHello\n</script>'),
                '');
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<script attribute="test">Hello</script>'),
                '');
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<script>Hello</script>'),
                '');
    }

    function testTagSuppression() {
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<b>Hello</b>'),
                'Hello');
    }

    /** Removing adjacent tags does not insert a space between the texts. */
    function testAdjoiningTagSuppression() {
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<b>Hello</b><em>Goodbye</em>'),
                'HelloGoodbye');
    }

    /** Alt text is extracted whether double quoted, single quoted or bare. */
    function testExtractImageAltTextWithDifferentQuotes() {
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<img alt="One"><img alt=\'Two\'><img alt=Three>'),
                'One Two Three');
    }

    function testExtractImageAltTextMultipleTimes() {
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('<img alt="One"><img alt="Two"><img alt="Three">'),
                'One Two Three');
    }

    /**
     * The input is the five entity references lt, gt, quot, amp and
     * #039; the output must be the corresponding literal characters.
     */
    function testHtmlEntityTranslation() {
        $this->assertEqual(
                SimpleHtmlSaxParser::normalise('&lt;&gt;&quot;&amp;&#039;'),
                '<>"&\'');
    }
}
?>
\ No newline at end of file  | 
