aboutsummaryrefslogtreecommitdiff
path: root/models/openid-php-openid-782224d/admin/phpaliases.py
blob: c4ce21684b262a6ecd1bc0a503d4cf0e94541cd3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python

"""This script searches files for functions that are just aliases in
PHP source code. This is not 100% reliable, so it should not be
automated, but it's useful to run once in a while to make sure that
all of the matches it finds are not really legitimate aliases.

Usage:

  phpaliases.py <name of alias file> [PHP source code filename]...
"""

import sys

# The alias table that this script parses can be downloaded from this URL.
alias_url = 'http://www.zend.com/phpfunc/all_aliases.php'

# Markers in the downloaded page; the alias table is the text between them.
header_tok = '<!-- END OF HEADER -->'
footer_tok = '<!-- FOOTER -->'

# Example line of the table that we parse:
# '<tr bgcolor="#EFEFFF"><td><a href="function.bzclose.php">bzclose</a></td><td><a href="http://lxr.php.net/source/php-src/ext/bz2/bz2.c#48">php-src/ext/bz2/bz2.c</a></td><td><a href="function.fclose.php">fclose</a></td></tr>'

import re

# Matches one complete <tr> row of the alias table (anchored at both ends).
#
# Capture groups:
#   1 - text of the link in the first cell (used as the alias name)
#   2 - text of the link in the third cell, when that cell contains a link
#   3 - plain text of the third cell, when it contains no link
# Exactly one of groups 2 and 3 is set for any matching row.
#
# The pattern is compiled with re.VERBOSE, so whitespace inside it is
# ignored; literal spaces therefore have to be written as '\ '.
line_re = re.compile(r'''
\A

<tr\ bgcolor="[^">]+">

<td><a\ href="[^>"]+\.php">([^<>]+)</a></td>

<td><a\ href="[^">]+">[^<>]+</a></td>

<td>
(?:
    <a\ href="[^">]+\.php">
    ( [^<>]+ )
    </a>
|   ( [^<>]+ )
)
</td>

</tr>

\Z
''', re.VERBOSE)

def parseString(s):
    """Parse the text of the alias listing page into a dictionary.

    The table is taken to be the text between header_tok and footer_tok.
    Its first line must open the table and its last two lines must be
    '</table>' followed by an empty line; every line in between must be
    a row that line_re matches.

    Returns a dict mapping each alias name to the name it is an alias for.

    Raises ValueError if either token is missing from s, and
    AssertionError if the table does not have the expected layout.
    """
    _before, after_header = s.split(header_tok, 1)
    table_text, _after = after_header.split(footer_tok, 1)

    rows = table_text.split('\n')
    tail = [text.strip() for text in rows[-2:]]
    assert tail == ['</table>', '']
    assert rows[0].strip().startswith('<table')

    aliases = {}
    # Skip the opening <table ...> line and the two closing lines.
    for row in rows[1:-2]:
        match = line_re.match(row)
        assert match, row
        alias, linked_master, plain_master = match.groups()
        # The third cell is either a link or bare text; use whichever is set.
        master = linked_master or plain_master
        assert master
        aliases[alias] = master
    return aliases

def parseFile(f):
    """Read everything from the file-like object f and parse it.

    Returns the alias dictionary produced by parseString.
    """
    contents = f.read()
    return parseString(contents)

def parseFileName(fn):
    """Parse the alias listing stored in the file named fn.

    Returns the alias dictionary produced by parseFile.  The file is
    closed before returning, whether or not parsing succeeds.
    """
    f = open(fn, 'r')
    # try/finally rather than relying on garbage collection to release
    # the handle.
    try:
        return parseFile(f)
    finally:
        f.close()

def parseURL(url):
    """Fetch the alias listing from url and parse it.

    Returns the alias dictionary produced by parseFile.  The connection
    is closed before returning, whether or not parsing succeeds.
    """
    # urllib2 is not imported at module level, so import it here; without
    # this the function fails with NameError as soon as it is called.
    import urllib2

    response = urllib2.urlopen(url)
    try:
        return parseFile(response)
    finally:
        response.close()

def getAliasRE(aliases):
    """Build a compiled regex that finds any of the alias names as a word.

    aliases is a mapping whose keys are the alias names to search for.

    In each match, group 1 is the prefix found before the name ('->',
    '$' or the empty string) and group 2 is the alias name itself.

    If aliases is empty, the returned pattern never matches.
    """
    # Escape each name so that regex metacharacters are matched literally.
    names = [re.escape(name) for name in aliases]
    if not names:
        # An empty alternation would match the empty string at every word
        # boundary; return a pattern (with the same two groups) that can
        # never match instead.
        return re.compile(r'(?!)(->|\$|)()')
    return re.compile(r'(->|\$|)\s*\b(%s)\b' % '|'.join(names))

def checkAliasesFile(alias_re, f):
    """Scan the lines of f for alias names.

    alias_re is a pattern whose group 1 is an optional prefix and whose
    group 2 is the alias name.  Matches with a non-empty prefix are
    ignored.

    Returns a list of (line number, alias name) tuples, with line
    numbers starting at 1.
    """
    hits = []
    for index, text in enumerate(f):
        for match in alias_re.finditer(text):
            prefix, name = match.group(1), match.group(2)
            if not prefix:
                hits.append((index + 1, name))
    return hits

def checkAliases(alias_re, filename):
    """Scan the file named filename for alias names.

    Returns the list of (line number, alias name) tuples produced by
    checkAliasesFile.  The file is closed before returning.
    """
    f = open(filename, 'r')
    # try/finally so that scanning many files does not accumulate open
    # handles.
    try:
        return checkAliasesFile(alias_re, f)
    finally:
        f.close()

def checkAliasesFiles(alias_re, filenames):
    """Scan every file named in filenames for alias names.

    Returns a list of (filename, line number, alias name) tuples, in the
    order the files were given.
    """
    results = []
    for name in filenames:
        for line_num, alias in checkAliases(alias_re, name):
            results.append((name, line_num, alias))
    return results

def dumpResults(aliases, found, out=sys.stdout):
    """Write one report line per match to out.

    aliases maps each alias name to the name it is an alias for; found
    is a sequence of (filename, line number, alias name) tuples.  Each
    line has the form 'filename:line alias -> master'.

    Raises KeyError if a found alias is not a key of aliases.
    """
    for filename, n, a in found:
        # out.write works on any file-like object and, unlike the
        # 'print >>out' statement, is not specific to Python 2.
        out.write("%s:%d %s -> %s\n" % (filename, n, a, aliases[a]))
def main(alias_file, *filenames):
    """Report every alias use found in the given PHP source files.

    alias_file names a saved copy of the alias listing; filenames are
    the PHP files to scan.  The report is written by dumpResults to its
    default output.

    Returns the list of (filename, line number, alias name) matches.
    """
    alias_map = parseFileName(alias_file)
    matches = checkAliasesFiles(getAliasRE(alias_map), filenames)
    dumpResults(alias_map, matches)
    return matches

if __name__ == '__main__':
    # main() needs at least the alias file name; without it the call
    # would fail with a TypeError traceback, so show the usage text from
    # the module docstring instead.
    if len(sys.argv) < 2:
        sys.stderr.write(__doc__)
        sys.exit(2)
    found = main(*sys.argv[1:])
    # Exit status 1 signals that at least one alias use was reported.
    if found:
        sys.exit(1)