aboutsummaryrefslogtreecommitdiff
path: root/src/SemanticScuttle/UrlHelper.php
blob: 5417b9b4c8b8c672ccca6acd691bed14407eee0a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
<?php
/**
 * SemanticScuttle - your social bookmark manager.
 *
 * PHP version 5.
 *
 * @category Bookmarking
 * @package  SemanticScuttle
 * @author   Christian Weiske <cweiske@cweiske.de>
 * @license  GPL http://www.gnu.org/licenses/gpl.html
 * @link     http://sourceforge.net/projects/semanticscuttle
 */

/**
 * Work with URLs
 *
 * @category Bookmarking
 * @package  SemanticScuttle
 * @author   Christian Weiske <cweiske@cweiske.de>
 * @license  GPL http://www.gnu.org/licenses/gpl.html
 * @link     http://sourceforge.net/projects/semanticscuttle
 */
class SemanticScuttle_UrlHelper
{
    /**
     * Determine a human-readable title for the given URL.
     *
     * Opens the URL, reads at most the first 1750 bytes of the response
     * and extracts the content of the first <title> element. The title is
     * converted to UTF-8 using the charset announced in a <meta> tag (UTF-8
     * is assumed when none is found), trimmed, and HTML entities are decoded.
     *
     * When the URL cannot be opened, or no non-empty title is found within
     * the bytes read, the part of the URL after the last "/" is returned.
     *
     * @param string $url URL to fetch the title for
     *
     * @return string Page title, or the last path segment of the URL
     *                (which may be an empty string)
     */
    public function getTitle($url)
    {
        $url   = (string) $url;
        $title = '';

        // NOTE(review): fopen() accepts any registered stream wrapper and
        // plain local paths. If $url comes from untrusted input, the caller
        // should validate the scheme before calling this method.
        // An empty path makes fopen() throw a ValueError on PHP 8, so it is
        // never passed on.
        $fd = ($url !== '') ? @fopen($url, 'r') : false;
        if ($fd) {
            // A single fread() on a network stream may return as soon as the
            // first packet arrives, i.e. with fewer bytes than requested;
            // stream_get_contents() reads until the limit or EOF is reached.
            $html = stream_get_contents($fd, 1750);
            fclose($fd);
            if ($html === false) {
                $html = '';
            }

            // Get title from the first title tag. The match is non-greedy so
            // a later </title> (e.g. inside inline SVG) is not swallowed.
            if (preg_match('/<title\b[^>]*>(.*?)<\/title>/si', $html, $matches)) {
                $title = $matches[1];
            }

            if ($title !== '') {
                $encoding = 'UTF-8';
                // Get encoding from the charset attribute. Handles both
                // <meta charset="x"> and
                // <meta http-equiv="Content-Type" content="...; charset=x">
                $charsetPattern
                    = '/<meta\b[^>]*?\bcharset\s*=\s*["\']?\s*([^\s"\'\/>;]+)/i';
                if (preg_match($charsetPattern, $html, $matches)) {
                    $encoding = strtoupper($matches[1]);
                }

                // Convert to UTF-8 from the original encoding
                if (function_exists('mb_convert_encoding')) {
                    try {
                        $converted = @mb_convert_encoding(
                            $title, 'UTF-8', $encoding
                        );
                        // Older PHP versions return false for an unknown
                        // encoding; keep the unconverted title in that case.
                        if (is_string($converted)) {
                            $title = $converted;
                        }
                    } catch (ValueError $e) {
                        // PHP 8 throws for an unknown encoding name. The
                        // name comes from the remote page, so this is not
                        // fatal: keep the unconverted title.
                    }
                }

                $title = trim($title);
            }
        }

        if ($title !== '') {
            return html_entity_decode($title, ENT_QUOTES, 'UTF-8');
        }

        // No title, so return filename
        $uriparts = explode('/', $url);
        return end($uriparts);
    }
}
?>