<?php
/**
 * SemanticScuttle - URL helper.
 *
 * Provenance: introduced as src/SemanticScuttle/UrlHelper.php by commit
 * 833537692e62db2a7dea1c9eb74b79c5311fc39b
 * ("Move URL title method to dedicated class"),
 * Christian Weiske, Mon, 9 May 2016 21:39:18 +0200.
 *
 * @category Bookmarking
 * @package  SemanticScuttle
 * @author   Christian Weiske
 * @license  GPL http://www.gnu.org/licenses/gpl.html
 * @link     http://sourceforge.net/projects/semanticscuttle
 */

/**
 * Work with URLs
 *
 * @category Bookmarking
 * @package  SemanticScuttle
 * @author   Christian Weiske
 * @license  GPL http://www.gnu.org/licenses/gpl.html
 * @link     http://sourceforge.net/projects/semanticscuttle
 */
class SemanticScuttle_UrlHelper
{
    /**
     * Maximum number of bytes read from the beginning of a document
     * when looking for its title and charset declaration.
     */
    const HEAD_BYTES = 1750;

    /**
     * Determine a human readable title for the given URL.
     *
     * The first HEAD_BYTES bytes of the document are fetched and searched
     * for a <title> element. The title is converted to UTF-8 (using the
     * charset announced in a <meta> tag, defaulting to UTF-8), trimmed and
     * HTML entities are decoded.
     *
     * When the document cannot be opened, contains no title, or the title
     * is empty, the last "/"-separated segment of $url is returned instead.
     *
     * NOTE(review): $url is handed to fopen() unfiltered, so local paths
     * and every registered stream wrapper are accepted. Callers passing
     * user-supplied URLs should validate the scheme beforehand.
     *
     * @param string $url URL (or path) of the document to inspect
     *
     * @return string Document title, or the trailing path segment of $url
     */
    public function getTitle($url)
    {
        $title = '';
        $html  = $this->readHead($url);
        if ($html !== '') {
            $title = $this->extractTitle($html);
        }

        if ($title !== '') {
            // Entities are decoded after trimming, so whitespace that was
            // written as an entity is preserved.
            return html_entity_decode($title, ENT_QUOTES, 'UTF-8');
        }

        // No title, so return filename
        $uriparts = explode('/', $url);
        return end($uriparts);
    }

    /**
     * Read up to HEAD_BYTES bytes from the start of the document.
     *
     * @param string $url URL (or path) to open
     *
     * @return string Bytes read; empty string when nothing could be read
     */
    private function readHead($url)
    {
        // Best effort: an unreachable URL is not an error for the caller,
        // it simply leads to the filename fallback. Hence the silencing.
        $fd = @fopen($url, 'r');
        if (!$fd) {
            return '';
        }

        // A single fread() on a network stream may return fewer bytes than
        // requested; stream_get_contents() keeps reading until the limit
        // or EOF is reached.
        $html = stream_get_contents($fd, self::HEAD_BYTES);
        fclose($fd);

        return is_string($html) ? $html : '';
    }

    /**
     * Extract the trimmed, UTF-8 encoded content of the first <title> tag.
     *
     * @param string $html Beginning of an HTML document
     *
     * @return string Title text (entities still encoded); '' if none found
     */
    private function extractTitle($html)
    {
        // Non-greedy so that only the first <title> element is captured,
        // even if further ones (e.g. inside inline SVG) follow.
        if (!preg_match('/<title\b[^>]*>(.*?)<\/title>/si', $html, $match)) {
            return '';
        }

        $encoding = 'UTF-8';
        // Covers <meta charset="x"> as well as
        // <meta http-equiv="Content-Type" content="text/html; charset=x">
        $charsetPattern
            = '/<meta\b[^>]*\bcharset\s*=\s*["\']?([a-z0-9._:\-]+)/i';
        if (preg_match($charsetPattern, $html, $charsetMatch)) {
            $encoding = strtoupper($charsetMatch[1]);
        }

        return trim($this->toUtf8($match[1], $encoding));
    }

    /**
     * Convert text from the given encoding to UTF-8.
     *
     * Without the mbstring extension the text is returned unchanged.
     * If the encoding name is not known to mbstring, an empty string is
     * returned so that the caller falls back to the filename.
     *
     * @param string $text     Text to convert
     * @param string $encoding Source encoding name as announced by the page
     *
     * @return string Converted text, or '' when conversion failed
     */
    private function toUtf8($text, $encoding)
    {
        if (!function_exists('mb_convert_encoding')) {
            return $text;
        }

        try {
            // Older PHP versions emit a warning and return false for
            // unknown encodings; the page content is untrusted, so the
            // warning is silenced deliberately.
            $converted = @mb_convert_encoding($text, 'UTF-8', $encoding);
        } catch (ValueError $e) {
            // PHP 8 throws instead of warning for unknown encodings.
            $converted = false;
        }

        return is_string($converted) ? $converted : '';
    }
}