diff options
| author | sembrestels <sembrestels@riseup.net> | 2011-10-13 15:23:11 +0200 | 
|---|---|---|
| committer | sembrestels <sembrestels@riseup.net> | 2011-10-13 15:23:11 +0200 | 
| commit | 74bd6999c5e5c23ebbf90dbb6bdaabbddd7594cf (patch) | |
| tree | 834c120d692be288f261bcae169eedd3d6b31d74 /vendors/dokuwiki/lib/exe/indexer.php | |
| parent | f8be8643f0faadb2c0ce87d553b7b9d569af5afd (diff) | |
| download | elgg-74bd6999c5e5c23ebbf90dbb6bdaabbddd7594cf.tar.gz elgg-74bd6999c5e5c23ebbf90dbb6bdaabbddd7594cf.tar.bz2 | |
Rename lib/dokuwiki to vendors/dokuwiki
Diffstat (limited to 'vendors/dokuwiki/lib/exe/indexer.php')
| -rw-r--r-- | vendors/dokuwiki/lib/exe/indexer.php | 375 | 
1 files changed, 375 insertions, 0 deletions
<?php
/**
 * DokuWiki indexer
 *
 * This script answers with a 1x1 blank GIF (see sendGIF()) and then runs
 * at most ONE background job per request: the jobs are chained with `or`
 * below, so the first job that returns true ends the chain.
 *
 * Pass a `debug` request parameter to get the jobs' text output instead
 * of the image.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");
global $ID;
// Version tag used to force rebuild on upgrade
define('INDEXER_VERSION', 2);

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
$defer = !@ignore_user_abort() || $conf['broken_iua'];
if(!$defer){
    sendGIF(); // send gif
}

// The id is optional: without it only the page independent jobs can do work
// (runIndexer() and metaUpdate() return false on an empty $ID).
$ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');

// Catch any possible output (e.g. errors)
if(!isset($_REQUEST['debug'])) ob_start();

// run one of the jobs
// NOTE: because of the short-circuiting `or` chain, advise_after() is only
// reached when none of the jobs reported that it did any work.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    metaUpdate() or
    runSitemapper() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}
if($defer) sendGIF();

if(!isset($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * Entries newer than $conf['recent_days'] days are always kept. If that
 * leaves fewer than $conf['recent'] entries, the newest of the older
 * entries are kept as well to fill up to that minimum. Lines that
 * parseChangelogLine() cannot parse are dropped.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                            the page changelog
 * @return bool true if the changelog was rewritten, false if nothing was done
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, which ever is larger.
    // The trimming is only done once a day.
    if (@file_exists($fn) &&
        (@filemtime($fn.'.trimmed')+86400)<time() &&
        !@file_exists($fn.'_tmp')) {
            @touch($fn.'.trimmed');
            io_lock($fn);
            $lines = file($fn);
            if ($lines === false || count($lines)<=$conf['recent']) {
                // unreadable or nothing to trim
                io_unlock($fn);
                return false;
            }

            io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
            $trim_time = time() - $conf['recent_days']*86400;
            $out_lines = array();
            // must be initialized: when lines are only dropped as junk there
            // are no old lines, yet $old_lines is sorted and sliced below
            $old_lines = array();

            for ($i=0; $i<count($lines); $i++) {
                $log = parseChangelogLine($lines[$i]);
                if ($log === false) continue;                      // discard junk
                if ($log['date'] < $trim_time) {
                    $old_lines[$log['date'].".$i"] = $lines[$i];     // keep old lines for now (append .$i to prevent key collisions)
                } else {
                    $out_lines[$log['date'].".$i"] = $lines[$i];     // definitely keep these lines
                }
            }

            if (count($lines)==count($out_lines)) {
                // nothing to trim
                @unlink($fn.'_tmp');
                io_unlock($fn);
                return false;
            }

            // sort the final result, it shouldn't be necessary,
            //   however the extra robustness in making the changelog cache self-correcting is worth it
            ksort($out_lines);
            $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
            if ($extra > 0) {
                ksort($old_lines);
                $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
            }

            // save trimmed changelog
            io_saveFile($fn.'_tmp', implode('', $out_lines));
            @unlink($fn);
            if (!rename($fn.'_tmp', $fn)) {
                // rename failed so try another way...
                io_unlock($fn);
                io_saveFile($fn, implode('', $out_lines));
                @unlink($fn.'_tmp');
            } else {
                io_unlock($fn);
            }
            return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * The page is (re)indexed when its '.indexed' meta file is missing, holds a
 * version below INDEXER_VERSION, or is not newer than the page file itself.
 * A lock directory in $conf['lockdir'] keeps concurrent runs apart; a lock
 * older than five minutes is considered stale and removed.
 *
 * @return bool true if the page was indexed, false if nothing was done
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    global $conf;
    print "runIndexer(): started".NL;

    // Move index files (if needed)
    // Uses the importoldindex plugin to upgrade the index automatically.
    // FIXME: Remove this from runIndexer when it is no longer needed.
    if (@file_exists($conf['cachedir'].'/page.idx') &&
        (!@file_exists($conf['indexdir'].'/page.idx') ||
         !filesize($conf['indexdir'].'/page.idx'))  &&
        !@file_exists($conf['indexdir'].'/index_importing')) {
        echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
    }

    if(!$ID) return false;

    // check if indexing needed
    $idxtag = metaFN($ID,'.indexed');
    if(@file_exists($idxtag)){
        if(io_readFile($idxtag) >= INDEXER_VERSION){
            $last = @filemtime($idxtag);
            if($last > @filemtime(wikiFN($ID))){
                print "runIndexer(): index for $ID up to date".NL;
                return false;
            }
        }
    }

    // try to acquire a lock
    // mkdir either creates the lock directory or fails if it already exists
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // upgrade to version 2
    if (!@file_exists($conf['indexdir'].'/pageword.idx'))
        idx_upgradePageWords();

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * Creation date, last modification date and contributors are seeded from
 * the matching lines of the page changelog before rendering.
 *
 * @return bool true if a meta file was written, false if nothing was done
 */
function metaUpdate(){
    global $ID;
    print "metaUpdate(): started".NL;

    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');
    global $conf;


    // gather some additional info from changelog
    // capture groups: 1 = timestamp, 2 = dotted IPv4 address, 3 and 4 = the
    // two tab separated fields following the page id
    // NOTE(review): the code below uses group 4 as change type and group 3
    // as contributor - confirm against the changelog line format
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(!empty($info)){
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory (DOKU_INC). It is named
 * sitemap.xml.gz when $conf['compression'] is 'gz' or 'bz2' and sitemap.xml
 * otherwise - This file needs to be writable!
 *
 * A new map is only built when $conf['sitemap'] is set and the existing
 * file is empty or older than $conf['sitemap'] days. Afterwards Google,
 * Yahoo and Microsoft are pinged with the sitemap URL.
 *
 * @return bool true if a sitemap was written, false if nothing was done
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(@file_exists(DOKU_INC.$sitemap)){
        if(!is_writable(DOKU_INC.$sitemap)) return false;
    }else{
        if(!is_writable(DOKU_INC)) return false;
    }

    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    $pages = file($conf['indexdir'].'/page.idx');
    // file() returns false when the index can not be read; treat that as an
    // empty page list so count() and foreach below get an array
    if($pages === false) $pages = array();
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    //ping search engines...
    $http = new DokuHTTPClient();
    $http->timeout = 8;

    //ping google
    print 'runSitemapper(): pinging google'.NL;
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $resp = $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;
    print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;

    //ping yahoo
    print 'runSitemapper(): pinging yahoo'.NL;
    $url  = 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $resp = $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;
    print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;

    //ping microsoft
    print 'runSitemapper(): pinging microsoft'.NL;
    $url  = 'http://www.bing.com/webmaster/ping.aspx?siteMap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $resp = $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;
    print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result looks like 2004-02-12T15:19:21+00:00: date('O') yields the
 * offset as +hhmm, the colon is inserted between hours and minutes here.
 *
 * @param int $int_date UNIX timestamp to format
 * @return string the formatted date in the server's timezone
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 */
function date_iso8601($int_date) {
    //$int_date: current date in UNIX timestamp
    $date_mod = date('Y-m-d\TH:i:s', $int_date);
    $pre_timezone = date('O', $int_date);
    $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * In debug mode (a `debug` request parameter is present) no image is sent;
 * only a text/plain content type header is set so the jobs' messages can
 * be read.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    if(isset($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php
