xref: /dokuwiki/lib/exe/indexer.php (revision 33225f5153fdf492cb988b63ff9a3d0ff92ec23d)
1<?php
2/**
3 * DokuWiki indexer
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
8if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
9require_once(DOKU_INC.'inc/init.php');
10require_once(DOKU_INC.'inc/auth.php');
11require_once(DOKU_INC.'inc/events.php');
session_write_close();  // close the session, nothing below writes to it
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// answer the request right away: a blank GIF, or a text/plain header in debug mode
sendGIF();

// Catch any possible output (e.g. errors) unless debugging was requested.
// empty() is used instead of a plain negation so that a request without a
// 'debug' parameter does not raise an undefined index notice.
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs - the first one returning true ends the chain
runIndexer() or metaUpdate() or runSitemapper();

// throw away whatever the jobs printed when not debugging
if(empty($_REQUEST['debug'])) ob_end_clean();
exit;
29
30// --------------------------------------------------------------------
31
/**
 * Runs the indexer for the current page
 *
 * The page is taken from the 'id' request parameter. Nothing is done when
 * the page's '.indexed' marker file is newer than the page file or when the
 * indexer lock can not be obtained.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @return bool true if the page was indexed, false if nothing was done
 */
function runIndexer(){
    global $conf;
    print "runIndexer(): started".NL;

    // a request without an id must not raise an undefined index notice
    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;

    // check if indexing needed
    $last = @filemtime(metaFN($ID,'.indexed'));
    if($last > @filemtime(wikiFN($ID))){
        print "runIndexer(): index for $ID up to date".NL;
        return false;
    }

    // try to acquire a lock - the lock is a directory, mkdir() fails when it exists already
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            if(!@rmdir($lock)){
                // the lock can neither be created nor removed (e.g. missing
                // or unwritable lock directory) - retrying would loop forever
                print "runIndexer(): removing stale lock failed".NL;
                return false;
            }
            print "runIndexer(): stale lock removed".NL;
        }else{
            // lock is younger than five minutes - someone else is indexing
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // do the work
    idx_addPage($ID);

    // we're finished - touch the marker file and free the lock
    io_saveFile(metaFN($ID,'.indexed'),' ');
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}
77
/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * The page is taken from the 'id' request parameter. Nothing is done when
 * the page's '.meta' file exists already or the page file itself is missing.
 *
 * @return bool true if a metadata file was written, false if nothing was done
 */
function metaUpdate(){
    print "metaUpdate(): started".NL;

    // a request without an id must not raise an undefined index notice
    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');
    global $conf;


    // gather some additional info from changelog
    // captures: 1 = leading number (used as date), 2 = IPv4 address,
    //           3 = field after the page id (used as contributor), 4 = last field
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    // NOTE(review): io_grep() is not visible here - presumably it does not
    // return an array when the changelog can not be read, so make sure we
    // got one before counting and indexing into it (confirm)
    if(is_array($info) && count($info)){
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose last field is '*' are ignored
            // (presumably these are minor edits - confirm)
            if($item[4] != '*'){
                // later matches overwrite earlier ones, the last one wins
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}
125
/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz (or
 * sitemap.xml when $conf['usegzip'] is off) - This file needs to exist
 * and to be writable!
 *
 * $conf['sitemap'] is the number of days after which an existing, non-empty
 * sitemap is rebuilt; a false value disables the sitemap completely.
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 * @return bool true if a new sitemap was written, false if nothing was done
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['usegzip']){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(!is_writable(DOKU_INC.$sitemap)) return false;
    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
       print 'runSitemapper(): Sitemap up to date'.NL;
       return false;
    }

    // file() returns false when the page index is missing or unreadable.
    // Bail out in that case instead of replacing the existing sitemap with
    // an empty one and pinging Google about it.
    $pages = @file($conf['cachedir'].'/page.idx');
    if(!is_array($pages)){
        print 'runSitemapper(): could not read page index'.NL;
        return false;
    }
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap - the XML is collected in its own output buffer
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        // ACL check with empty user and groups arguments
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    print 'runSitemapper(): pinging google'.NL;
    //ping google
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $http = new DokuHTTPClient();
    $http->get($url);
    // a failed ping is only reported, the sitemap itself was written already
    if($http->error) print 'runSitemapper(): '.$http->error.NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}
194
/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result looks like 2006-01-31T13:45:10+01:00 and uses the timezone
 * PHP's date() function works with.
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 * @param  int    $int_date UNIX timestamp to format
 * @return string the formatted date including the UTC offset
 */
function date_iso8601($int_date) {
    // date and time part without any timezone information
    $stamp = date('Y-m-d\TH:i:s', $int_date);

    // 'O' yields the offset as +hhmm - ISO 8601 wants a colon in between
    $offset  = date('O', $int_date);
    $hours   = substr($offset, 0, 3);   // sign and hours
    $minutes = substr($offset, 3, 2);

    return $stamp.$hours.":".$minutes;
}
209
/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the 'debug' request parameter is set, no image is sent. Only a
 * text/plain content type header is set instead.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // empty() avoids an undefined index notice for requests without a
    // 'debug' parameter - a displayed notice would start the output and
    // keep the headers below from being sent
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}
230
231//Setup VIM: ex: et ts=4 enc=utf-8 :
232// No trailing PHP closing tag - no output please!
233// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php
234