xref: /dokuwiki/lib/exe/indexer.php (revision b4f284ac58f1be8e0efde3ef86ba0fcb88b49bb4)
1<?php
2/**
3 * DokuWiki indexer
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// Debug mode shows the job output as plain text instead of hiding it behind
// the GIF. Read the flag once, without raising a notice when it is absent.
$debug = !empty($_REQUEST['debug']);

// keep running after browser closes connection
@ignore_user_abort(true);

// Respond right away when that is safe:
//  - ignore_user_abort() took effect, so the jobs survive the browser
//    dropping the connection after it got its image
//  - debug mode, where sendGIF() only emits the text/plain header, which has
//    to go out before any job output is printed
if($debug || @ignore_user_abort()){
    sendGIF();
    $defer = false;
}else{
    // the image has to wait until the jobs are done
    $defer = true;
}

// Catch any possible output (e.g. errors)
if(!$debug) ob_start();

// run one of the jobs - the first one that reports work done ends the chain
runIndexer() or metaUpdate() or runSitemapper();

// Throw away the captured job output BEFORE sending a deferred image:
// sendGIF() prints the image and flush() does not touch user-level output
// buffers, so cleaning the buffer afterwards would discard the image too.
if(!$debug) ob_end_clean();
if($defer) sendGIF();
exit;
35
36// --------------------------------------------------------------------
37
/**
 * Runs the indexer for the current page
 *
 * The page is taken from the 'id' request parameter. Indexing is skipped
 * when the page's '.indexed' marker is newer than the page file or when
 * another process holds the indexer lock.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @return bool true if the page was indexed, false if nothing was done
 */
function runIndexer(){
    global $conf;
    print "runIndexer(): started".NL;

    $ID = cleanID($_REQUEST['id']);
    if(!$ID) return false;

    // check if indexing needed
    $last = @filemtime(metaFN($ID,'.indexed'));
    if($last > @filemtime(wikiFN($ID))){
        print "runIndexer(): index for $ID up to date".NL;
        return false;
    }

    // try to acquire a lock - the lock is a directory, creating it either
    // succeeds or fails as a whole
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            if(!@rmdir($lock)){
                clearstatcache();
                if(@file_exists($lock)){
                    // The stale lock can not be removed. Give up instead of
                    // retrying mkdir() forever.
                    print "runIndexer(): removing stale lock failed".NL;
                    return false;
                }
                // lock vanished in the meantime (removed by someone else),
                // fall through and retry
            }
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),' ');
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}
83
/**
 * Renders and stores the metadata of the requested page if it has none yet
 *
 * Pages created from outside DokuWiki have no '.meta' file; this creates
 * it the first time such a page is viewed. Creation date, last
 * modification date and contributors are seeded from the changelog.
 *
 * @return bool true if a meta file was written, false otherwise
 */
function metaUpdate(){
    global $conf;
    print "metaUpdate(): started".NL;

    $ID = cleanID($_REQUEST['id']);
    if(!$ID) return false;

    $file = metaFN($ID, '.meta');
    print "meta file: $file".NL;

    // rendering is only needed for an existing page without a meta file
    if(@file_exists($file) || !@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');

    // gather some additional info from changelog:
    // all lines of the form <digits> TAB <dotted quad> TAB <page id> TAB <field> TAB <field>
    $pattern = '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/';
    $entries = io_grep($conf['changelog'], $pattern, 0, true);

    $meta = array();
    if(count($entries)){
        // the first matching line supplies the creation date
        $meta['date']['created'] = $entries[0][1];
        foreach($entries as $entry){
            // entries whose last field is '*' are ignored
            // (presumably a minor edit marker - TODO confirm)
            if($entry[4] == '*') continue;

            $meta['date']['modified'] = $entry[1];
            if($entry[3]) $meta['contributor'][$entry[3]] = $entry[3];
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    print "metaUpdate(): finished".NL;
    return true;
}
131
/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz (or
 * sitemap.xml when $conf['usegzip'] is off) - This file needs to exist
 * and be writable!
 *
 * The sitemap is only rebuilt when it is empty or older than
 * $conf['sitemap'] days. After a rebuild Google is pinged with the
 * sitemap URL.
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 * @return bool true if a new sitemap was written, false otherwise
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['usegzip']){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(!is_writable(DOKU_INC.$sitemap)) return false;
    // a non-empty sitemap younger than $conf['sitemap'] days is kept
    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
       print 'runSitemapper(): Sitemap up to date'.NL;
       return false;
    }

    // file() returns false when the index is missing or unreadable. Stop
    // here in that case, otherwise the existing sitemap would be replaced
    // by an empty one.
    $pages = @file($conf['cachedir'].'/page.idx');
    if($pages === false){
        print 'runSitemapper(): page index not readable'.NL;
        return false;
    }
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap - the XML is collected in its own output buffer
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        // a blank index line names no page
        if($id === '') continue;
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        // checked with empty user and groups
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    print 'runSitemapper(): pinging google'.NL;
    //ping google
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $http = new DokuHTTPClient();
    $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}
200
/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result looks like 2005-12-31T23:59:59+01:00, using the currently
 * active timezone.
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 * @param int $int_date UNIX timestamp to format
 * @return string date, time and UTC offset with a colon between hours and minutes
 */
function date_iso8601($int_date) {
   // date('O') yields the offset as +hhmm, ISO 8601 wants +hh:mm
   $offset = substr_replace(date('O', $int_date), ':', 3, 0);
   return date('Y-m-d\TH:i:s', $int_date).$offset;
}
215
/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the 'debug' request parameter is set, no image is sent; only a
 * text/plain content type is announced so the job output can be read.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // empty() instead of a bare array read: a missing 'debug' parameter must
    // not raise a notice/warning here, as any such output would precede the
    // header() calls below and break the image response
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}
236
237//Setup VIM: ex: et ts=4 enc=utf-8 :
238// No trailing PHP closing tag - no output please!
239// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php
240