xref: /dokuwiki/lib/exe/indexer.php (revision 3a97d936870170491bdd7d03d71143143b10191d)
1<?php
2/**
3 * DokuWiki indexer
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
// Locate the DokuWiki root unless an including script already defined it
if (!defined('DOKU_INC')) {
    define('DOKU_INC', dirname(__FILE__) . '/../../');
}
// must be defined before init.php is loaded
define('DOKU_DISABLE_GZIP_OUTPUT', 1);
require_once(DOKU_INC . 'inc/init.php');
session_write_close();  // the session is not needed beyond this point
if (!defined('NL')) {
    define('NL', "\n");
}

// keep running after the browser closes the connection
@ignore_user_abort(true);

// Called without arguments, ignore_user_abort() reports the current setting.
// If it did not stick (or the installation flags it as broken) the GIF has to
// be deferred until all work is done; in debug mode no GIF is sent at all.
$defer  = !@ignore_user_abort() || $conf['broken_iua'];
$output = $INPUT->has('debug') && $conf['allowdebug'];
if (!($defer || $output)) {
    sendGIF(); // answer the browser right away
}

$ID = cleanID($INPUT->str('id'));

// Debug mode shows the task output as plain text; otherwise any output
// (e.g. errors) is caught in a buffer and thrown away later
if ($output) {
    header('Content-Type: text/plain');
} else {
    ob_start();
}

// Run one of the jobs: the chain stops at the first task that reports work
// done, and the AFTER event is only advised when none of them did.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    $worked = runIndexer()
        || runSitemapper()
        || sendDigest()
        || runTrimRecentChanges()
        || runTrimRecentChanges(true);
    if (!$worked) {
        $evt->advise_after();
    }
}

if (!$output) {
    ob_end_clean();
    if ($defer) {
        sendGIF();
    }
}

exit;
48
49// --------------------------------------------------------------------
50
/**
 * Trims the recent changes cache as needed.
 *
 * The changelog is cut down to the entries of the last $conf['recent_days']
 * days, but never below $conf['recent'] entries: if too few recent entries
 * exist, the newest of the older ones are kept to fill up. Lines that
 * parseChangelogLine() cannot parse are dropped. A trim is attempted at most
 * once a day per changelog, tracked through the mtime of "<changelog>.trimmed".
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                              the page changelog
 * @return bool true if the changelog was rewritten, false if nothing was done
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    echo "runTrimRecentChanges($media_changes): started".NL;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Only trim when the changelog exists, the last attempt is more than a day
    // old and no other trim run is in progress (signalled by the _tmp file).
    if (file_exists($fn) &&
        (@filemtime($fn.'.trimmed')+86400)<time() &&
        !file_exists($fn.'_tmp')) {
        @touch($fn.'.trimmed');
        io_lock($fn);
        $lines = file($fn);
        // file() returns false when the changelog cannot be read; bail out
        // and release the lock instead of counting a non-array
        if ($lines === false || count($lines)<=$conf['recent']) {
            // nothing to trim
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;

        // Each bucket is kept as three parallel lists (entry date, position in
        // the file, raw line) so it can be sorted by date with the original
        // file position as tie breaker for entries sharing a timestamp.
        $out_dates = array();
        $out_pos   = array();
        $out_lines = array();
        $old_dates = array();
        $old_pos   = array();
        $old_lines = array();
        $total = count($lines);
        for ($i=0; $i<$total; $i++) {
            $log = parseChangelogLine($lines[$i]);
            if ($log === false) continue;                      // discard junk
            if ($log['date'] < $trim_time) {
                // keep old lines for now, they may be needed to fill up
                $old_dates[] = $log['date'];
                $old_pos[]   = $i;
                $old_lines[] = $lines[$i];
            } else {
                // definitely keep these lines
                $out_dates[] = $log['date'];
                $out_pos[]   = $i;
                $out_lines[] = $lines[$i];
            }
        }

        if ($total==count($out_lines)) {
            // nothing to trim
            @unlink($fn.'_tmp');
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        if ($out_lines) {
            array_multisort($out_dates, SORT_NUMERIC, $out_pos, SORT_NUMERIC, $out_lines);
        }
        $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            if ($old_lines) {
                array_multisort($old_dates, SORT_NUMERIC, $old_pos, SORT_NUMERIC, $old_lines);
            }
            // prepend the newest $extra old entries
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($fn.'_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn.'_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn.'_tmp');
        } else {
            io_unlock($fn);
        }
        echo "runTrimRecentChanges($media_changes): finished".NL;
        return true;
    }

    // nothing done
    echo "runTrimRecentChanges($media_changes): finished".NL;
    return false;
}
136
/**
 * Runs the indexer for the current page
 *
 * The page is taken from the global $ID and handed to idx_addPage().
 *
 * @return mixed false when no page ID is set, otherwise the result of
 *               idx_addPage()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    print "runIndexer(): started".NL;

    // without a page there is nothing to index
    if(!$ID) return false;

    // do the work
    // NOTE(review): the second argument presumably enables verbose output - confirm against idx_addPage()
    return idx_addPage($ID, true);
}
152
/**
 * Builds the sitemap and announces it to search engines
 *
 * All work is delegated to the Sitemapper class: the search engines are only
 * pinged when Sitemapper::generate() reported success. According to the
 * original note the sitemap is written as sitemap.xml.gz and that file needs
 * to be writable (not verifiable from this file, see Sitemapper).
 *
 * @return bool true if the sitemap was generated and the ping succeeded
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    echo 'runSitemapper(): started'.NL;

    // the ping is skipped entirely when generation fails
    $done = Sitemapper::generate() ? (bool) Sitemapper::pingSearchEngines() : false;

    echo 'runSitemapper(): finished'.NL;
    return $done;
}
168
/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * Does nothing when the 'subscribe' action is not allowed. Otherwise the
 * sending is delegated to Subscription::send_bulk() for the global $ID.
 *
 * @return bool true if send_bulk() reported at least one sent mail
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    global $ID;

    echo 'sendDigest(): started'.NL;
    if(!actionOK('subscribe')) {
        echo 'sendDigest(): disabled'.NL;
        return false;
    }
    $sub = new Subscription();
    $sent = $sub->send_bulk($ID);

    echo "sendDigest(): sent $sent mails".NL;
    echo 'sendDigest(): finished'.NL;
    return (bool) $sent;
}
191
/**
 * Answers the request with a tiny embedded GIF image
 *
 * The image is stored base64 encoded in the function itself. It is sent with
 * an exact Content-Length and a "Connection: Close" header, then the output
 * is flushed so the browser considers the image complete and should drop the
 * connection.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    $gif    = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    $length = strlen($gif); // byte length of the decoded image

    header('Content-Type: image/gif');
    header('Content-Length: '.$length);
    header('Connection: Close');

    echo $gif;
    tpl_flush();
}
208
209//Setup VIM: ex: et ts=4 :
210// No trailing PHP closing tag - no output please!
211// See Note at http://php.net/manual/en/language.basic-syntax.instruction-separation.php
212