xref: /dokuwiki/lib/exe/indexer.php (revision 73d7249768ff5d989255df170bb05518a643977a)
1<?php
2/**
3 * DokuWiki indexer
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
// Locate the DokuWiki base directory unless a caller has already defined it.
defined('DOKU_INC') or define('DOKU_INC', dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT', 1);
require_once DOKU_INC.'inc/init.php';
session_write_close();  // the session is not used beyond this point
defined('NL') or define('NL', "\n");

// keep running after the browser closes the connection
@ignore_user_abort(true);

// The image is sent up front only when ignore_user_abort() reports itself as
// active and the configuration does not flag it as broken; otherwise sending
// is deferred until the tasks have run.
$defer  = !@ignore_user_abort() || $conf['broken_iua'];
// Debug output needs both the "debug" request parameter and the config switch.
$output = $INPUT->has('debug') && $conf['allowdebug'];
if (!($defer || $output)) {
    sendGIF();
}

$ID = cleanID($INPUT->str('id'));

// In debug mode the task output is shown as plain text; otherwise everything
// the tasks print (including errors) is buffered and thrown away later.
if ($output) {
    header('Content-Type: text/plain');
} else {
    ob_start();
}

// Run the tasks in order and stop at the first one that reports work done.
// The after-event is only advised when every task returned false.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer()
        || runSitemapper()
        || sendDigest()
        || runTrimRecentChanges()
        || runTrimRecentChanges(true)
        || $evt->advise_after();
}

if (!$output) {
    // discard whatever the tasks printed, then deliver the deferred image
    ob_end_clean();
    if ($defer) {
        sendGIF();
    }
}

exit;
48
49// --------------------------------------------------------------------
50
/**
 * Trims the recent changes cache as needed.
 *
 * The changelog is cut down to the entries newer than $conf['recent_days']
 * days, padded with the newest older entries so that at least $conf['recent']
 * lines remain. Lines that parseChangelogLine() rejects are dropped. The
 * trimming is attempted at most once a day per changelog (tracked through the
 * mtime of the "<changelog>.trimmed" marker file).
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                              the page changelog
 * @return bool true if the changelog was rewritten, false if nothing was done
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    echo "runTrimRecentChanges($media_changes): started".NL;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Skip unless the changelog exists, the last attempt is more than a day
    // old, and no "_tmp" file signals a trim that is already in progress.
    if (!file_exists($fn) ||
        (@filemtime($fn.'.trimmed')+86400) >= time() ||
        file_exists($fn.'_tmp')) {
        // nothing done
        echo "runTrimRecentChanges($media_changes): finished".NL;
        return false;
    }

    @touch($fn.'.trimmed');
    io_lock($fn);
    $lines = file($fn);
    // An unreadable changelog is treated like one that is too short to trim,
    // so the lock is always released instead of failing in count().
    if ($lines === false || count($lines) <= $conf['recent']) {
        // nothing to trim
        io_unlock($fn);
        echo "runTrimRecentChanges($media_changes): finished".NL;
        return false;
    }

    io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
    $trim_time = time() - $conf['recent_days']*86400;
    $out_lines = array();
    $old_lines = array();
    foreach ($lines as $i => $line) {
        $log = parseChangelogLine($line);
        if ($log === false) continue;     // discard junk

        // Fixed-width "<date>.<line number>" key: sorting these keys as plain
        // strings orders by date first and by original file position second,
        // and the line number keeps keys unique for identical timestamps.
        $key = sprintf('%020d.%010d', $log['date'], $i);
        if ($log['date'] < $trim_time) {
            $old_lines[$key] = $line;     // keep old lines for now
        } else {
            $out_lines[$key] = $line;     // definitely keep these lines
        }
    }

    if (count($lines) == count($out_lines)) {
        // nothing to trim
        @unlink($fn.'_tmp');
        io_unlock($fn);
        echo "runTrimRecentChanges($media_changes): finished".NL;
        return false;
    }

    // sort the final result, it shouldn't be necessary,
    //   however the extra robustness in making the changelog cache self-correcting is worth it
    ksort($out_lines, SORT_STRING);
    $extra = $conf['recent'] - count($out_lines);   // do we need extra lines to bring us up to the minimum
    if ($extra > 0) {
        // top up with the most recent of the old entries
        ksort($old_lines, SORT_STRING);
        $out_lines = array_merge(array_slice($old_lines, -$extra), $out_lines);
    }

    // save trimmed changelog
    $trimmed = implode('', $out_lines);
    io_saveFile($fn.'_tmp', $trimmed);
    @unlink($fn);
    if (!rename($fn.'_tmp', $fn)) {
        // rename failed so try another way: write the changelog directly and
        // remove the temporary file afterwards
        io_unlock($fn);
        io_saveFile($fn, $trimmed);
        @unlink($fn.'_tmp');
    } else {
        io_unlock($fn);
    }
    echo "runTrimRecentChanges($media_changes): finished".NL;
    return true;
}
134
/**
 * Runs the indexer for the current page
 *
 * The page is taken from the global $ID. An empty id means there is nothing
 * to index; any other id, including "0", is handed to idx_addPage().
 *
 * @return mixed false when no page id is set, otherwise the result of
 *               idx_addPage()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    print "runIndexer(): started".NL;

    // Compare against the empty string explicitly: a plain truthiness check
    // would also reject the page id "0".
    if((string) $ID === '') return false;

    // do the work
    return idx_addPage($ID, true);
}
150
/**
 * Generates the sitemap and, if that succeeded, pings the search engines
 *
 * All of the work is delegated to the Sitemapper class; the ping is skipped
 * when the generation step reports failure.
 *
 * @return bool true only if both the generation and the ping succeeded
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    echo "runSitemapper(): started".NL;

    $done = false;
    if (Sitemapper::generate()) {
        $done = (bool) Sitemapper::pingSearchEngines();
    }

    echo 'runSitemapper(): finished'.NL;
    return $done;
}
166
/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * Does nothing when the "subscribe" action is not allowed.
 *
 * @return bool true if at least one mail was sent
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    global $conf, $ID;

    echo 'sendDigest(): started'.NL;

    if (actionOK('subscribe')) {
        $subscription = new Subscription();
        $sent = $subscription->send_bulk($ID);

        echo "sendDigest(): sent $sent mails".NL;
        echo 'sendDigest(): finished'.NL;
        return (bool) $sent;
    }

    echo 'sendDigest(): disabled'.NL;
    return false;
}
189
/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * The response announces the exact image length and asks for the connection
 * to be closed, so the browser should consider the request complete once the
 * image has arrived.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    $gif = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');

    $headers = array(
        'Content-Type: image/gif',
        'Content-Length: '.strlen($gif),
        'Connection: Close',
    );
    foreach ($headers as $line) {
        header($line);
    }

    echo $gif;
    // push the image out to the client right away
    tpl_flush();
}
206
207//Setup VIM: ex: et ts=4 :
208// No trailing PHP closing tag - no output please!
209// See Note at http://php.net/manual/en/language.basic-syntax.instruction-separation.php
210