xref: /dokuwiki/lib/exe/indexer.php (revision 3daf9b20cab478b0c91d02f47cc3e0de195961ae)
1<?php
2/**
3 * DokuWiki indexer
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
// Locate the DokuWiki root unless the including script already defined it
if(!defined('DOKU_INC')){
    define('DOKU_INC', dirname(__FILE__).'/../../');
}
// Set before init.php is loaded; presumably evaluated there to keep the
// response uncompressed - TODO confirm against inc/init.php
define('DOKU_DISABLE_GZIP_OUTPUT', 1);
require_once(DOKU_INC.'inc/init.php');
session_write_close();  // the session is not used beyond this point
if(!defined('NL')){
    define('NL', "\n");
}

// keep running after browser closes connection
@ignore_user_abort(true);

// $defer:  ignore_user_abort did not stick (or the configuration marks it as
//          broken), so the GIF may only be sent once all work is done
// $output: debug mode - job messages are shown as plain text, no GIF at all
$defer  = !@ignore_user_abort() || $conf['broken_iua'];
$output = $INPUT->has('debug') && $conf['allowdebug'];
if(!($defer || $output)){
    sendGIF(); // answer the browser right away, keep working afterwards
}

$ID = cleanID($INPUT->str('id'));

// Outside of debug mode swallow everything the jobs print (e.g. errors)
if($output){
    header('Content-Type: text/plain');
}else{
    ob_start();
}

// Run the jobs in order and stop at the first one that reports work done.
// The after-event is only advised when none of them did anything.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if($evt->advise_before()){
    if(!(
        runIndexer() ||
        runSitemapper() ||
        sendDigest() ||
        runTrimRecentChanges() ||
        runTrimRecentChanges(true)
    )){
        $evt->advise_after();
    }
}

if(!$output){
    ob_end_clean(); // discard whatever the jobs printed
    if($defer){
        sendGIF();
    }
}

exit;
48
49// --------------------------------------------------------------------
50
/**
 * Trims the recent changes cache as needed.
 *
 * Every entry newer than $conf['recent_days'] days is kept. If those are fewer
 * than $conf['recent'] entries, the newest of the older entries are kept as
 * well until $conf['recent'] entries are reached. Lines rejected by
 * parseChangelogLine() are dropped.
 *
 * A run is attempted at most once a day per changelog (tracked through the
 * mtime of "<changelog>.trimmed") and is skipped while "<changelog>_tmp"
 * exists.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                            the page changelog
 * @return bool true if a trimmed changelog was written, false if nothing was done
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    echo "runTrimRecentChanges($media_changes): started".NL;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, whichever is larger.
    // The trimming is only done once a day.
    if (@file_exists($fn) &&
        (@filemtime($fn.'.trimmed')+86400)<time() &&
        !@file_exists($fn.'_tmp')) {
        @touch($fn.'.trimmed');
        io_lock($fn);
        $lines = file($fn);
        if ($lines === false || count($lines)<=$conf['recent']) {
            // changelog unreadable or nothing to trim
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must start out as an array: it is sorted and sliced below even when
        // no line turned out to be old (e.g. only junk lines were dropped)
        $old_lines = array();

        foreach ($lines as $i => $line) {
            $log = parseChangelogLine($line);
            if ($log === false) continue;                  // discard junk
            // append .$i to the key to prevent collisions between equal dates
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $line;    // keep old lines for now
            } else {
                $out_lines[$log['date'].".$i"] = $line;    // definitely keep these lines
            }
        }

        if (count($lines)==count($out_lines)) {
            // nothing to trim
            @unlink($fn.'_tmp');
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);      // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($fn.'_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn.'_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn.'_tmp');
        } else {
            io_unlock($fn);
        }
        echo "runTrimRecentChanges($media_changes): finished".NL;
        return true;
    }

    // nothing done
    echo "runTrimRecentChanges($media_changes): finished".NL;
    return false;
}
133
/**
 * Adds the page named by the global $ID to the index
 *
 * Announces the job on the output and hands the page over to idx_addPage().
 *
 * @return mixed false when no page ID is set, otherwise whatever
 *               idx_addPage() returns
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;

    echo 'runIndexer(): started'.NL;

    // without a page there is nothing to index
    return $ID ? idx_addPage($ID, true) : false;
}
149
/**
 * Builds the sitemap and notifies the search engines about it
 *
 * Calls Sitemapper::generate() and, only when that reports success,
 * Sitemapper::pingSearchEngines().
 *
 * NOTE(review): the earlier comment stated that the map is written to
 * sitemap.xml.gz in the root directory, which has to be writable - that is
 * decided inside Sitemapper, confirm there.
 *
 * @return bool true when the sitemap was generated and the ping succeeded
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    echo 'runSitemapper(): started'.NL;

    $ok = false;
    if(Sitemapper::generate()){
        // the ping is skipped entirely when nothing was generated
        $ok = (bool) Sitemapper::pingSearchEngines();
    }

    echo 'runSitemapper(): finished'.NL;
    return $ok;
}
165
/**
 * Sends the pending subscription mails for the page named by the global $ID
 *
 * Does nothing when the 'subscribe' action is not allowed. Otherwise the
 * work is delegated to Subscription::send_bulk().
 *
 * @return bool true when send_bulk() reported at least one mail, false
 *              otherwise (including when subscriptions are disabled)
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    global $ID;

    print 'sendDigest(): started'.NL;

    if(actionOK('subscribe')) {
        $subscription = new Subscription();
        $count = $subscription->send_bulk($ID);

        print "sendDigest(): sent $count mails".NL;
        print 'sendDigest(): finished'.NL;
        return (bool) $count;
    }

    print 'sendDigest(): disabled'.NL;
    return false;
}
188
/**
 * Answers the request with a blank 1x1 pixel GIF
 *
 * The exact Content-Length together with "Connection: Close" is meant to
 * make the browser treat the image as complete and drop the connection.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    $gif = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');

    $headers = array(
        'Content-Type: image/gif',
        'Content-Length: '.strlen($gif),
        'Connection: Close',
    );
    foreach($headers as $line){
        header($line);
    }

    echo $gif;
    flush(); // push the image out to the client
}
205
206//Setup VIM: ex: et ts=4 :
207// No trailing PHP closing tag - no output please!
208// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php
209