xref: /dokuwiki/inc/TaskRunner.php (revision 2cda016644e923dbda996c52bedee2113ba6d653)
1<?php
2
3namespace dokuwiki;
4
5use dokuwiki\Search\Exception\SearchException;
6use dokuwiki\Extension\Event;
7use dokuwiki\Logger;
8use dokuwiki\Search\Indexer;
9use dokuwiki\Sitemap\Mapper;
10use dokuwiki\Subscriptions\BulkSubscriptionSender;
11use dokuwiki\ChangeLog\ChangeLog;
12
13/**
14 * Class TaskRunner
15 *
16 * Run an asynchronous task.
17 */
class TaskRunner
{
    /**
     * Run the next task
     *
     * A blank GIF is sent to the client either before the work starts or, when the
     * response has to be deferred, after the work has finished. Unless debug output
     * was requested and is allowed, everything the jobs print is buffered and
     * discarded.
     *
     * The built-in jobs are tried in order and the chain stops at the first one that
     * reports it did some work. The AFTER handlers of INDEXER_TASKS_RUN are only
     * called when none of the built-in jobs did anything.
     *
     * @todo refactor to remove dependencies on globals
     * @triggers INDEXER_TASKS_RUN
     */
    public function run()
    {
        global $INPUT, $conf, $ID;

        // keep running after browser closes connection
        @ignore_user_abort(true);

        // check if user abort worked, if yes send output early
        $defer = !@ignore_user_abort() || $conf['broken_iua'];
        $output = $INPUT->has('debug') && $conf['allowdebug'];
        if (!$defer && !$output) {
            $this->sendGIF();
        }

        $ID = cleanID($INPUT->str('id'));

        // Catch any possible output (e.g. errors)
        if (!$output) {
            ob_start();
        } else {
            header('Content-Type: text/plain');
        }

        // run one of the jobs
        $tmp = []; // No event data
        $evt = new Event('INDEXER_TASKS_RUN', $tmp);
        if ($evt->advise_before()) {
            // the || chain short-circuits: later jobs are skipped once one did work
            $workDone =
                $this->runIndexer() ||
                $this->runSitemapper() ||
                $this->sendDigest() ||
                $this->runTrimRecentChanges() ||
                $this->runTrimRecentChanges(true);

            if (!$workDone) {
                $evt->advise_after();
            }
        }

        if (!$output) {
            // throw away whatever the jobs printed
            ob_end_clean();
            if ($defer) {
                $this->sendGIF();
            }
        }
    }

    /**
     * Just send a 1x1 pixel blank gif to the browser
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Harry Fuecks <fuecks@gmail.com>
     */
    protected function sendGIF()
    {
        $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
        header('Content-Type: image/gif');
        header('Content-Length: ' . strlen($img));
        header('Connection: Close');
        echo $img;
        tpl_flush();
        // Browser should drop connection after this
        // Thinks it's got the whole image
    }

    /**
     * Trims the recent changes cache (or imports the old changelog) as needed.
     *
     * @param bool $media_changes   If the media changelog shall be trimmed instead of
     *                              the page changelog
     *
     * @return bool true if the changelog was rewritten, false if nothing was done
     * @triggers TASK_RECENTCHANGES_TRIM
     * @author Ben Coburn <btcoburn@silicodon.net>
     */
    protected function runTrimRecentChanges($media_changes = false)
    {
        global $conf;

        // note: a false $media_changes interpolates as an empty string here
        $label = "runTrimRecentChanges($media_changes)";
        echo $label . ': started' . NL;

        $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);
        $trimmed = $this->trimChangelog($fn, $media_changes);

        echo $label . ': finished' . NL;
        return $trimmed;
    }

    /**
     * Does the actual trimming for runTrimRecentChanges()
     *
     * Trims the recent changes cache to the changes of the last $conf['recent_days']
     * days or to $conf['recent'] items, whichever is larger.
     * The trimming is only attempted once a day.
     *
     * @param string $fn            path of the changelog file to trim
     * @param bool   $media_changes passed on to the event as 'isMedia'
     *
     * @return bool true if the changelog was rewritten
     * @triggers TASK_RECENTCHANGES_TRIM
     */
    private function trimChangelog($fn, $media_changes)
    {
        global $conf;

        // skip when there is no changelog, the last attempt is less than a day old
        // or another trim is in progress (its tmp file exists)
        if (
            !file_exists($fn) ||
            (@filemtime($fn . '.trimmed') + 86400) >= time() ||
            file_exists($fn . '_tmp')
        ) {
            return false;
        }

        // remember this attempt, even if it turns out there is nothing to trim
        @touch($fn . '.trimmed');
        io_lock($fn);
        $lines = file($fn);
        if ($lines === false || count($lines) <= $conf['recent']) {
            // unreadable changelog or nothing to trim - never leave the lock behind
            io_unlock($fn);
            return false;
        }

        io_saveFile($fn . '_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days'] * 86400;
        $out_lines = [];
        $old_lines = [];
        foreach ($lines as $i => $line) {
            $log = ChangeLog::parseLogLine($line);
            if ($log === false) {
                continue; // discard junk
            }

            // append .$i to prevent key collisions between entries of the same second
            $key = $log['date'] . ".$i";
            if ($log['date'] < $trim_time) {
                // keep old lines for now
                $old_lines[$key] = $line;
            } else {
                // definitely keep these lines
                $out_lines[$key] = $line;
            }
        }

        if (count($lines) === count($out_lines)) {
            // nothing to trim
            @unlink($fn . '_tmp');
            io_unlock($fn);
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        $out_lines = $this->sortChangelogLines($out_lines);
        $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            $old_lines = $this->sortChangelogLines($old_lines);
            $out_lines = array_merge(array_slice($old_lines, -$extra), $out_lines);
        }

        $eventData = [
            'isMedia' => $media_changes,
            'trimmedChangelogLines' => $out_lines,
            'removedChangelogLines' => $extra > 0 ? array_slice($old_lines, 0, -$extra) : $old_lines,
        ];
        Event::createAndTrigger('TASK_RECENTCHANGES_TRIM', $eventData);
        // whatever is in the event data after the event has run is what gets written
        $out_lines = $eventData['trimmedChangelogLines'];

        // save trimmed changelog
        io_saveFile($fn . '_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn . '_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn . '_tmp');
        } else {
            io_unlock($fn);
        }
        return true;
    }

    /**
     * Sort changelog lines keyed by "<date>.<line index>" chronologically
     *
     * A plain ksort() compares such keys as floating point numbers, which puts
     * "100.10" before "100.2" and so shuffles entries that share a timestamp.
     * Here the date and the line index are compared as two separate integers.
     *
     * @param string[] $lines changelog lines, keyed by "<date>.<line index>"
     *
     * @return string[] the same lines and keys, ordered by date, then line index
     */
    private function sortChangelogLines(array $lines): array
    {
        uksort($lines, static function ($a, $b) {
            // the "+ [0, 0]" guards against a key without a "." separator
            $keyA = array_map('intval', explode('.', (string) $a, 2)) + [0, 0];
            $keyB = array_map('intval', explode('.', (string) $b, 2)) + [0, 0];
            return $keyA <=> $keyB;
        });
        return $lines;
    }

    /**
     * Runs the indexer for the current page
     *
     * Adds the page given in the global $ID to the index, or removes it from the
     * index when the page does not exist. Nothing is done without an $ID.
     *
     * @return mixed the result of the indexer call; false when there is no page ID
     *               or the indexer threw a SearchException
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    protected function runIndexer()
    {
        global $ID;
        echo 'runIndexer(): started' . NL;

        if ((string) $ID === '') {
            echo 'runIndexer(): finished' . NL;
            return false;
        }

        // do the work
        try {
            // indexer log messages become part of the task output
            $indexer = (new Indexer())->setLogger(function ($msg) {
                echo $msg . NL;
            });
            if (!page_exists($ID)) {
                $result = $indexer->deletePage($ID);
            } else {
                $result = $indexer->addPage($ID);
            }
        } catch (SearchException $e) {
            $msg = $e::class . ' : ' . $e->getMessage();
            echo $msg . NL;
            Logger::debug($msg);
            $result = false;
        }

        echo 'runIndexer(): finished' . NL;
        return $result;
    }

    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is placed in the root directory named sitemap.xml.gz - This
     * file needs to be writable!
     *
     * @return bool true if the sitemap was generated and the search engine ping succeeded
     * @author Andreas Gohr
     * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
     */
    protected function runSitemapper()
    {
        echo 'runSitemapper(): started' . NL;
        // the ping is only attempted when generate() reported success
        $result = Mapper::generate() && Mapper::pingSearchEngines();
        echo 'runSitemapper(): finished' . NL;
        return $result;
    }

    /**
     * Send digest and list mails for all subscriptions which are in effect for the
     * current page
     *
     * @return bool true if at least one mail was sent, false otherwise or when the
     *              subscribe action is not available
     * @author Adrian Lang <lang@cosmocode.de>
     */
    protected function sendDigest()
    {
        global $ID;

        echo 'sendDigest(): started' . NL;
        if (!actionOK('subscribe')) {
            echo 'sendDigest(): disabled' . NL;
            return false;
        }
        $sub = new BulkSubscriptionSender();
        $sent = $sub->sendBulk($ID);

        echo "sendDigest(): sent $sent mails" . NL;
        echo 'sendDigest(): finished' . NL;
        return (bool)$sent;
    }
}
266