<?php

namespace dokuwiki;

use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Extension\Event;
use dokuwiki\Logger;
use dokuwiki\Search\Indexer;
use dokuwiki\Sitemap\Mapper;
use dokuwiki\Subscriptions\BulkSubscriptionSender;
use dokuwiki\ChangeLog\ChangeLog;

/**
 * Class TaskRunner
 *
 * Run an asynchronous task.
 *
 * Each call to run() tries the background jobs in a fixed order (indexer,
 * sitemap, subscription digest, page changelog trim, media changelog trim)
 * and stops at the first job that reports having done work.
 */
class TaskRunner
{
    /**
     * Run the next task
     *
     * Unless debug output was requested (and is allowed by the configuration),
     * the response is a 1x1 GIF and everything the jobs print is buffered and
     * discarded. The GIF is sent before the jobs run when ignore_user_abort()
     * took effect, and after them otherwise.
     *
     * @todo refactor to remove dependencies on globals
     * @triggers INDEXER_TASKS_RUN
     */
    public function run()
    {
        global $INPUT, $conf, $ID;

        // keep running after browser closes connection
        @ignore_user_abort(true);

        // check if user abort worked, if yes send output early
        $defer = !@ignore_user_abort() || $conf['broken_iua'];
        $output = $INPUT->has('debug') && $conf['allowdebug'];
        if (!$defer && !$output) {
            $this->sendGIF();
        }

        $ID = cleanID($INPUT->str('id'));

        // Catch any possible output (e.g. errors)
        if (!$output) {
            ob_start();
        } else {
            header('Content-Type: text/plain');
        }

        // run one of the jobs
        $tmp = []; // No event data
        $evt = new Event('INDEXER_TASKS_RUN', $tmp);
        if ($evt->advise_before()) {
            // short-circuit: the first job that returns true ends the chain
            $jobDidWork =
                $this->runIndexer() ||
                $this->runSitemapper() ||
                $this->sendDigest() ||
                $this->runTrimRecentChanges() ||
                $this->runTrimRecentChanges(true);

            // the after-event is only signalled when none of the jobs did any work
            if (!$jobDidWork) {
                $evt->advise_after();
            }
        }

        if (!$output) {
            ob_end_clean();
            if ($defer) {
                $this->sendGIF();
            }
        }
    }

    /**
     * Just send a 1x1 pixel blank gif to the browser
     *
     * Sets the image headers, prints the image and flushes the output.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Harry Fuecks <fuecks@gmail.com>
     */
    protected function sendGIF()
    {
        $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
        header('Content-Type: image/gif');
        header('Content-Length: ' . strlen($img));
        header('Connection: Close');
        echo $img;
        tpl_flush();
        // Browser should drop connection after this
        // Thinks it's got the whole image
    }

    /**
     * Trims the recent changes cache (or imports the old changelog) as needed.
     *
     * Prints a "started" and a "finished" line around the actual work, which
     * is done by trimChangelogFile().
     *
     * @param bool $media_changes If the media changelog shall be trimmed instead of
     *                            the page changelog
     *
     * @return bool true if the changelog was rewritten, false if nothing was done
     * @triggers TASK_RECENTCHANGES_TRIM
     * @author Ben Coburn <btcoburn@silicodon.net>
     */
    protected function runTrimRecentChanges($media_changes = false)
    {
        global $conf;

        echo "runTrimRecentChanges($media_changes): started" . NL;

        $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);
        $trimmed = $this->trimChangelogFile($fn, $media_changes);

        echo "runTrimRecentChanges($media_changes): finished" . NL;
        return $trimmed;
    }

    /**
     * Trim a single changelog file
     *
     * Trims the recent changes cache to the changes of the last
     * $conf['recent_days'] days or to $conf['recent'] items, whichever is larger.
     * The trimming is only attempted once a day (tracked through the modification
     * time of the "<file>.trimmed" marker) and is skipped while a "<file>_tmp"
     * file exists.
     *
     * @param string $fn            Path of the changelog file to trim
     * @param bool   $media_changes Passed on as 'isMedia' in the event data
     *
     * @return bool true if the changelog was rewritten, false otherwise
     * @triggers TASK_RECENTCHANGES_TRIM
     */
    private function trimChangelogFile($fn, $media_changes)
    {
        global $conf;

        // a missing marker makes filemtime() return false, which counts as "long ago"
        if (
            !file_exists($fn) ||
            (@filemtime($fn . '.trimmed') + 86400) >= time() ||
            file_exists($fn . '_tmp')
        ) {
            return false;
        }

        @touch($fn . '.trimmed');
        io_lock($fn);
        $lines = file($fn);
        if ($lines === false) {
            // the changelog could not be read: release the lock instead of
            // failing in count() with the lock still held
            io_unlock($fn);
            return false;
        }
        if (count($lines) <= $conf['recent']) {
            // nothing to trim
            io_unlock($fn);
            return false;
        }

        io_saveFile($fn . '_tmp', ''); // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days'] * 86400;
        $out_lines = [];
        $old_lines = [];
        foreach ($lines as $i => $line) {
            $log = ChangeLog::parseLogLine($line);
            if ($log === false) {
                continue; // discard junk
            }

            if ($log['date'] < $trim_time) {
                // keep old lines for now (append .$i to prevent key collisions)
                $old_lines[$log['date'] . ".$i"] = $line;
            } else {
                // definitely keep these lines
                $out_lines[$log['date'] . ".$i"] = $line;
            }
        }

        if (count($lines) === count($out_lines)) {
            // nothing to trim
            @unlink($fn . '_tmp');
            io_unlock($fn);
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        // however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines); // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            $out_lines = array_merge(array_slice($old_lines, -$extra), $out_lines);
        }

        // the kept lines are read back from the event data after the event was triggered
        $eventData = [
            'isMedia' => $media_changes,
            'trimmedChangelogLines' => $out_lines,
            'removedChangelogLines' => $extra > 0 ? array_slice($old_lines, 0, -$extra) : $old_lines,
        ];
        Event::createAndTrigger('TASK_RECENTCHANGES_TRIM', $eventData);
        $out_lines = $eventData['trimmedChangelogLines'];

        // save trimmed changelog
        io_saveFile($fn . '_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn . '_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn . '_tmp');
        } else {
            io_unlock($fn);
        }

        return true;
    }


    /**
     * Runs the indexer for the current page
     *
     * The page in the global $ID is removed from the index when it no longer
     * exists and (re)added otherwise. Messages from the indexer are printed.
     *
     * @return bool true if the indexer was run, false if there is no current page
     *              or the indexer threw a SearchException
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    protected function runIndexer()
    {
        global $ID;
        echo 'runIndexer(): started' . NL;

        if ((string) $ID === '') {
            return false;
        }

        // do the work
        try {
            $indexer = (new Indexer())->setLogger(function ($msg) {
                echo $msg . NL;
            });
            if (!page_exists($ID)) {
                $indexer->deletePage($ID, true);
            } else {
                $indexer->addPage($ID, true);
            }
            return true;
        } catch (SearchException $e) {
            // report the failure, but let the remaining jobs have their turn
            $msg = $e::class . ' : ' . $e->getMessage();
            echo $msg;
            Logger::debug($msg);
            return false;
        }
    }

    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is placed in the root directory named sitemap.xml.gz - This
     * file needs to be writable!
     *
     * @return bool true only if the sitemap was generated and the search
     *              engine ping reported success as well
     * @author Andreas Gohr
     * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
     */
    protected function runSitemapper()
    {
        echo 'runSitemapper(): started' . NL;
        $result = Mapper::generate() && Mapper::pingSearchEngines();
        echo 'runSitemapper(): finished' . NL;
        return $result;
    }

    /**
     * Send digest and list mails for all subscriptions which are in effect for the
     * current page
     *
     * Does nothing when the 'subscribe' action is not permitted.
     *
     * @return bool true if at least one mail was sent
     * @author Adrian Lang <lang@cosmocode.de>
     */
    protected function sendDigest()
    {
        global $ID;

        echo 'sendDigest(): started' . NL;
        if (!actionOK('subscribe')) {
            echo 'sendDigest(): disabled' . NL;
            return false;
        }
        $sub = new BulkSubscriptionSender();
        $sent = $sub->sendBulk($ID);

        echo "sendDigest(): sent $sent mails" . NL;
        echo 'sendDigest(): finished' . NL;
        return (bool)$sent;
    }
}