<?php

namespace dokuwiki;

use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Extension\Event;
use dokuwiki\Logger;
use dokuwiki\Search\Indexer;
use dokuwiki\Sitemap\Mapper;
use dokuwiki\Subscriptions\BulkSubscriptionSender;
use dokuwiki\ChangeLog\ChangeLog;

/**
 * Class TaskRunner
 *
 * Run an asynchronous task.
 *
 * Each call to run() tries the built-in jobs in a fixed order and stops at the
 * first one that reports having done work.
 */
class TaskRunner
{
    /**
     * Run the next task
     *
     * Tries, in order: indexing the current page, building the sitemap, sending
     * subscription digests, trimming the page changelog and trimming the media
     * changelog. The chain short-circuits on the first job returning true.
     * The "after" part of the INDEXER_TASKS_RUN event is only advised when none
     * of these jobs did any work.
     *
     * Unless debug output was requested (and is allowed), all output of the jobs
     * is buffered and discarded, and a blank GIF is sent to the browser instead.
     *
     * @todo refactor to remove dependencies on globals
     * @triggers INDEXER_TASKS_RUN
     */
    public function run()
    {
        global $INPUT, $conf, $ID;

        // keep running after browser closes connection
        @ignore_user_abort(true);

        // check if user abort worked, if yes send output early
        $defer = !@ignore_user_abort() || $conf['broken_iua'];
        $output = $INPUT->has('debug') && $conf['allowdebug'];
        if (!$defer && !$output) {
            $this->sendGIF();
        }

        $ID = cleanID($INPUT->str('id'));

        // Catch any possible output (e.g. errors)
        if (!$output) {
            ob_start();
        } else {
            header('Content-Type: text/plain');
        }

        // run one of the jobs
        $tmp = []; // No event data
        $evt = new Event('INDEXER_TASKS_RUN', $tmp);
        if ($evt->advise_before()) {
            // short-circuit evaluation: later jobs only run if all earlier ones returned false
            $didWork = $this->runIndexer() ||
                $this->runSitemapper() ||
                $this->sendDigest() ||
                $this->runTrimRecentChanges() ||
                $this->runTrimRecentChanges(true);

            if (!$didWork) {
                $evt->advise_after();
            }
        }

        if (!$output) {
            ob_end_clean();
            if ($defer) {
                // the GIF could not be sent up front, so send it now
                $this->sendGIF();
            }
        }
    }

    /**
     * Just send a 1x1 pixel blank gif to the browser
     *
     * Sends an exact Content-Length and "Connection: Close" so the browser
     * can consider the response complete while the script keeps running.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Harry Fuecks <fuecks@gmail.com>
     */
    protected function sendGIF()
    {
        $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
        header('Content-Type: image/gif');
        header('Content-Length: ' . strlen($img));
        header('Connection: Close');
        echo $img;
        tpl_flush();
        // Browser should drop connection after this
        // Thinks it's got the whole image
    }

    /**
     * Trims the recent changes cache (or imports the old changelog) as needed.
     *
     * Keeps all entries newer than $conf['recent_days'] days and, if those are
     * fewer than $conf['recent'], as many of the most recent older entries as
     * needed to reach that minimum. Unparsable lines are dropped.
     *
     * @param bool $media_changes If the media changelog shall be trimmed instead of
     *                            the page changelog
     *
     * @return bool true if the changelog was rewritten, false if nothing was done
     * @triggers TASK_RECENTCHANGES_TRIM
     * @author Ben Coburn <btcoburn@silicodon.net>
     */
    protected function runTrimRecentChanges($media_changes = false)
    {
        global $conf;

        echo "runTrimRecentChanges($media_changes): started" . NL;

        $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

        // Trim the Recent Changes
        // Trims the recent changes cache to the last $conf['recent_days'] days of
        // changes or $conf['recent'] items, whichever is larger.
        // The trimming is only done once a day.
        if (
            file_exists($fn) &&
            (@filemtime($fn . '.trimmed') + 86400) < time() &&
            !file_exists($fn . '_tmp')
        ) {
            @touch($fn . '.trimmed');
            io_lock($fn);
            $lines = file($fn);
            // file() returns false when the changelog cannot be read; treat that like
            // "nothing to trim" instead of passing false on to count()
            if ($lines === false || count($lines) <= $conf['recent']) {
                // nothing to trim
                io_unlock($fn);
                echo "runTrimRecentChanges($media_changes): finished" . NL;
                return false;
            }

            io_saveFile($fn . '_tmp', ''); // presave tmp as 2nd lock
            $trim_time = time() - $conf['recent_days'] * 86400;
            $out_lines = [];
            $old_lines = [];
            foreach ($lines as $i => $line) {
                $log = ChangeLog::parseLogLine($line);
                if ($log === false) {
                    continue; // discard junk
                }

                if ($log['date'] < $trim_time) {
                    // keep old lines for now (append .$i to prevent key collisions)
                    $old_lines[$log['date'] . ".$i"] = $line;
                } else {
                    // definitely keep these lines
                    $out_lines[$log['date'] . ".$i"] = $line;
                }
            }

            if (count($lines) === count($out_lines)) {
                // nothing to trim
                @unlink($fn . '_tmp');
                io_unlock($fn);
                echo "runTrimRecentChanges($media_changes): finished" . NL;
                return false;
            }

            // sort the final result, it shouldn't be necessary,
            // however the extra robustness in making the changelog cache self-correcting is worth it
            $byDateThenIndex = static fn($a, $b) => self::compareChangelogKeys($a, $b);
            uksort($out_lines, $byDateThenIndex);
            $extra = $conf['recent'] - count($out_lines); // do we need extra lines to bring us up to minimum
            if ($extra > 0) {
                uksort($old_lines, $byDateThenIndex);
                $out_lines = array_merge(array_slice($old_lines, -$extra), $out_lines);
            }

            $eventData = [
                'isMedia' => $media_changes,
                'trimmedChangelogLines' => $out_lines,
                'removedChangelogLines' => $extra > 0 ? array_slice($old_lines, 0, -$extra) : $old_lines,
            ];
            Event::createAndTrigger('TASK_RECENTCHANGES_TRIM', $eventData);
            // event handlers may have replaced the list of lines to keep
            $out_lines = $eventData['trimmedChangelogLines'];

            // save trimmed changelog
            io_saveFile($fn . '_tmp', implode('', $out_lines));
            @unlink($fn);
            if (!rename($fn . '_tmp', $fn)) {
                // rename failed so try another way...
                io_unlock($fn);
                io_saveFile($fn, implode('', $out_lines));
                @unlink($fn . '_tmp');
            } else {
                io_unlock($fn);
            }
            echo "runTrimRecentChanges($media_changes): finished" . NL;
            return true;
        }

        // nothing done
        echo "runTrimRecentChanges($media_changes): finished" . NL;
        return false;
    }

    /**
     * Compare two changelog line keys of the form "<date>.<index>"
     *
     * A plain ksort() would treat these keys as floating point numbers, so that
     * e.g. "100.10" sorts before "100.2". This comparator orders by date first
     * and by the original line index second, which keeps lines sharing the same
     * timestamp in their original file order.
     *
     * Both parts are compared as integers (assumes the date part is an integer
     * timestamp, as the numeric comparison with time() above suggests).
     *
     * @param string $a
     * @param string $b
     * @return int negative, zero or positive as expected by uksort()
     */
    private static function compareChangelogKeys($a, $b)
    {
        [$dateA, $indexA] = array_pad(explode('.', (string)$a, 2), 2, '0');
        [$dateB, $indexB] = array_pad(explode('.', (string)$b, 2), 2, '0');

        return ((int)$dateA <=> (int)$dateB) ?: ((int)$indexA <=> (int)$indexB);
    }


    /**
     * Runs the indexer for the current page
     *
     * Adds the page given in the global $ID to the index, or removes it from the
     * index when the page does not exist. Does nothing when $ID is empty.
     *
     * @return bool true if the indexer was run, false if there was no page ID or
     *              a SearchException occurred
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    protected function runIndexer()
    {
        global $ID;
        echo 'runIndexer(): started' . NL;

        if ((string) $ID === '') {
            return false;
        }

        // do the work
        try {
            $indexer = (new Indexer())->setLogger(function ($msg) {
                echo $msg . NL;
            });
            if (!page_exists($ID)) {
                $indexer->deletePage($ID, true);
            } else {
                $indexer->addPage($ID, true);
            }
            echo 'runIndexer(): finished' . NL;
            return true;
        } catch (SearchException $e) {
            $msg = $e::class . ' : ' . $e->getMessage();
            echo $msg . NL;
            Logger::debug($msg);
            return false;
        }
    }

    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is placed in the root directory named sitemap.xml.gz - This
     * file needs to be writable!
     *
     * Search engines are only pinged when generating the sitemap succeeded.
     *
     * @return bool true if the sitemap was generated and the ping succeeded
     * @author Andreas Gohr
     * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
     */
    protected function runSitemapper()
    {
        echo 'runSitemapper(): started' . NL;
        $result = Mapper::generate() && Mapper::pingSearchEngines();
        echo 'runSitemapper(): finished' . NL;
        return $result;
    }

    /**
     * Send digest and list mails for all subscriptions which are in effect for the
     * current page
     *
     * Does nothing when the 'subscribe' action is disabled.
     *
     * @return bool true if at least one mail was sent
     * @author Adrian Lang <lang@cosmocode.de>
     */
    protected function sendDigest()
    {
        global $ID;

        echo 'sendDigest(): started' . NL;
        if (!actionOK('subscribe')) {
            echo 'sendDigest(): disabled' . NL;
            return false;
        }
        $sub = new BulkSubscriptionSender();
        $sent = $sub->sendBulk($ID);

        echo "sendDigest(): sent $sent mails" . NL;
        echo 'sendDigest(): finished' . NL;
        return (bool)$sent;
    }
}