<?php
/**
 * DokuWiki indexer
 *
 * Entry point that runs at most one background task per request (indexing,
 * sitemap generation, subscription digests, changelog trimming) and answers
 * the request with a 1x1 GIF unless debug output was requested and allowed.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
$defer = !@ignore_user_abort() || $conf['broken_iua'];
// plain-text debug output is only produced when requested AND allowed by config
$output = $INPUT->has('debug') && $conf['allowdebug'];
if(!$defer && !$output){
    sendGIF(); // send gif
}

$ID = cleanID($INPUT->str('id'));

// Catch any possible output (e.g. errors)
if(!$output) ob_start();
else header('Content-Type: text/plain');

// run one of the jobs
// The tasks are chained with `or`: the first task that returns true stops the
// chain, so at most one task does work per request, and advise_after() is only
// reached when every task returned false.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    runSitemapper() or
    sendDigest() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}

if(!$output) {
    // discard whatever the tasks printed; the client only ever gets the GIF
    ob_end_clean();
    if($defer) sendGIF();
}

exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * Keeps every entry newer than $conf['recent_days'] days and, if that leaves
 * fewer than $conf['recent'] entries, tops the result up with the newest of
 * the older entries. Unparseable lines are dropped.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                            the page changelog
 * @return bool true if the changelog was rewritten, false if nothing was done
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    echo "runTrimRecentChanges($media_changes): started".NL;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, whichever is larger.
    // The trimming is only done once a day.
    if (file_exists($fn) &&
        (@filemtime($fn.'.trimmed')+86400)<time() &&
        !file_exists($fn.'_tmp')) {
        // mark this attempt first so other requests skip trimming for a day
        @touch($fn.'.trimmed');
        io_lock($fn);
        $lines = file($fn);
        // file() returns false when the changelog cannot be read; bail out
        // explicitly instead of passing false to count() while holding the lock
        if ($lines === false || count($lines)<=$conf['recent']) {
            // unreadable or nothing to trim
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        $old_lines = array();
        foreach ($lines as $i => $line) {
            $log = parseChangelogLine($line);
            if ($log === false) continue;                      // discard junk
            if ($log['date'] < $trim_time) {
                // keep old lines for now (append .$i to prevent key collisions)
                $old_lines[$log['date'].".$i"] = $line;
            } else {
                // definitely keep these lines
                $out_lines[$log['date'].".$i"] = $line;
            }
        }

        if (count($lines)==count($out_lines)) {
            // nothing to trim
            @unlink($fn.'_tmp');
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        // however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            // take the newest $extra old entries and put them in front of the kept ones
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($fn.'_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn.'_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn.'_tmp');
        } else {
            io_unlock($fn);
        }
        echo "runTrimRecentChanges($media_changes): finished".NL;
        return true;
    }

    // nothing done
    echo "runTrimRecentChanges($media_changes): finished".NL;
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * Does nothing when no page id was given. The id is compared against the
 * empty string rather than tested for truthiness so that a page named "0"
 * is still indexed.
 *
 * @return bool false when no page id is set, otherwise the result of
 *              idx_addPage()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    print "runIndexer(): started".NL;

    if((string) $ID === '') return false;

    // do the work
    return idx_addPage($ID, true);
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable!
 *
 * Search engines are only pinged when Sitemapper::generate() succeeded.
 *
 * @return bool true only if both generating and pinging returned true
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    print "runSitemapper(): started".NL;
    $result = Sitemapper::generate() && Sitemapper::pingSearchEngines();
    print 'runSitemapper(): finished'.NL;
    return $result;
}

/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * Skipped entirely when the 'subscribe' action is not allowed.
 *
 * @return bool true if at least one mail was reported as sent
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    global $ID;

    echo 'sendDigest(): started'.NL;
    if(!actionOK('subscribe')) {
        echo 'sendDigest(): disabled'.NL;
        return false;
    }
    $sub = new Subscription();
    $sent = $sub->send_bulk($ID);

    echo "sendDigest(): sent $sent mails".NL;
    echo 'sendDigest(): finished'.NL;
    return (bool) $sent;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * Sets Content-Type, Content-Length and Connection headers, prints the image
 * and flushes the output via tpl_flush().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    tpl_flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 :
// No trailing PHP closing tag - no output please!
// See Note at http://php.net/manual/en/language.basic-syntax.instruction-separation.php