<?php
/**
 * DokuWiki indexer
 *
 * Entry point requested by the browser as a 1x1 GIF. Each request runs at
 * most one background job (page indexing, sitemap generation, subscription
 * digests, changelog trimming) after the image has been delivered.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
$defer = !@ignore_user_abort() || $conf['broken_iua'];
if(!$defer){
    sendGIF(); // send gif
}

$ID = cleanID($INPUT->str('id'));

// Catch any possible output (e.g. errors)
$output = $INPUT->has('debug') && $conf['allowdebug'];
if(!$output) ob_start();

// run one of the jobs
// The `or` chain short-circuits: the first job that returns true ends the
// chain, and advise_after() is only reached when every job returned false.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    runSitemapper() or
    sendDigest() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}
// the image could not be sent early, so send it now that the work is done
if($defer) sendGIF();

if(!$output) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * The changelog is rewritten so that it keeps every entry newer than
 * $conf['recent_days'] days and, if that leaves fewer than $conf['recent']
 * entries, the newest older entries needed to reach that minimum.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                            the page changelog
 * @return bool true if the changelog was rewritten, false if nothing was done
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    echo "runTrimRecentChanges($media_changes): started".NL;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] recent
    // changes or $conf['recent'] items, which ever is larger.
    // The trimming is only done once a day.
    if (@file_exists($fn) &&
        (@filemtime($fn.'.trimmed')+86400)<time() &&
        !@file_exists($fn.'_tmp')) {
        @touch($fn.'.trimmed');
        io_lock($fn);
        $lines = file($fn);
        if ($lines === false) {
            // changelog could not be read; count(false) would fail, so bail out
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }
        if (count($lines)<=$conf['recent']) {
            // nothing to trim
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must start as an array: it stays untouched when the only dropped
        // lines are junk, yet it is sorted and sliced below
        $old_lines = array();

        foreach ($lines as $i => $line) {
            $log = parseChangelogLine($line);
            if ($log === false) continue;                      // discard junk
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $line;     // keep old lines for now (append .$i to prevent key collisions)
            } else {
                $out_lines[$log['date'].".$i"] = $line;     // definitely keep these lines
            }
        }

        if (count($lines)==count($out_lines)) {
            // nothing to trim
            @unlink($fn.'_tmp');
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            // take the newest $extra old entries and put them in front
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($fn.'_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn.'_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn.'_tmp');
        } else {
            io_unlock($fn);
        }
        echo "runTrimRecentChanges($media_changes): finished".NL;
        return true;
    }

    // nothing done
    echo "runTrimRecentChanges($media_changes): finished".NL;
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * @return bool false when no page ID is set, otherwise the result of
 *              idx_addPage() for the current page
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    print "runIndexer(): started".NL;

    if(!$ID) return false;

    // do the work
    return idx_addPage($ID, true);
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable!
 *
 * The work is delegated to the Sitemapper class; search engines are only
 * pinged when generating the sitemap succeeded.
 *
 * @return bool true if the sitemap was generated and the ping succeeded
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    print "runSitemapper(): started".NL;
    $result = Sitemapper::generate() && Sitemapper::pingSearchEngines();
    print 'runSitemapper(): finished'.NL;
    return $result;
}

/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * @return bool true if at least one mail was sent, false otherwise (including
 *              when the subscribe action is disabled)
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    global $ID;

    echo 'sendDigest(): started'.NL;
    if(!actionOK('subscribe')) {
        echo 'sendDigest(): disabled'.NL;
        return false;
    }
    $sub = new Subscription();
    $sent = $sub->send_bulk($ID);

    echo "sendDigest(): sent $sent mails".NL;
    echo 'sendDigest(): finished'.NL;
    return (bool) $sent;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the request carries a 'debug' parameter only a text/plain content
 * type header is sent and no image is emitted.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    global $INPUT;
    if($INPUT->has('debug')){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php