<?php
/**
 * DokuWiki indexer
 *
 * Background task runner: it answers the request with a 1x1 GIF and then
 * runs at most one of the maintenance jobs defined below.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// Version tag used to force rebuild on upgrade
define('INDEXER_VERSION', 3);

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
$defer = !@ignore_user_abort() || $conf['broken_iua'];
if(!$defer){
    sendGIF(); // send gif
}

$ID = cleanID($_REQUEST['id']);

// Catch any possible output (e.g. errors)
$output = isset($_REQUEST['debug']) && $conf['allowdebug'];
if(!$output) ob_start();

// run one of the jobs
// The `or` chain short-circuits: the first job that returns true ends the
// chain, and advise_after() is only reached when every job returned false.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    runSitemapper() or
    sendDigest() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}
if($defer) sendGIF();

if(!$output) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache as needed.
 *
 * Entries newer than $conf['recent_days'] days are always kept; if that
 * leaves fewer than $conf['recent'] entries, the newest of the older entries
 * are kept as well. Lines that parseChangelogLine() rejects are discarded.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                            the page changelog
 * @return bool true if the changelog was rewritten, false otherwise
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, which ever is larger.
    // The trimming is only done once a day.
    if (@file_exists($fn) &&
        (@filemtime($fn.'.trimmed')+86400)<time() &&
        !@file_exists($fn.'_tmp')) {
        @touch($fn.'.trimmed');
        io_lock($fn);
        $lines = file($fn);
        if ($lines === false || count($lines)<=$conf['recent']) {
            // unreadable changelog or nothing to trim
            io_unlock($fn);
            return false;
        }

        io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must start out as an array: it is sorted and sliced below even
        // when no line was old enough to be put into it
        $old_lines = array();

        for ($i=0; $i<count($lines); $i++) {
            $log = parseChangelogLine($lines[$i]);
            if ($log === false) continue;                      // discard junk
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $lines[$i];   // keep old lines for now (append .$i to prevent key collisions)
            } else {
                $out_lines[$log['date'].".$i"] = $lines[$i];   // definitely keep these lines
            }
        }

        if (count($lines)==count($out_lines)) {
            // nothing to trim
            @unlink($fn.'_tmp');
            io_unlock($fn);
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($fn.'_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn.'_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn.'_tmp');
        } else {
            io_unlock($fn);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * The page is skipped when its '.indexed' tag file holds a version of at
 * least INDEXER_VERSION and is newer than the page file. Indexing is
 * serialized through a lock directory inside $conf['lockdir'].
 *
 * @return bool true if the page was indexed, false otherwise
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    global $conf;
    print "runIndexer(): started".NL;

    if(!$ID) return false;

    // check if indexing needed
    $idxtag = metaFN($ID,'.indexed');
    if(@file_exists($idxtag)){
        if(io_readFile($idxtag) >= INDEXER_VERSION){
            $last = @filemtime($idxtag);
            if($last > @filemtime(wikiFN($ID))){
                print "runIndexer(): index for $ID up to date".NL;
                return false;
            }
        }
    }

    // try to acquire a lock
    $run = 0;
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(is_dir($lock) && time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            if (!@rmdir($lock)) {
                print "runIndexer(): removing the stale lock failed".NL;
                return false;
            } else {
                print "runIndexer(): stale lock removed".NL;
            }
        }elseif($run++ == 1000){
            // gave up after about 1000 polls of 50 microseconds each
            // NOTE(review): that is roughly 50ms of sleeping, not the
            // "5 seconds" the previous comment claimed - confirm which
            // timeout is actually intended
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable!
 *
 * The work itself is delegated to Sitemapper::generate(); search engines are
 * only pinged when the generation reported success.
 *
 * @return bool true if both generating and pinging reported success
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    print "runSitemapper(): started".NL;
    $result = Sitemapper::generate() && Sitemapper::pingSearchEngines();
    print 'runSitemapper(): finished'.NL;
    return $result;
}

/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * While the mails are prepared, $USERINFO and $_SERVER['REMOTE_USER'] are
 * temporarily switched to each subscriber and restored at the end.
 *
 * @return bool always false, so the jobs following it in the task chain
 *              still get their chance to run
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    echo 'sendDigest(): start'.NL;
    global $ID;
    global $conf;
    if (!$conf['subscribers']) {
        return false;
    }
    $subscriptions = subscription_find($ID, array('style' => '(digest|list)',
                                                  'escaped' => true));
    global $auth;
    global $lang;
    global $USERINFO;

    // remember current user info (REMOTE_USER is not necessarily set)
    $olduinfo = $USERINFO;
    $olduser  = isset($_SERVER['REMOTE_USER']) ? $_SERVER['REMOTE_USER'] : null;

    foreach($subscriptions as $id => $users) {
        if (!subscription_lock($id)) {
            continue;
        }
        foreach($users as $data) {
            list($user, $style, $lastupdate) = $data;
            $lastupdate = (int) $lastupdate;
            if ($lastupdate + $conf['subscribe_time'] > time()) {
                // Less than the configured time period passed since last
                // update.
                continue;
            }

            // Work as the user to make sure ACLs apply correctly
            $USERINFO = $auth->getUserData($user);
            $_SERVER['REMOTE_USER'] = $user;
            if ($USERINFO === false) {
                continue;
            }

            if (substr($id, -1, 1) === ':') {
                // The subscription target is a namespace
                $changes = getRecentsSince($lastupdate, null, getNS($id));
            } else {
                if(auth_quickaclcheck($id) < AUTH_READ) continue;

                $meta = p_get_metadata($id);
                $changes = array($meta['last_change']);
            }

            // Filter out pages only changed in small and own edits
            $change_ids = array();
            foreach($changes as $rev) {
                $n = 0;
                // step back through the revisions while the change is the
                // user's own or a minor edit
                while (!is_null($rev) && $rev['date'] >= $lastupdate &&
                       ($_SERVER['REMOTE_USER'] === $rev['user'] ||
                        $rev['type'] === DOKU_CHANGE_TYPE_MINOR_EDIT)) {
                    $rev = getRevisions($rev['id'], $n++, 1);
                    $rev = (count($rev) > 0) ? $rev[0] : null;
                }

                if (!is_null($rev) && $rev['date'] >= $lastupdate) {
                    // Some change was not a minor one and not by myself
                    $change_ids[] = $rev['id'];
                }
            }

            if ($style === 'digest') {
                foreach($change_ids as $change_id) {
                    subscription_send_digest($USERINFO['mail'], $change_id,
                                             $lastupdate);
                }
            } elseif ($style === 'list') {
                subscription_send_list($USERINFO['mail'], $change_ids, $id);
            }
            // TODO: Handle duplicate subscriptions.

            // Update notification time.
            subscription_set($user, $id, $style, time(), true);
        }
        subscription_unlock($id);
    }

    // restore current user info
    $USERINFO = $olduinfo;
    $_SERVER['REMOTE_USER'] = $olduser;

    return false;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the request carries a 'debug' parameter, only a text/plain content
 * type header is sent and no image is emitted.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    if(isset($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php