<?php
/**
 * DokuWiki indexer
 *
 * Background task runner: answers with a 1x1 GIF (or plain text in debug
 * mode) and runs at most one maintenance job per request.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close(); //close session
if(!defined('NL')) define('NL',"\n");

// Version tag used to force rebuild on upgrade
define('INDEXER_VERSION', 2);

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
$defer = !@ignore_user_abort() || $conf['broken_iua'];
if(!$defer){
    sendGIF(); // send gif
}

// A missing or non-string id (e.g. ?id[]=x) is treated as "no page".
$ID = cleanID((isset($_REQUEST['id']) && is_string($_REQUEST['id'])) ? $_REQUEST['id'] : '');

// Catch any possible output (e.g. errors)
if(!isset($_REQUEST['debug'])) ob_start();

// run one of the jobs
// The jobs are tried in order; `or` short-circuits, so the first job that
// returns true ends the chain and advise_after() only fires when no job
// reported any work.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    metaUpdate() or
    runSitemapper() or
    sendDigest() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}
if($defer) sendGIF();

if(!isset($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache as needed.
 *
 * The changelog is rewritten at most once every 24 hours (tracked through the
 * mtime of "<changelog>.trimmed"). Entries newer than $conf['recent_days']
 * days are always kept; older entries are only kept as far as needed to
 * retain at least $conf['recent'] lines.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                            the page changelog
 * @return bool true if the changelog was rewritten, false if nothing was done
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Only run when the changelog exists, the last trim is more than a day
    // old and no other trim is in progress (the _tmp file acts as 2nd lock).
    if (!@file_exists($fn) ||
        (@filemtime($fn.'.trimmed')+86400) >= time() ||
        @file_exists($fn.'_tmp')) {
        // nothing done
        return false;
    }

    @touch($fn.'.trimmed');
    io_lock($fn);
    $lines = file($fn);
    if ($lines === false || count($lines) <= $conf['recent']) {
        // unreadable changelog, or nothing to trim
        io_unlock($fn);
        return false;
    }

    io_saveFile($fn.'_tmp', ''); // presave tmp as 2nd lock
    $trim_time = time() - $conf['recent_days']*86400;
    $out_lines = array();
    // must be initialised: it is sorted and sliced below even when no
    // line turned out to be older than $trim_time
    $old_lines = array();

    foreach ($lines as $i => $line) {
        $log = parseChangelogLine($line);
        if ($log === false) continue; // discard junk
        // append .$i to prevent key collisions
        $key = $log['date'].".$i";
        if ($log['date'] < $trim_time) {
            $old_lines[$key] = $line; // keep old lines for now
        } else {
            $out_lines[$key] = $line; // definitely keep these lines
        }
    }

    if (count($lines) == count($out_lines)) {
        // nothing to trim
        @unlink($fn.'_tmp');
        io_unlock($fn);
        return false;
    }

    // sort the final result, it shouldn't be necessary,
    // however the extra robustness in making the changelog cache self-correcting is worth it
    ksort($out_lines);
    $extra = $conf['recent'] - count($out_lines); // do we need extra lines to bring us up to minimum
    if ($extra > 0) {
        ksort($old_lines);
        $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
    }

    // save trimmed changelog
    $data = implode('', $out_lines);
    io_saveFile($fn.'_tmp', $data);
    @unlink($fn);
    if (!rename($fn.'_tmp', $fn)) {
        // rename failed so try another way...
        io_unlock($fn);
        io_saveFile($fn, $data);
        @unlink($fn.'_tmp');
    } else {
        io_unlock($fn);
    }
    return true;
}

/**
 * Runs the indexer for the current page
 *
 * Skips the work when the page's ".indexed" tag file carries at least
 * INDEXER_VERSION and is newer than the page file. Indexing is serialised
 * through a lock directory below $conf['lockdir'].
 *
 * @return bool true if the page was (re)indexed, false otherwise
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    global $conf;
    print "runIndexer(): started".NL;

    // Move index files (if needed)
    // Uses the importoldindex plugin to upgrade the index automatically.
    // FIXME: Remove this from runIndexer when it is no longer needed.
    if (@file_exists($conf['cachedir'].'/page.idx') &&
        (!@file_exists($conf['indexdir'].'/page.idx') ||
         !filesize($conf['indexdir'].'/page.idx')) &&
        !@file_exists($conf['indexdir'].'/index_importing')) {
        echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
    }

    if(!$ID) return false;

    // check if indexing needed
    $idxtag = metaFN($ID,'.indexed');
    if(@file_exists($idxtag)){
        if(io_readFile($idxtag) >= INDEXER_VERSION){
            $last = @filemtime($idxtag);
            if($last > @filemtime(wikiFN($ID))){
                print "runIndexer(): index for $ID up to date".NL;
                return false;
            }
        }
    }

    // try to acquire a lock
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        // Only an existing lock directory can be stale. Without the is_dir()
        // check a failing mkdir (e.g. unwritable lockdir) looked like a stale
        // lock and the loop never terminated.
        if(is_dir($lock) && time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            if(!@rmdir($lock)){
                // give up instead of spinning forever on a lock we cannot remove
                print "runIndexer(): removing the stale lock failed".NL;
                return false;
            }
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // upgrade to version 2
    if (!@file_exists($conf['indexdir'].'/pageword.idx'))
        idx_upgradePageWords();

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * @return bool true if a new .meta file was written, false otherwise
 */
function metaUpdate(){
    global $ID;
    global $conf;
    print "metaUpdate(): started".NL;

    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');

    // gather some additional info from changelog
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(!empty($info)){
        // first matching changelog entry gives the creation date
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose 4th capture is '*' do not count as modifications
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable!
 *
 * The sitemap is only rebuilt when it is empty/missing or older than
 * $conf['sitemap'] days. After writing it, the search engines are pinged.
 *
 * @return bool true if a new sitemap was written, false otherwise
 *
 * @author Andreas Gohr
 * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(@file_exists(DOKU_INC.$sitemap)){
        if(!is_writable(DOKU_INC.$sitemap)) return false;
    }else{
        if(!is_writable(DOKU_INC)) return false;
    }

    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    $pages = file($conf['indexdir'].'/page.idx');
    // file() yields false when the index cannot be read; treat that as an
    // empty page list instead of iterating over a boolean
    if($pages === false) $pages = array();
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print ' <url>'.NL;
        print ' <loc>'.wl($id,'',true).'</loc>'.NL;
        print ' <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print ' </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    //ping search engines...
    $http = new DokuHTTPClient();
    $http->timeout = 8;

    // engine name => ping endpoint; the urlencoded sitemap URL is appended
    $pings = array(
        'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap=',
        'yahoo'     => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=',
        'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap=',
    );
    foreach($pings as $engine => $endpoint){
        print 'runSitemapper(): pinging '.$engine.NL;
        $resp = $http->get($endpoint.urlencode(DOKU_URL.$sitemap));
        if($http->error) print 'runSitemapper(): '.$http->error.NL;
        print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
    }

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * While mails are generated, $USERINFO and $_SERVER['REMOTE_USER'] are
 * temporarily switched to the subscriber; both are restored afterwards.
 *
 * @return bool always false, so the remaining jobs of the task chain are
 *              still tried in the same request
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    echo 'sendDigest(): start'.NL;
    global $ID;
    global $conf;
    global $auth;
    global $USERINFO;

    if (!$conf['subscribers']) {
        return false;
    }
    require_once DOKU_INC . 'inc/subscription.php';
    $subscriptions = subscription_find($ID, array('style' => '(digest|list)',
                                                  'escaped' => true));

    // remember current user info
    $olduinfo = $USERINFO;
    $olduser  = isset($_SERVER['REMOTE_USER']) ? $_SERVER['REMOTE_USER'] : null;

    foreach($subscriptions as $id => $users) {
        foreach($users as $data) {
            list($user, $style, $lastupdate) = $data;
            $lastupdate = (int) $lastupdate;
            if ($lastupdate + $conf['subscribe_interval'] > time()) {
                // The configured subscribe_interval has not elapsed yet.
                continue;
            }

            // Work as the user to make sure ACLs apply correctly
            $USERINFO = $auth->getUserData($user);
            $_SERVER['REMOTE_USER'] = $user;
            if ($USERINFO === false) {
                continue;
            }

            // Mails go to the subscriber's address from the user record
            // fetched above.
            if (substr($id, -1, 1) === ':') {
                // The subscription target is a namespace
                $changes = getRecentsSince($lastupdate, null, getNS($id));
                if (count($changes) === 0) {
                    continue;
                }
                if ($style === 'digest') {
                    foreach($changes as $change) {
                        subscription_send_digest($USERINFO['mail'], $change,
                                                 $lastupdate);
                    }
                } elseif ($style === 'list') {
                    subscription_send_list($USERINFO['mail'], $changes, $id);
                }
                // TODO: Handle duplicate subscriptions.
            } else {
                if(auth_quickaclcheck($id) < AUTH_READ) continue;

                $meta = p_get_metadata($id);
                $rev = $meta['last_change']['date'];
                if ($rev < $lastupdate) {
                    // There is no new revision.
                    continue;
                }
                subscription_send_digest($USERINFO['mail'], $meta['last_change'],
                                         $lastupdate);
            }
            // Update notification time.
            subscription_set($user, $id, $style, time(), true);
        }
    }

    // restore current user info
    $USERINFO = $olduinfo;
    if ($olduser === null) {
        unset($_SERVER['REMOTE_USER']);
    } else {
        $_SERVER['REMOTE_USER'] = $olduser;
    }

    return false;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * @param int $int_date UNIX timestamp
 * @return string date such as 2004-02-12T15:19:21+00:00 (offset taken from
 *                PHP's current timezone setting)
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 */
function date_iso8601($int_date) {
    $date_mod = date('Y-m-d\TH:i:s', $int_date);
    // date('O') yields "+hhmm"; insert the colon required by ISO 8601
    $pre_timezone = date('O', $int_date);
    $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * In debug mode (any "debug" request parameter) only a text/plain header is
 * sent so the jobs' status output stays readable.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    if(isset($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php