<?php
/**
 * DokuWiki indexer
 *
 * Runs at most one background maintenance task per request (see the task
 * chain below) and answers the request with a 1x1 GIF, or with plain text
 * when the 'debug' request parameter is set.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// Version tag used to force rebuild on upgrade
define('INDEXER_VERSION', 2);

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
$defer = !@ignore_user_abort() || $conf['broken_iua'];
if(!$defer){
    sendGIF(); // send gif
}

$ID = cleanID($_REQUEST['id']);

// Catch any possible output (e.g. errors)
if(!isset($_REQUEST['debug'])) ob_start();

// run one of the jobs
// The `or` chain short-circuits: the first task that returns true ends the
// chain, and advise_after() is only reached when every task returned false.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    metaUpdate() or
    runSitemapper() or
    sendDigest() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}
if($defer) sendGIF();

if(!isset($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * Entries newer than $conf['recent_days'] days are always kept; older entries
 * are only kept as far as needed to retain at least $conf['recent'] lines.
 * The check runs at most once a day per changelog, tracked through the
 * modification time of "<changelog>.trimmed".
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead
 *                            of the page changelog
 * @return bool true if the changelog was rewritten, false otherwise
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, whichever is larger.
    // The trimming is only done once a day.
    if (@file_exists($fn) &&
        (@filemtime($fn.'.trimmed')+86400)<time() &&
        !@file_exists($fn.'_tmp')) {
        @touch($fn.'.trimmed');
        io_lock($fn);
        $lines = file($fn);
        if (count($lines)<=$conf['recent']) {
            // nothing to trim
            io_unlock($fn);
            return false;
        }

        io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // Must be initialised: when no entry is older than $trim_time but junk
        // lines were discarded, the top-up step below still sorts and slices it.
        $old_lines = array();

        foreach ($lines as $i => $line) {
            $log = parseChangelogLine($line);
            if ($log === false) continue;     // discard junk
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $line;  // keep old lines for now (append .$i to prevent key collisions)
            } else {
                $out_lines[$log['date'].".$i"] = $line;  // definitely keep these lines
            }
        }

        if (count($lines)==count($out_lines)) {
            // nothing to trim
            @unlink($fn.'_tmp');
            io_unlock($fn);
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);   // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            // take the newest $extra old entries and put them in front
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($fn.'_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn.'_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn.'_tmp');
        } else {
            io_unlock($fn);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * Skips the work when the page's ".indexed" tag file carries at least
 * INDEXER_VERSION and is newer than the page file. A directory under
 * $conf['lockdir'] serves as the lock; a lock older than five minutes is
 * considered stale and removed.
 *
 * @return bool true if the page was (re)indexed, false otherwise
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    global $conf;
    print "runIndexer(): started".NL;

    // Move index files (if needed)
    // Uses the importoldindex plugin to upgrade the index automatically.
    // FIXME: Remove this from runIndexer when it is no longer needed.
    if (@file_exists($conf['cachedir'].'/page.idx') &&
        (!@file_exists($conf['indexdir'].'/page.idx') ||
         !filesize($conf['indexdir'].'/page.idx'))  &&
        !@file_exists($conf['indexdir'].'/index_importing')) {
        echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
    }

    if(!$ID) return false;

    // check if indexing needed
    $idxtag = metaFN($ID,'.indexed');
    if(@file_exists($idxtag)){
        if(io_readFile($idxtag) >= INDEXER_VERSION){
            $last = @filemtime($idxtag);
            if($last > @filemtime(wikiFN($ID))){
                print "runIndexer(): index for $ID up to date".NL;
                return false;
            }
        }
    }

    // try to acquire a lock
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it and retry the mkdir
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    // upgrade to version 2
    if (!@file_exists($conf['indexdir'].'/pageword.idx'))
        idx_upgradePageWords();

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * @return bool true if a metadata file was written, false otherwise
 */
function metaUpdate(){
    global $ID;
    global $conf;
    print "metaUpdate(): started".NL;

    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    // gather some additional info from changelog
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(!empty($info)){
        // first matching changelog entry supplies the creation date
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose fourth capture is '*' are skipped
            // NOTE(review): presumably '*' marks a minor edit - confirm against the changelog format
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz (or
 * sitemap.xml when $conf['compression'] is neither 'gz' nor 'bz2') - This
 * file needs to be writable!
 *
 * The sitemap is only rebuilt when it is empty, missing or older than
 * $conf['sitemap'] days. After saving, the search engines are pinged.
 *
 * @return bool true if a new sitemap was written, false otherwise
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(@file_exists(DOKU_INC.$sitemap)){
        if(!is_writable(DOKU_INC.$sitemap)) return false;
    }else{
        if(!is_writable(DOKU_INC)) return false;
    }

    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    // file() returns false when the index is missing or unreadable; bail out
    // instead of writing (and announcing) an empty sitemap
    $pages = @file($conf['indexdir'].'/page.idx');
    if($pages === false){
        print 'runSitemapper(): page index not readable'.NL;
        return false;
    }
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    //ping search engines...
    $http = new DokuHTTPClient();
    $http->timeout = 8;

    //ping google
    print 'runSitemapper(): pinging google'.NL;
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $resp = $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;
    print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;

    //ping yahoo
    print 'runSitemapper(): pinging yahoo'.NL;
    $url  = 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $resp = $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;
    print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;

    //ping microsoft
    print 'runSitemapper(): pinging microsoft'.NL;
    $url  = 'http://www.bing.com/webmaster/ping.aspx?siteMap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $resp = $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;
    print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * While processing, $USERINFO and $_SERVER['REMOTE_USER'] are temporarily
 * switched to each subscriber and restored before returning.
 *
 * @return bool true if at least one subscription was processed (mail handed
 *              to the subscription functions and its timestamp updated),
 *              false otherwise - consistent with the other tasks in the
 *              task chain
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    echo 'sendDigest(): start'.NL;
    global $ID;
    global $conf;
    global $auth;
    global $lang;
    global $USERINFO;

    if (!$conf['subscribers']) {
        return false;
    }
    $sent = false;
    $subscriptions = subscription_find($ID, array('style' => '(digest|list)',
                                                  'escaped' => true));

    // remember current user info (REMOTE_USER may be unset for anonymous requests)
    $olduinfo = $USERINFO;
    $olduser  = isset($_SERVER['REMOTE_USER']) ? $_SERVER['REMOTE_USER'] : null;

    foreach($subscriptions as $id => $users) {
        if (!subscription_lock($id)) {
            continue;
        }
        foreach($users as $data) {
            list($user, $style, $lastupdate) = $data;
            $lastupdate = (int) $lastupdate;
            if ($lastupdate + $conf['subscribe_time'] > time()) {
                // Less than the configured time period passed since last
                // update.
                continue;
            }

            // Work as the user to make sure ACLs apply correctly
            $USERINFO = $auth->getUserData($user);
            $_SERVER['REMOTE_USER'] = $user;
            if ($USERINFO === false) {
                continue;
            }

            if (substr($id, -1, 1) === ':') {
                // The subscription target is a namespace
                $changes = getRecentsSince($lastupdate, null, getNS($id));
                if (count($changes) === 0) {
                    continue;
                }
                if ($style === 'digest') {
                    foreach($changes as $change) {
                        subscription_send_digest($USERINFO['mail'], $change,
                                                 $lastupdate);
                    }
                } elseif ($style === 'list') {
                    subscription_send_list($USERINFO['mail'], $changes, $id);
                }
                // TODO: Handle duplicate subscriptions.
            } else {
                if(auth_quickaclcheck($id) < AUTH_READ) continue;

                $meta = p_get_metadata($id);
                $rev = $meta['last_change']['date'];
                if ($rev < $lastupdate) {
                    // There is no new revision.
                    continue;
                }
                subscription_send_digest($USERINFO['mail'], $meta['last_change'],
                                         $lastupdate);
            }
            // Update notification time.
            subscription_set($user, $id, $style, time(), true);
            $sent = true;
        }
        subscription_unlock($id);
    }

    // restore current user info
    $USERINFO = $olduinfo;
    $_SERVER['REMOTE_USER'] = $olduser;

    return $sent;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * Produces e.g. "2004-02-12T15:19:21+00:00" in the configured default
 * timezone. PHP's 'c' format character yields exactly the string that was
 * previously assembled by hand from 'Y-m-d\TH:i:s' and a colon-split 'O'.
 *
 * @param int $int_date UNIX timestamp
 * @return string the formatted date
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 */
function date_iso8601($int_date) {
    return date('c', $int_date);
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * In debug mode (request parameter 'debug' set) only a text/plain content
 * type header is sent and no image is output.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    if(isset($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php