<?php
/**
 * DokuWiki indexer
 *
 * Background task runner. The script answers with a 1x1 GIF and runs at most
 * one built-in maintenance job per request (page indexing, metadata
 * rendering, sitemap generation, changelog trimming).
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close(); //close session
if(!defined('NL')) define('NL',"\n");

// Version tag used to force rebuild on upgrade
define('INDEXER_VERSION', 2);

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
if(@ignore_user_abort() && !$conf['broken_iua']){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// a missing id parameter is treated like an empty one (and raises no notice)
$ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');

// Catch any possible output (e.g. errors)
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs
// The "or" chain short-circuits: the first job returning true ends the chain,
// so advise_after() is only reached when every built-in job returned false.
$tmp = array();
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    metaUpdate() or
    runSitemapper() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}

// Discard the captured job output BEFORE sending the deferred GIF. In the
// opposite order the image is printed into the output buffer and thrown away
// again by ob_end_clean(), leaving a Content-Length header without a body.
if(empty($_REQUEST['debug'])) ob_end_clean();
if($defer) sendGIF();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead
 *                            of the page changelog
 * @return bool true if an import was triggered or the changelog was
 *              rewritten, false if nothing was done
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    // Import old changelog (if needed)
    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
    if (!$media_changes && isset($conf['changelog_old']) &&
        @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
        !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
        return true;
    }

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the changes of the last
    // $conf['recent_days'] days or to $conf['recent'] items, whichever is larger.
    // The trimming is only done once a day.
    if (@file_exists($fn) &&
        (filectime($fn)+86400)<time() &&
        !@file_exists($fn.'_tmp')) {
        io_lock($fn);
        $lines = file($fn);
        if ($lines === false || count($lines)<=$conf['recent']) {
            // unreadable or nothing to trim
            io_unlock($fn);
            return false;
        }

        io_saveFile($fn.'_tmp', ''); // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must start out as an array: when no line is older than $trim_time
        // it is still passed to ksort()/array_slice() below
        $old_lines = array();

        for ($i=0; $i<count($lines); $i++) {
            $log = parseChangelogLine($lines[$i]);
            if ($log === false) continue; // discard junk
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $lines[$i]; // keep old lines for now (append .$i to prevent key collisions)
            } else {
                $out_lines[$log['date'].".$i"] = $lines[$i]; // definitely keep these lines
            }
        }

        // sort the final result, it shouldn't be necessary,
        // however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines); // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            // take the newest of the old lines to fill up to the minimum
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($fn.'_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn.'_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn.'_tmp');
        } else {
            io_unlock($fn);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * Reads the page id from the global $ID. The page is (re)indexed when its
 * '.indexed' tag file is missing, holds a version below INDEXER_VERSION or
 * is not newer than the page file.
 *
 * @return bool true if the page was indexed, false if there was nothing to
 *              do or the indexer lock could not be acquired
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    global $conf;
    print "runIndexer(): started".NL;

    // Move index files (if needed)
    // Uses the importoldindex plugin to upgrade the index automatically.
    // FIXME: Remove this from runIndexer when it is no longer needed.
    if (@file_exists($conf['cachedir'].'/page.idx') &&
        (!@file_exists($conf['indexdir'].'/page.idx') ||
         !filesize($conf['indexdir'].'/page.idx')) &&
        !@file_exists($conf['indexdir'].'/index_importing')) {
        echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
    }

    if(!$ID) return false;

    // check if indexing needed
    $idxtag = metaFN($ID,'.indexed');
    if(@file_exists($idxtag)){
        if(io_readFile($idxtag) >= INDEXER_VERSION){
            $last = @filemtime($idxtag);
            if($last > @filemtime(wikiFN($ID))){
                print "runIndexer(): index for $ID up to date".NL;
                return false;
            }
        }
    }

    // try to acquire a lock (a directory, because mkdir either creates it or fails)
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(is_dir($lock) && time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            if(!@rmdir($lock)){
                // give up instead of retrying forever on a lock we can't remove
                print "runIndexer(): removing the stale lock failed".NL;
                return false;
            }
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // upgrade to version 2
    if (!@file_exists($conf['indexdir'].'/pageword.idx'))
        idx_upgradePageWords();

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * Reads the page id from the global $ID.
 *
 * @return bool true if a metadata file was written, false if there is no
 *              page id, the metadata file already exists or the page file
 *              does not exist
 */
function metaUpdate(){
    global $ID;
    print "metaUpdate(): started".NL;

    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');
    global $conf;


    // gather some additional info from changelog
    // captured groups: 1 = first numeric field (used as date), 2 = dotted quad,
    // 3 = field after the page id (used as contributor), 4 = last field
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(!empty($info)){
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose last field is '*' neither update the modification
            // date nor add a contributor
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable! (Without a configured compression the name
 * is sitemap.xml.)
 *
 * @return bool true if a new sitemap was written, false if sitemaps are
 *              disabled, the target is not writable or the existing sitemap
 *              is still recent enough
 *
 * @author Andreas Gohr
 * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(@file_exists(DOKU_INC.$sitemap)){
        if(!is_writable(DOKU_INC.$sitemap)) return false;
    }else{
        if(!is_writable(DOKU_INC)) return false;
    }

    // $conf['sitemap'] is used as the maximum age of the sitemap in days
    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    $pages = file($conf['indexdir'].'/page.idx');
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print ' <url>'.NL;
        print ' <loc>'.wl($id,'',true).'</loc>'.NL;
        print ' <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print ' </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    //ping search engines...
    $http = new DokuHTTPClient();
    $http->timeout = 8;

    // engine name => ping URL; the urlencoded sitemap location gets appended
    $engines = array(
        'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap=',
        'yahoo'     => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=',
        'microsoft' => 'http://webmaster.live.com/webmaster/ping.aspx?sitemap=',
    );
    $target = urlencode(DOKU_URL.$sitemap);
    foreach($engines as $engine => $url){
        print 'runSitemapper(): pinging '.$engine.NL;
        $resp = $http->get($url.$target);
        if($http->error) print 'runSitemapper(): '.$http->error.NL;
        print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
    }

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result has the form YYYY-MM-DDTHH:MM:SS followed by the timezone
 * offset written with a colon (e.g. +02:00).
 *
 * @param int $int_date UNIX timestamp to format
 * @return string the formatted date
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 */
function date_iso8601($int_date) {
    $date_mod = date('Y-m-d\TH:i:s', $int_date);
    // date('O') yields the offset as +hhmm, insert the colon: +hh:mm
    $pre_timezone = date('O', $int_date);
    $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the debug request parameter is set, only a text/plain content type
 * header is sent and no image.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // empty() instead of a plain read: no notice when the parameter is missing
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php