<?php
/**
 * DokuWiki indexer
 *
 * Background task runner requested by the browser as a 1x1 GIF. Each request
 * runs at most one of the built-in maintenance jobs (indexing, metadata
 * rendering, sitemap generation, changelog trimming).
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// Version tag used to force rebuild on upgrade
define('INDEXER_VERSION', 2);

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
if(@ignore_user_abort() && !$conf['broken_iua']){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// the request may come without an id; avoid an undefined index notice
$ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');

// Catch any possible output (e.g. errors)
// empty() keeps the original truthiness test but tolerates a missing key
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs
// The "or" chain short-circuits: the first job returning true ends the chain,
// so advise_after() is only reached when every built-in job returned false.
$tmp = array();
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    metaUpdate() or
    runSitemapper() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}
if($defer) sendGIF();

if(empty($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * Keeps every entry newer than $conf['recent_days'] days and, if that leaves
 * fewer than $conf['recent'] entries, tops the result up with the newest of
 * the older entries. Lines that cannot be parsed are dropped.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead
 *                            of the page changelog
 * @return bool true if the changelog was rewritten, false if nothing was done
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, whichever is larger.
    // The trimming is only done once a day, and never while a previous run's
    // temporary file is still present.
    if (!@file_exists($fn) ||
        (@filemtime($fn.'.trimmed')+86400) >= time() ||
        @file_exists($fn.'_tmp')) {
        // nothing done
        return false;
    }

    @touch($fn.'.trimmed');
    io_lock($fn);
    $lines = file($fn);
    if (count($lines) <= $conf['recent']) {
        // nothing to trim
        io_unlock($fn);
        return false;
    }

    io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
    $trim_time = time() - $conf['recent_days']*86400;
    $out_lines = array();
    // must be initialised: it is sorted and sliced below even when no line
    // was older than $trim_time (e.g. when only junk lines were discarded)
    $old_lines = array();

    foreach ($lines as $i => $line) {
        $log = parseChangelogLine($line);
        if ($log === false) continue;     // discard junk
        // append .$i to the key to prevent collisions between equal dates
        $key = $log['date'].".$i";
        if ($log['date'] < $trim_time) {
            $old_lines[$key] = $line;     // keep old lines for now
        } else {
            $out_lines[$key] = $line;     // definitely keep these lines
        }
    }

    if (count($lines) == count($out_lines)) {
        // nothing to trim
        @unlink($fn.'_tmp');
        io_unlock($fn);
        return false;
    }

    // sort the final result, it shouldn't be necessary,
    // however the extra robustness in making the changelog cache self-correcting is worth it
    ksort($out_lines);
    $extra = $conf['recent'] - count($out_lines);   // do we need extra lines to bring us up to minimum
    if ($extra > 0) {
        ksort($old_lines);
        $out_lines = array_merge(array_slice($old_lines, -$extra), $out_lines);
    }

    // save trimmed changelog
    $trimmed = implode('', $out_lines);
    io_saveFile($fn.'_tmp', $trimmed);
    @unlink($fn);
    if (!rename($fn.'_tmp', $fn)) {
        // rename failed so try another way...
        io_unlock($fn);
        io_saveFile($fn, $trimmed);
        @unlink($fn.'_tmp');
    } else {
        io_unlock($fn);
    }
    return true;
}

/**
 * Runs the indexer for the current page
 *
 * Skips the work when the page's '.indexed' tag file carries at least
 * INDEXER_VERSION and is newer than the page file. Indexing is serialised
 * through a lock directory; a lock older than five minutes is considered
 * stale and removed.
 *
 * @return bool true if the page was (re)indexed, false otherwise
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    global $conf;
    print "runIndexer(): started".NL;

    // Move index files (if needed)
    // Uses the importoldindex plugin to upgrade the index automatically.
    // FIXME: Remove this from runIndexer when it is no longer needed.
    if (@file_exists($conf['cachedir'].'/page.idx') &&
        (!@file_exists($conf['indexdir'].'/page.idx') ||
         !filesize($conf['indexdir'].'/page.idx')) &&
        !@file_exists($conf['indexdir'].'/index_importing')) {
        echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
    }

    if(!$ID) return false;

    // check if indexing needed
    $idxtag = metaFN($ID,'.indexed');
    if(@file_exists($idxtag)){
        if(io_readFile($idxtag) >= INDEXER_VERSION){
            $last = @filemtime($idxtag);
            if($last > @filemtime(wikiFN($ID))){
                print "runIndexer(): index for $ID up to date".NL;
                return false;
            }
        }
    }

    // try to acquire a lock
    // mkdir() doubles as the lock primitive: it fails while the directory exists
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it and retry
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // upgrade to version 2
    if (!@file_exists($conf['indexdir'].'/pageword.idx'))
        idx_upgradePageWords();

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * Creation date, last modification date and contributors are seeded from
 * matching changelog lines before the metadata renderer runs.
 *
 * @return bool true if a new '.meta' file was written, false otherwise
 */
function metaUpdate(){
    global $ID;
    global $conf;
    print "metaUpdate(): started".NL;

    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');

    // gather some additional info from changelog
    // capture groups: 1 = date, 2 = IPv4 address, 3 and 4 = the two fields
    // following the page id
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(!empty($info)){
        // the first matching line provides the creation date
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz (or
 * sitemap.xml when $conf['compression'] is neither 'bz2' nor 'gz') - This
 * file needs to be writable!
 *
 * After saving the map, Google, Yahoo and Microsoft are pinged with its URL.
 *
 * @return bool true if a new sitemap was written, false otherwise
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(@file_exists(DOKU_INC.$sitemap)){
        if(!is_writable(DOKU_INC.$sitemap)) return false;
    }else{
        if(!is_writable(DOKU_INC)) return false;
    }

    // $conf['sitemap'] is the regeneration interval in days
    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    // file() returns false when the index cannot be read; treat that as an
    // empty page list instead of passing false to count() and foreach
    $pages = @file($conf['indexdir'].'/page.idx');
    if($pages === false) $pages = array();
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    //ping search engines...
    $http = new DokuHTTPClient();
    $http->timeout = 8;

    // search engine name => ping URL prefix (the sitemap URL is appended)
    $engines = array(
        'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap=',
        'yahoo'     => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=',
        'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap=',
    );
    foreach($engines as $engine => $url){
        print 'runSitemapper(): pinging '.$engine.NL;
        $url .= urlencode(DOKU_URL.$sitemap);
        $resp = $http->get($url);
        if($http->error) print 'runSitemapper(): '.$http->error.NL;
        // flatten the response to a single line of plain text
        print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
    }

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result has the form YYYY-MM-DDTHH:MM:SS+HH:MM; the colon in the
 * timezone offset is inserted manually into the output of date('O').
 *
 * @param int $int_date UNIX timestamp to format
 * @return string the formatted date
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 */
function date_iso8601($int_date) {
    $date_mod     = date('Y-m-d\TH:i:s', $int_date);
    $pre_timezone = date('O', $int_date);   // e.g. +0200
    $time_zone    = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod    .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the 'debug' request parameter is set, only a text/plain content type
 * header is sent and no image is emitted.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // empty() tolerates a missing 'debug' parameter without a notice
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php