<?php
/**
 * DokuWiki indexer
 *
 * Background task runner. Each request performs at most one of the jobs
 * below (index update, metadata rendering, sitemap generation, recent
 * changes trimming) and answers with a 1x1 GIF, or with plain text when
 * the "debug" request parameter is set.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// Version tag used to force rebuild on upgrade
define('INDEXER_VERSION', 2);

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
// (empty() avoids an undefined index notice when the option is not configured)
if(@ignore_user_abort() && empty($conf['broken_iua'])){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// only accept a plain string id; a missing or array-valued parameter means "no page"
$ID = cleanID((isset($_REQUEST['id']) && is_string($_REQUEST['id'])) ? $_REQUEST['id'] : '');

// Catch any possible output (e.g. errors)
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs
// The "or" chain short-circuits: the first job returning true ends the chain,
// so advise_after() is only reached when every job returned false.
$tmp = array();
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    metaUpdate() or
    runSitemapper() or
    runTrimRecentChanges() or
    $evt->advise_after();
}
if($defer) sendGIF();

if(empty($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @return bool true when an import was triggered or the changelog was
 *              rewritten, false when there was nothing to do
 */
function runTrimRecentChanges() {
    global $conf;

    // Import old changelog (if needed)
    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
    if (isset($conf['changelog_old']) &&
        @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
        !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
        return true;
    }

    // Trim the Recent Changes
    // Keeps every entry of the last $conf['recent_days'] days, topped up with
    // older entries until at least $conf['recent'] items remain.
    // The trimming is only done once a day.
    if (@file_exists($conf['changelog']) &&
        (filectime($conf['changelog'])+86400)<time() &&
        !@file_exists($conf['changelog'].'_tmp')) {
        io_lock($conf['changelog']);
        $lines = file($conf['changelog']);
        if ($lines === false || count($lines)<=$conf['recent']) {
            // unreadable or nothing to trim
            io_unlock($conf['changelog']);
            return false;
        }

        io_saveFile($conf['changelog'].'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must start out as an array: it is sorted and sliced below even when
        // no entry was older than the cut-off
        $old_lines = array();

        foreach ($lines as $i => $line) {
            $log = parseChangelogLine($line);
            if ($log === false) continue;                      // discard junk
            // append .$i to the key to prevent collisions between entries of the same second
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $line;        // keep old lines for now
            } else {
                $out_lines[$log['date'].".$i"] = $line;        // definitely keep these lines
            }
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($conf['changelog'].'_tmp', implode('', $out_lines));
        @unlink($conf['changelog']);
        if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
            // rename failed so try another way...
            io_unlock($conf['changelog']);
            io_saveFile($conf['changelog'], implode('', $out_lines));
            @unlink($conf['changelog'].'_tmp');
        } else {
            io_unlock($conf['changelog']);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * Reads the page from the global $ID. Indexing is skipped when the page's
 * ".indexed" tag file holds at least INDEXER_VERSION and is newer than the
 * page file, or when another process currently holds the indexer lock.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @return bool true when the page was (re)indexed, false otherwise
 */
function runIndexer(){
    global $ID;
    global $conf;
    print "runIndexer(): started".NL;

    // Move index files (if needed)
    // Uses the importoldindex plugin to upgrade the index automatically.
    // FIXME: Remove this from runIndexer when it is no longer needed.
    if (@file_exists($conf['cachedir'].'/page.idx') &&
        (!@file_exists($conf['indexdir'].'/page.idx') ||
         !filesize($conf['indexdir'].'/page.idx'))  &&
        !@file_exists($conf['indexdir'].'/index_importing')) {
        echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
    }

    if(!$ID) return false;

    // check if indexing needed
    $idxtag = metaFN($ID,'.indexed');
    if(@file_exists($idxtag)){
        if(io_readFile($idxtag) >= INDEXER_VERSION){
            $last = @filemtime($idxtag);
            if($last > @filemtime(wikiFN($ID))){
                print "runIndexer(): index for $ID up to date".NL;
                return false;
            }
        }
    }

    // try to acquire a lock
    // A stale lock is removed at most once per run. Without that limit the
    // loop would spin forever whenever the lock directory can neither be
    // created nor removed (e.g. missing permissions).
    $lock = $conf['lockdir'].'/_indexer.lock';
    $staleRemoved = false;
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(!$staleRemoved && time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            @rmdir($lock);
            $staleRemoved = true;
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // upgrade to version 2
    if (!@file_exists($conf['indexdir'].'/pageword.idx'))
        idx_upgradePageWords();

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if it does not exist yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * Creation date, modification date and contributors are seeded from the
 * matching changelog lines before the page is rendered.
 *
 * @return bool true when a new metadata file was written, false otherwise
 */
function metaUpdate(){
    global $ID;
    global $conf;
    print "metaUpdate(): started".NL;

    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');

    // gather some additional info from changelog
    // captured groups: 1 = date, 2 = IPv4 address, 3 and 4 = the two tab separated fields after the page id
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(!empty($info)){
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose 4th field is '*' do not count as modifications
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory (DOKU_INC). It is named
 * sitemap.xml.gz when $conf['compression'] is 'gz' or 'bz2' and sitemap.xml
 * otherwise - this file needs to be writable!
 *
 * A new map is only built when the existing one is empty or older than
 * $conf['sitemap'] days. After saving, the search engines are pinged.
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 *
 * @return bool true when a new sitemap was written, false otherwise
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(@file_exists(DOKU_INC.$sitemap)){
        if(!is_writable(DOKU_INC.$sitemap)) return false;
    }else{
        if(!is_writable(DOKU_INC)) return false;
    }

    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    $pages = @file($conf['indexdir'].'/page.idx');
    if($pages === false){
        // no readable page index: do not overwrite the sitemap with an empty
        // one and do not ping anybody, just try again on a later run
        print 'runSitemapper(): page index not readable'.NL;
        return false;
    }
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    //ping search engines...
    $http = new DokuHTTPClient();
    $http->timeout = 8;

    // service name => ping URL prefix; the url-encoded sitemap address is appended
    $services = array(
        'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap=',
        'yahoo'     => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=',
        'microsoft' => 'http://search.live.com/ping?sitemap=',
    );
    foreach($services as $service => $url){
        print 'runSitemapper(): pinging '.$service.NL;
        $url .= urlencode(DOKU_URL.$sitemap);
        $resp = $http->get($url);
        if($http->error) print 'runSitemapper(): '.$http->error.NL;
        print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
    }

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The UTC offset reported by date('O') (e.g. "+0200") gets a colon inserted
 * so the result looks like 2005-08-15T15:52:01+02:00.
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 *
 * @param int $int_date UNIX timestamp
 * @return string the formatted date
 */
function date_iso8601($int_date) {
    $date_mod     = date('Y-m-d\TH:i:s', $int_date);
    $pre_timezone = date('O', $int_date);
    // "+hhmm" -> "+hh:mm"
    $time_zone    = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod    .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the "debug" request parameter is set, only a text/plain content
 * type header is sent instead, so the jobs' output stays readable.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php