<?php
/**
 * DokuWiki indexer
 *
 * Background job dispatcher: answers the request with a 1x1 GIF and then
 * runs at most one maintenance job (page indexing, metadata rendering,
 * sitemap generation or recent-changes trimming).
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// Version tag used to force rebuild on upgrade
define('INDEXER_VERSION', 2);

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
// (empty() instead of a bare negation: no notice may be emitted here,
//  because sendGIF() still has to send headers)
if(@ignore_user_abort() && empty($conf['broken_iua'])){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// Catch any possible output (e.g. errors)
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs - the "or" chain stops at the first job returning true
runIndexer() or metaUpdate() or runSitemapper() or runTrimRecentChanges();
if($defer) sendGIF();

if(empty($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @return bool true if an import was triggered or the trim branch ran
 *              (even when there was nothing to trim), false otherwise
 */
function runTrimRecentChanges() {
    global $conf;

    // Import old changelog (if needed)
    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
    if (isset($conf['changelog_old']) &&
        @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
        !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
        return true;
    }

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, whichever is larger.
    // The trimming is only done once a day.
    if (@file_exists($conf['changelog']) &&
        (filectime($conf['changelog'])+86400)<time() &&
        !@file_exists($conf['changelog'].'_tmp')) {
        io_lock($conf['changelog']);
        $lines = file($conf['changelog']);
        if (count($lines)<$conf['recent']) {
            // nothing to trim
            io_unlock($conf['changelog']);
            return true;
        }

        io_saveFile($conf['changelog'].'_tmp', '');  // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must start out as an array: when no entry is older than $trim_time
        // but junk lines were discarded, $extra below is still > 0 and the
        // sort/slice/merge would otherwise operate on an undefined variable
        $old_lines = array();

        $total = count($lines);
        for ($i=0; $i<$total; $i++) {
            $log = parseChangelogLine($lines[$i]);
            if ($log === false) continue;  // discard junk
            if ($log['date'] < $trim_time) {
                // keep old lines for now (append .$i to prevent key collisions)
                $old_lines[$log['date'].".$i"] = $lines[$i];
            } else {
                // definitely keep these lines
                $out_lines[$log['date'].".$i"] = $lines[$i];
            }
        }

        // sort the final result, it shouldn't be necessary,
        // however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);  // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            // prepend the newest $extra of the old entries
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($conf['changelog'].'_tmp', implode('', $out_lines));
        @unlink($conf['changelog']);
        if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
            // rename failed so try another way...
            io_unlock($conf['changelog']);
            io_saveFile($conf['changelog'], implode('', $out_lines));
            @unlink($conf['changelog'].'_tmp');
        } else {
            io_unlock($conf['changelog']);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * The page is taken from the 'id' request parameter. A directory in
 * $conf['lockdir'] serves as lock while the index is updated.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @return bool true if the page was (re)indexed, false if no id was given,
 *              the index is up to date or the indexer is locked
 */
function runIndexer(){
    global $conf;
    print "runIndexer(): started".NL;

    // Move index files (if needed)
    // Uses the importoldindex plugin to upgrade the index automatically.
    // FIXME: Remove this from runIndexer when it is no longer needed.
    if (@file_exists($conf['cachedir'].'/page.idx') &&
        (!@file_exists($conf['indexdir'].'/page.idx') ||
         !filesize($conf['indexdir'].'/page.idx')) &&
        !@file_exists($conf['indexdir'].'/index_importing')) {
        echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
    }

    $ID = cleanID($_REQUEST['id']);
    if(!$ID) return false;

    // check if indexing needed: the tag file stores the indexer version it
    // was written with and its mtime marks the time of the last indexing
    $idxtag = metaFN($ID,'.indexed');
    if(@file_exists($idxtag)){
        if(io_readFile($idxtag) >= INDEXER_VERSION){
            $last = @filemtime($idxtag);
            if($last > @filemtime(wikiFN($ID))){
                print "runIndexer(): index for $ID up to date".NL;
                return false;
            }
        }
    }

    // try to acquire a lock
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // upgrade to version 2
    if (!@file_exists($conf['indexdir'].'/pageword.idx'))
        idx_upgradePageWords();

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * @return bool true if a metadata file was written, false if no id was
 *              given, the metadata already exists or the page is missing
 */
function metaUpdate(){
    print "metaUpdate(): started".NL;

    $ID = cleanID($_REQUEST['id']);
    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');
    global $conf;


    // gather some additional info from changelog
    // captured groups: 1 = date, 2 = IPv4 address, 3 and 4 = the two
    // tab-separated fields following the page id
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(!empty($info)){
        // first matching entry gives the creation date
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose 4th group is '*' are skipped
            // NOTE(review): presumably '*' marks minor edits - confirm against the changelog format
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory (DOKU_INC), named sitemap.xml.gz
 * when $conf['compression'] is 'gz' or 'bz2' and sitemap.xml otherwise -
 * this file needs to be writable!
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 *
 * @return bool true if a new sitemap was written, false if sitemaps are
 *              disabled, the target is not writable or the existing
 *              sitemap is younger than $conf['sitemap'] days
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(@file_exists(DOKU_INC.$sitemap)){
        if(!is_writable(DOKU_INC.$sitemap)) return false;
    }else{
        if(!is_writable(DOKU_INC)) return false;
    }

    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    $pages = file($conf['indexdir'].'/page.idx');
    // file() returns false when the index can't be read - treat that as an
    // empty index so count() and foreach below get a real array
    if($pages === false) $pages = array();
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    //ping search engines...
    $http = new DokuHTTPClient();
    $http->timeout = 8;

    // engine name => ping URL prefix; the urlencoded sitemap URL is appended
    $pings = array(
        'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap=',
        'yahoo'     => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=',
        'microsoft' => 'http://search.live.com/ping?sitemap=',
    );
    foreach($pings as $engine => $base){
        print 'runSitemapper(): pinging '.$engine.NL;
        $resp = $http->get($base.urlencode(DOKU_URL.$sitemap));
        if($http->error) print 'runSitemapper(): '.$http->error.NL;
        print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
    }

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result has the form YYYY-MM-DDTHH:MM:SS followed by the UTC offset
 * with a colon between hours and minutes (e.g. +01:00).
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 *
 * @param  int    $int_date UNIX timestamp to format
 * @return string the formatted date
 */
function date_iso8601($int_date) {
    $date_mod     = date('Y-m-d\TH:i:s', $int_date);
    // date('O') yields the offset as +hhmm - insert the colon
    $pre_timezone = date('O', $int_date);
    $time_zone    = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod    .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the 'debug' request parameter is set, only a text/plain
 * Content-Type header is sent and no image is printed.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // empty() keeps an absent 'debug' parameter from raising a notice
    // before the headers below are sent
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php