<?php
/**
 * DokuWiki indexer
 *
 * Background worker script. Each request tries the maintenance jobs below in
 * a fixed order (index update, metadata rendering, sitemap generation,
 * recent changes trimming) and stops at the first job that reports it did
 * some work. The response sent to the browser is a 1x1 blank GIF, or plain
 * text when the 'debug' request parameter is set.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// Version tag used to force rebuild on upgrade
define('INDEXER_VERSION', 1);

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
// (empty() instead of a bare read: no notice may be emitted before the
// GIF headers are sent)
if(@ignore_user_abort() && empty($conf['broken_iua'])){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// Catch any possible output (e.g. errors)
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs - the 'or' chain short-circuits on the first job
// returning true
runIndexer() or metaUpdate() or runSitemapper() or runTrimRecentChanges();
if($defer) sendGIF();

if(empty($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @return bool true if an import was triggered or a trim run was started,
 *              false if there was nothing to do
 */
function runTrimRecentChanges() {
    global $conf;

    // Import old changelog (if needed)
    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
    if (isset($conf['changelog_old']) &&
        @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
        !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
            $tmp = array(); // no event data
            trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
            return true;
    }

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, whichever is larger.
    // The trimming is only done once a day.
    if (@file_exists($conf['changelog']) &&
        (filectime($conf['changelog'])+86400)<time() &&
        !@file_exists($conf['changelog'].'_tmp')) {
            io_lock($conf['changelog']);
            $lines = file($conf['changelog']);
            if (count($lines)<$conf['recent']) {
                // nothing to trim
                io_unlock($conf['changelog']);
                return true;
            }

            io_saveFile($conf['changelog'].'_tmp', '');          // presave tmp as 2nd lock
            $trim_time = time() - $conf['recent_days']*86400;
            $out_lines = array();
            // must start out as an array: it is sorted and sliced below even
            // when not a single line was older than $trim_time
            $old_lines = array();

            for ($i=0; $i<count($lines); $i++) {
                $log = parseChangelogLine($lines[$i]);
                if ($log === false) continue;                      // discard junk
                if ($log['date'] < $trim_time) {
                    $old_lines[$log['date'].".$i"] = $lines[$i];   // keep old lines for now (append .$i to prevent key collisions)
                } else {
                    $out_lines[$log['date'].".$i"] = $lines[$i];   // definitely keep these lines
                }
            }

            // sort the final result, it shouldn't be necessary,
            //   however the extra robustness in making the changelog cache self-correcting is worth it
            ksort($out_lines);
            $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
            if ($extra > 0) {
                ksort($old_lines);
                // take the newest $extra old lines and put them in front
                $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
            }

            // save trimmed changelog
            io_saveFile($conf['changelog'].'_tmp', implode('', $out_lines));
            @unlink($conf['changelog']);
            if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
                // rename failed so try another way...
                io_unlock($conf['changelog']);
                io_saveFile($conf['changelog'], implode('', $out_lines));
                @unlink($conf['changelog'].'_tmp');
            } else {
                io_unlock($conf['changelog']);
            }
            return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * The page is taken from the 'id' request parameter. A '.indexed' meta file
 * holding INDEXER_VERSION marks a page as indexed; the page is reindexed
 * when that file is missing, holds a lower version, or is not newer than
 * the page file.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @return bool true if the page was indexed, false if there was no page id,
 *              the index was up to date or the indexer was locked
 */
function runIndexer(){
    global $conf;
    print "runIndexer(): started".NL;

    // Move index files (if needed)
    // Uses the importoldindex plugin to upgrade the index automatically.
    // FIXME: Remove this from runIndexer when it is no longer needed.
    if (@file_exists($conf['cachedir'].'/page.idx') &&
        (!@file_exists($conf['indexdir'].'/page.idx') ||
         !filesize($conf['indexdir'].'/page.idx'))  &&
        !@file_exists($conf['indexdir'].'/index_importing')) {
        echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
    }

    $ID = cleanID($_REQUEST['id']);
    if(!$ID) return false;

    // check if indexing needed
    $idxtag = metaFN($ID,'.indexed');
    if(@file_exists($idxtag)){
        if(io_readFile($idxtag) >= INDEXER_VERSION){
            $last = @filemtime($idxtag);
            if($last > @filemtime(wikiFN($ID))){
                print "runIndexer(): index for $ID up to date".NL;
                return false;
            }
        }
    }

    // try to acquire a lock (a directory is used as the lock)
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * The page is taken from the 'id' request parameter. Creation date,
 * modification date and contributors are pre-filled from matching
 * changelog lines before the metadata renderer is run.
 *
 * @return bool true if a new meta file was written, false if there was no
 *              page id, the meta file already exists or the page is missing
 */
function metaUpdate(){
    print "metaUpdate(): started".NL;

    $ID = cleanID($_REQUEST['id']);
    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');
    global $conf;


    // gather some additional info from changelog
    // captured groups: 1 = timestamp, 2 = dotted quad (IP address),
    // 3 and 4 = the two fields following the page id
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(!empty($info)){
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // NOTE(review): '*' in the last field presumably marks a minor
            // edit - confirm against the changelog writer
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable!
 *
 * The name is sitemap.xml.gz when $conf['compression'] is 'gz' or 'bz2',
 * sitemap.xml otherwise. $conf['sitemap'] is the regeneration interval in
 * days; a false value disables the sitemap.
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 *
 * @return bool true if a new sitemap was written, false if sitemaps are
 *              disabled, the target is not writable or the existing
 *              sitemap is still fresh
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(@file_exists(DOKU_INC.$sitemap)){
        if(!is_writable(DOKU_INC.$sitemap)) return false;
    }else{
        if(!is_writable(DOKU_INC)) return false;
    }

    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
       print 'runSitemapper(): Sitemap up to date'.NL;
       return false;
    }

    // file() returns false when the index is missing or unreadable;
    // treat that as an empty page list instead of iterating over false
    $pages = @file($conf['indexdir'].'/page.idx');
    if(!is_array($pages)) $pages = array();
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap (collected in a nested output buffer)
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    //ping search engines...
    $http = new DokuHTTPClient();
    $http->timeout = 8;

    //ping google
    print 'runSitemapper(): pinging google'.NL;
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $resp = $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;
    print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;

    //ping yahoo
    print 'runSitemapper(): pinging yahoo'.NL;
    $url  = 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $resp = $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;
    print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;

    //ping microsoft
    print 'runSitemapper(): pinging microsoft'.NL;
    $url  = 'http://search.live.com/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $resp = $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;
    print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result has the form YYYY-MM-DDTHH:MM:SS+HH:MM, using the timezone
 * offset date() reports for the given timestamp.
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 *
 * @param  int    $int_date UNIX timestamp to format
 * @return string the formatted date
 */
function date_iso8601($int_date) {
   //$int_date: current date in UNIX timestamp
   $date_mod = date('Y-m-d\TH:i:s', $int_date);
   // date('O') yields e.g. +0200, a colon is inserted to get +02:00
   $pre_timezone = date('O', $int_date);
   $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
   $date_mod .= $time_zone;
   return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the 'debug' request parameter is set, only a text/plain content type
 * header is sent so the jobs' status messages can be read instead.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // !empty() instead of a bare read: a notice printed here would go out
    // before the headers below
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php