<?php
/**
 * DokuWiki indexer
 *
 * Requested by the browser as an (invisible) image. Each request answers
 * with a 1x1 GIF and runs at most one background job: page indexing,
 * metadata rendering, sitemap generation or recent changes trimming.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
if(@ignore_user_abort()){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// Catch any possible output (e.g. errors)
// empty() instead of a bare read avoids an undefined index notice
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs - the first one that reports work done ends the chain
runIndexer() or metaUpdate() or runSitemapper() or runTrimRecentChanges();

// Throw away the captured job output *before* sending the deferred GIF.
// Sending it while the buffer is still active would put the image into the
// buffer and ob_end_clean() would discard it together with the job output.
if(empty($_REQUEST['debug'])) ob_end_clean();
if($defer) sendGIF();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @return bool true when this job did (or tried to do) its work,
 *              false when there was nothing to do
 */
function runTrimRecentChanges() {
    global $conf;

    // Import old changelog (if needed)
    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
    if (isset($conf['changelog_old']) &&
        @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
        !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
        return true;
    }

    // Trim the Recent Changes
    // Drops all entries older than $conf['recent_days'] days and, of the
    // remaining ones, keeps at most the newest $conf['recent'] entries.
    // NOTE(review): an earlier comment promised "whichever is larger" - the
    // code has always kept the smaller set; confirm which policy is wanted.
    // The trimming is only done once a day.
    if (@file_exists($conf['changelog']) &&
        (filectime($conf['changelog'])+86400)<time() &&
        !@file_exists($conf['changelog'].'_tmp')) {

        io_lock($conf['changelog']);
        $lines = file($conf['changelog']);
        if ($lines === false || count($lines)<$conf['recent']) {
            // unreadable or nothing to trim
            io_unlock($conf['changelog']);
            return true;
        }

        io_saveFile($conf['changelog'].'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;

        // Parallel arrays instead of a date-keyed map: several changes may
        // share the same timestamp and must not overwrite each other.
        $dates     = array();
        $positions = array();
        $out_lines = array();

        foreach ($lines as $pos => $line) {
            $log = parseChangelogLine($line);
            if ($log === false || $log['date'] < $trim_time) continue; // discard old lines
            $dates[]     = $log['date'];
            $positions[] = $pos;
            $out_lines[] = $line;                            // preserve the rest
        }

        // sort lines by date, just in case! Ties keep their original file order.
        if (count($out_lines) > 1) {
            array_multisort($dates,     SORT_ASC, SORT_NUMERIC,
                            $positions, SORT_ASC, SORT_NUMERIC,
                            $out_lines);
        }

        if (count($out_lines) > $conf['recent']) {
            $out_lines = array_slice($out_lines,-$conf['recent']); // trim list to one page
        }

        // save trimmed changelog
        $trimmed = implode('', $out_lines);
        io_saveFile($conf['changelog'].'_tmp', $trimmed);
        @unlink($conf['changelog']);
        if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
            // rename failed so try another way...
            io_unlock($conf['changelog']);
            io_saveFile($conf['changelog'], $trimmed);
            @unlink($conf['changelog'].'_tmp');
        } else {
            io_unlock($conf['changelog']);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * The page is taken from the 'id' request parameter. A directory below
 * $conf['lockdir'] serves as lock so only one indexer runs at a time.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @return bool true when the page was indexed, false otherwise
 */
function runIndexer(){
    global $conf;
    print "runIndexer(): started".NL;

    // guard against a missing parameter to avoid an undefined index notice
    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;

    // check if indexing needed: the '.indexed' marker is newer than the page
    $last = @filemtime(metaFN($ID,'.indexed'));
    if($last > @filemtime(wikiFN($ID))){
        print "runIndexer(): index for $ID up to date".NL;
        return false;
    }

    // try to acquire a lock
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it and retry
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),' ');
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * @return bool true when a new meta file was written, false otherwise
 */
function metaUpdate(){
    print "metaUpdate(): started".NL;

    // guard against a missing parameter to avoid an undefined index notice
    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed? Only when the page exists but its meta file does not
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');
    global $conf;

    // gather some additional info from changelog
    // captures: 1 = timestamp, 2 = dotted IP address, 3 and 4 = the two
    // tab separated fields following the page id
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(count($info)){
        // presumably the first match is the oldest entry - verify against io_grep
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz (or
 * sitemap.xml when $conf['usegzip'] is off) - This file needs to exist
 * and to be writable!
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 * @return bool true when a new sitemap was written, false otherwise
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['usegzip']){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(!is_writable(DOKU_INC.$sitemap)) return false;
    // $conf['sitemap'] is the regeneration interval in days
    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    // Without a readable page index there is nothing to build. Bail out
    // instead of overwriting the sitemap with an empty one.
    $pages = @file($conf['cachedir'].'/page.idx');
    if($pages === false){
        print 'runSitemapper(): page index not readable'.NL;
        return false;
    }
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap (collected through a nested output buffer)
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    print 'runSitemapper(): pinging google'.NL;
    //ping google
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $http = new DokuHTTPClient();
    $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * @author <ungu at terong dot com>
 * @link   http://www.php.net/manual/en/function.date.php#54072
 * @param  int $int_date UNIX timestamp
 * @return string date like 2006-01-31T13:45:00+01:00 (server timezone)
 */
function date_iso8601($int_date) {
    $date_mod     = date('Y-m-d\TH:i:s', $int_date);
    // date('O') yields the offset as +hhmm, ISO 8601 wants +hh:mm
    $pre_timezone = date('O', $int_date);
    $time_zone    = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod    .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the 'debug' request parameter is set, only a text/plain content
 * type header is sent so the job output becomes readable.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php