<?php
/**
 * DokuWiki indexer
 *
 * Background job runner: each request answers with a 1x1 GIF and runs at
 * most one maintenance job (page indexing, metadata rendering, sitemap
 * generation or changelog trimming).
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
if(@ignore_user_abort()){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// Catch any possible output (e.g. errors)
// empty() is used so a missing 'debug' parameter does not raise a notice
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs
// (the 'or' chain short-circuits: the first job returning true ends the run)
runIndexer() or metaUpdate() or runSitemapper() or runTrimRecentChanges();
if($defer) sendGIF();

if(empty($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * If an old changelog exists and no new changelog (nor an import or trim in
 * progress marker) is present, TEMPORARY_CHANGELOG_UPGRADE_EVENT is triggered
 * and nothing else is done. Otherwise, when the changelog's ctime is more
 * than a day old, the changelog is rewritten so that it only contains
 * entries younger than $conf['recent_days'] days - topped up with the newest
 * older entries if that leaves fewer than $conf['recent'] lines.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @return bool true if a job was run, false if there was nothing to do
 */
function runTrimRecentChanges() {
    global $conf;

    // Import old changelog (if needed)
    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
    if (isset($conf['changelog_old']) &&
        @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
        !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
        return true;
    }

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, whichever is larger.
    // The trimming is only done once a day.
    if (@file_exists($conf['changelog']) &&
        (filectime($conf['changelog'])+86400)<time() &&
        !@file_exists($conf['changelog'].'_tmp')) {
        io_lock($conf['changelog']);
        $lines = file($conf['changelog']);
        if (!is_array($lines) || count($lines)<$conf['recent']) {
            // unreadable changelog or nothing to trim
            io_unlock($conf['changelog']);
            return true;
        }

        io_saveFile($conf['changelog'].'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must start out as an array: when every dropped line was junk, no
        // old line is ever stored and the top-up below would otherwise work
        // on an undefined variable and wipe the changelog
        $old_lines = array();

        $num = count($lines);
        for ($i=0; $i<$num; $i++) {
            $log = parseChangelogLine($lines[$i]);
            if ($log === false) continue;                      // discard junk
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $lines[$i];   // keep old lines for now (append .$i to prevent key collisions)
            } else {
                $out_lines[$log['date'].".$i"] = $lines[$i];   // definitely keep these lines
            }
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            // newest $extra old lines go in front of the lines kept anyway
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($conf['changelog'].'_tmp', implode('', $out_lines));
        @unlink($conf['changelog']);
        if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
            // rename failed so try another way...
            io_unlock($conf['changelog']);
            io_saveFile($conf['changelog'], implode('', $out_lines));
            @unlink($conf['changelog'].'_tmp');
        } else {
            io_unlock($conf['changelog']);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * The page is taken from the 'id' request parameter. It is (re)indexed when
 * its '.indexed' marker file is not newer than the page file. A directory
 * below $conf['lockdir'] serves as lock; a lock older than five minutes is
 * considered stale and removed.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @return bool true if the page was indexed, false otherwise
 */
function runIndexer(){
    global $conf;
    print "runIndexer(): started".NL;

    // Move index files (if needed)
    // Uses the importoldindex plugin to upgrade the index automatically.
    // FIXME: Remove this from runIndexer when it is no longer needed.
    if (@file_exists($conf['cachedir'].'/page.idx') &&
        (!@file_exists($conf['indexdir'].'/page.idx') ||
         !filesize($conf['indexdir'].'/page.idx'))  &&
        !@file_exists($conf['indexdir'].'/index_importing')) {
        echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
    }

    // a missing 'id' parameter is treated like an empty one
    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;

    // check if indexing needed
    $last = @filemtime(metaFN($ID,'.indexed'));
    if($last > @filemtime(wikiFN($ID))){
        print "runIndexer(): index for $ID up to date".NL;
        return false;
    }

    // try to acquire a lock
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),' ');
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * Creation date, modification date and contributors are seeded from the
 * changelog entries of the page before the metadata renderer is run.
 *
 * @return bool true if metadata was rendered and saved, false otherwise
 */
function metaUpdate(){
    global $conf;
    print "metaUpdate(): started".NL;

    // a missing 'id' parameter is treated like an empty one
    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');

    // gather some additional info from changelog
    // captured groups: 1 = date, 2 = IP address, 3 and 4 = the two fields
    // following the page id
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(count($info)){
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose fourth field is '*' are skipped
            // NOTE(review): presumably '*' marks a minor edit - confirm
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable!
 *
 * The sitemap is only rebuilt when the existing file is empty or older than
 * $conf['sitemap'] days. Hidden pages, pages without a file and pages that
 * are not readable without a login are left out.
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 * @return bool true if a new sitemap was written, false otherwise
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(!is_writable(DOKU_INC.$sitemap)) return false;
    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    $pages = file($conf['indexdir'].'/page.idx');
    if(!is_array($pages)){
        // without a readable index there is nothing to list; bail out instead
        // of replacing the sitemap with an empty one (and leaking a PHP
        // warning into the buffered XML below)
        print 'runSitemapper(): page index not readable'.NL;
        return false;
    }
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    // (collected in a nested output buffer so it can be saved to a file)
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print '  <url>'.NL;
        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
        print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print '  </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    print 'runSitemapper(): pinging google'.NL;
    //ping google
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $http = new DokuHTTPClient();
    $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The timezone offset reported by date('O') (e.g. +0200) gets a colon
 * inserted between hours and minutes (+02:00).
 *
 * @author <ungu at terong dot com>
 * @link   http://www.php.net/manual/en/function.date.php#54072
 * @param  int $int_date UNIX timestamp
 * @return string e.g. 2006-05-17T09:30:00+02:00
 */
function date_iso8601($int_date) {
    //$int_date: current date in UNIX timestamp
    $date_mod     = date('Y-m-d\TH:i:s', $int_date);
    $pre_timezone = date('O', $int_date);
    $time_zone    = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod    .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the 'debug' request parameter is set, only a text/plain content type
 * header is sent so the job output can be read in the browser.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // empty() keeps a missing 'debug' parameter from raising a notice
    // ahead of the header() calls below
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php