<?php
/**
 * DokuWiki indexer
 *
 * Background task runner. It is requested with the page id in the "id"
 * request parameter, answers with a 1x1 GIF and then runs at most one of
 * the maintenance jobs defined below. Passing a non-empty "debug" request
 * parameter switches the response to plain text and shows the job output.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
if(@ignore_user_abort()){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// Catch any possible output (e.g. errors)
// empty() is used so a missing "debug" parameter does not raise a notice/warning
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs - the first one returning true stops the chain
runIndexer() or metaUpdate() or runSitemapper() or runTrimRecentChanges();
if($defer) sendGIF();

if(empty($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @return bool true if the import was triggered or the changelog was
 *              handled, false if there was nothing to do
 */
function runTrimRecentChanges() {
    global $conf;

    // Import old changelog (if needed)
    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
    if (isset($conf['changelog_old']) &&
        @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
        !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
        return true;
    }

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, which ever is larger.
    // The trimming is only done once a day.
    if (@file_exists($conf['changelog']) &&
        (filectime($conf['changelog'])+86400)<time() &&
        !@file_exists($conf['changelog'].'_tmp')) {
        io_lock($conf['changelog']);
        $lines = file($conf['changelog']);
        if ($lines === false || count($lines)<$conf['recent']) {
            // unreadable or nothing to trim
            io_unlock($conf['changelog']);
            return true;
        }
        // trim changelog
        io_saveFile($conf['changelog'].'_tmp', '');          // presave tmp as 2nd lock
        $kept = 0;
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // check lines from newest to oldest
        for ($i = count($lines)-1; $i >= 0; $i--) {
            $tmp = parseChangelogLine($lines[$i]);
            if ($tmp===false) { continue; }                  // skip unparsable lines
            if ($tmp['date']>$trim_time || $kept<$conf['recent']) {
                array_push($out_lines, implode("\t", $tmp)."\n");
                $kept++;
            } else {
                // no more lines worth keeping
                break;
            }
        }
        // The lines were collected newest first; restore the oldest-first
        // order of the file so that later appended entries stay in sequence.
        $out_lines = array_reverse($out_lines);
        io_saveFile($conf['changelog'].'_tmp', implode('', $out_lines));
        @unlink($conf['changelog']);
        if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
            // rename failed so try another way...
            io_unlock($conf['changelog']);
            io_saveFile($conf['changelog'], implode('', $out_lines));
            @unlink($conf['changelog'].'_tmp');
        } else {
            io_unlock($conf['changelog']);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * The page id is taken from the "id" request parameter. The page is only
 * indexed when its '.indexed' meta file is not newer than the page file.
 * A lock directory in $conf['lockdir'] guards the indexing run.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @return bool true if the page was indexed, false otherwise
 */
function runIndexer(){
    global $conf;
    print "runIndexer(): started".NL;

    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;

    // check if indexing needed
    $last = @filemtime(metaFN($ID,'.indexed'));
    if($last > @filemtime(wikiFN($ID))){
        print "runIndexer(): index for $ID up to date".NL;
        return false;
    }

    // try to acquire a lock
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock (older than 5 minutes) - remove it and retry
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),' ');
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * @return bool true if a new meta file was written, false otherwise
 */
function metaUpdate(){
    print "metaUpdate(): started".NL;

    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed? (only when the page exists but has no meta file yet)
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');
    global $conf;


    // gather some additional info from changelog
    // captured groups: 1 = first numeric field, 2 = dotted-quad address,
    // 3 and 4 = the two tab separated fields following the page id
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(count($info)){
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose last captured field is '*' are not counted
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable!
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 *
 * @return bool true if a new sitemap was written, false otherwise
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['usegzip']){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(!is_writable(DOKU_INC.$sitemap)) return false;
    // $conf['sitemap'] is the refresh interval in days
    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    $pages = @file($conf['cachedir'].'/page.idx');
    if($pages === false){
        // without a readable page index there is nothing to build the map from
        print 'runSitemapper(): page index not readable'.NL;
        return false;
    }
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print ' <url>'.NL;
        print ' <loc>'.wl($id,'',true).'</loc>'.NL;
        print ' <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print ' </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    print 'runSitemapper(): pinging google'.NL;
    //ping google
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $http = new DokuHTTPClient();
    $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result has the form YYYY-MM-DDTHH:MM:SS+HH:MM, the offset being
 * taken from date('O') with a colon inserted.
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 *
 * @param int $int_date UNIX timestamp to format
 * @return string the formatted date
 */
function date_iso8601($int_date) {
   //$int_date: current date in UNIX timestamp
   $date_mod = date('Y-m-d\TH:i:s', $int_date);
   $pre_timezone = date('O', $int_date);
   // date('O') yields e.g. +0200 - turn it into +02:00
   $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
   $date_mod .= $time_zone;
   return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the "debug" request parameter is set, only a text/plain content
 * type header is sent instead so the job output stays readable.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // empty() avoids a notice/warning being emitted before the headers below
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php