<?php
/**
 * DokuWiki indexer
 *
 * Background worker requested through a 1x1 GIF embedded in wiki pages. Each
 * request runs at most one of the maintenance jobs below (the first one that
 * reports it did something) and answers with a blank GIF.
 *
 * Passing a non-empty "debug" request parameter switches the response to
 * text/plain and disables output capturing so the job messages become visible.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
if(@ignore_user_abort()){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// Catch any possible output (e.g. errors)
// empty() is used so a missing "debug" parameter does not raise a notice
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs - the "or" chain stops at the first job returning true
runIndexer() or metaUpdate() or runSitemapper() or runTrimRecentChanges();
if($defer) sendGIF();

if(empty($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @return bool true when the import event was triggered or the trim branch
 *              was entered, false when there was nothing to do
 */
function runTrimRecentChanges() {
    global $conf;

    // Import old changelog (if needed)
    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
    if (isset($conf['changelog_old']) &&
        @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
        !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
        return true;
    }

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, which ever is larger.
    // The trimming is only done once a day.
    if (@file_exists($conf['changelog']) &&
        (filectime($conf['changelog'])+86400)<time() &&
        !@file_exists($conf['changelog'].'_tmp')) {
        io_lock($conf['changelog']);
        $lines = file($conf['changelog']);
        if ($lines === false || count($lines)<$conf['recent']) {
            // unreadable or nothing to trim
            io_unlock($conf['changelog']);
            return true;
        }
        // trim changelog
        io_saveFile($conf['changelog'].'_tmp', '');  // presave tmp as 2nd lock
        $kept = 0;
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // check lines from newest to oldest
        for ($i = count($lines)-1; $i >= 0; $i--) {
            $tmp = parseChangelogLine($lines[$i]);
            if ($tmp===false) { continue; }
            if ($tmp['date']>$trim_time || $kept<$conf['recent']) {
                // prepend: we walk backwards, so this keeps the written file
                // in its original oldest-to-newest order
                array_unshift($out_lines, implode("\t", $tmp)."\n");
                $kept++;
            } else {
                // no more lines worth keeping
                break;
            }
        }
        io_saveFile($conf['changelog'].'_tmp', implode('', $out_lines));
        @unlink($conf['changelog']);
        if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
            // rename failed so try another way...
            io_unlock($conf['changelog']);
            io_saveFile($conf['changelog'], implode('', $out_lines));
            @unlink($conf['changelog'].'_tmp');
        } else {
            io_unlock($conf['changelog']);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * The page is taken from the "id" request parameter. Indexing is skipped when
 * the page's '.indexed' marker file is newer than the page file itself.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @return bool true when the page was indexed, false when no id was given,
 *              the index is up to date or another indexer holds the lock
 */
function runIndexer(){
    global $conf;
    print "runIndexer(): started".NL;

    $ID = cleanID($_REQUEST['id']);
    if(!$ID) return false;

    // check if indexing needed
    $last = @filemtime(metaFN($ID,'.indexed'));
    if($last > @filemtime(wikiFN($ID))){
        print "runIndexer(): index for $ID up to date".NL;
        return false;
    }

    // try to acquire a lock - a directory is used as lock, mkdir fails if it exists
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it and retry the mkdir
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),' ');
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * @return bool true when a new '.meta' file was written, false when no id was
 *              given, the meta file already exists or the page does not exist
 */
function metaUpdate(){
    print "metaUpdate(): started".NL;

    $ID = cleanID($_REQUEST['id']);
    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');
    global $conf;


    // gather some additional info from changelog
    // match groups: 1 = date, 2 = IP, 3 = user, 4 = summary
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(count($info)){
        // the first matching entry provides the creation date
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose summary is exactly '*' are skipped
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz (or
 * sitemap.xml when $conf['usegzip'] is off) - This file needs to exist
 * and be writable!
 *
 * $conf['sitemap'] enables the job and gives the number of days after
 * which an existing sitemap is rebuilt.
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 *
 * @return bool true when a sitemap was written, false when the feature is
 *              disabled, the file is not writable or still up to date
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['usegzip']){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(!is_writable(DOKU_INC.$sitemap)) return false;
    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    $pages = file($conf['cachedir'].'/page.idx');
    // an unreadable page index is treated as "no pages" instead of
    // handing false to count() and foreach
    if($pages === false) $pages = array();
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap - the XML is collected through a nested output buffer
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print ' <url>'.NL;
        print ' <loc>'.wl($id,'',true).'</loc>'.NL;
        print ' <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print ' </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    print 'runSitemapper(): pinging google'.NL;
    //ping google
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $http = new DokuHTTPClient();
    $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result looks like 2006-01-31T13:45:00+01:00 - the timezone offset
 * returned by date('O') gets a colon inserted between hours and minutes.
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 *
 * @param  int    $int_date UNIX timestamp
 * @return string formatted date including the timezone offset
 */
function date_iso8601($int_date) {
    //$int_date: current date in UNIX timestamp
    $date_mod = date('Y-m-d\TH:i:s', $int_date);
    $pre_timezone = date('O', $int_date);
    $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * With a non-empty "debug" request parameter only a text/plain
 * content type header is sent and no image is written.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // !empty() avoids a notice being printed before the headers are sent
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php