<?php
/**
 * DokuWiki indexer
 *
 * Runs at most one background maintenance task per request (see the task
 * chain below) and answers the browser with a 1x1 GIF.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
$defer = !@ignore_user_abort() || $conf['broken_iua'];
if(!$defer){
    sendGIF(); // send gif
}

// a request without an id must not raise an undefined index notice
$ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');

// Catch any possible output (e.g. errors)
$output = isset($_REQUEST['debug']) && $conf['allowdebug'];
if(!$output) ob_start();

// run one of the jobs
// The `or` chain short-circuits: the first task returning true ends the chain,
// and the after-event is only advised when every task returned a falsy value.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    metaUpdate() or
    runSitemapper() or
    sendDigest() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}
if($defer) sendGIF();

if(!$output) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * Entries newer than $conf['recent_days'] days are always kept. If that leaves
 * fewer than $conf['recent'] entries, the newest of the older entries are kept
 * as well to fill up to that minimum. Unparsable lines are dropped.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                            the page changelog
 * @return bool true if the changelog was rewritten, false otherwise
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] recent
    // changes or $conf['recent'] items, which ever is larger.
    // The trimming is only done once a day.
    // (a missing '.trimmed' marker makes filemtime() fail, so the check passes)
    if (@file_exists($fn) &&
        (@filemtime($fn.'.trimmed')+86400)<time() &&
        !@file_exists($fn.'_tmp')) {
        @touch($fn.'.trimmed');
        io_lock($fn);
        $lines = file($fn);
        if ($lines === false || count($lines)<=$conf['recent']) {
            // unreadable or nothing to trim
            io_unlock($fn);
            return false;
        }

        io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must exist even when no line is older than $trim_time: it is sorted
        // and sliced below whenever extra lines are needed
        $old_lines = array();

        for ($i=0; $i<count($lines); $i++) {
            $log = parseChangelogLine($lines[$i]);
            if ($log === false) continue;     // discard junk
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $lines[$i];  // keep old lines for now (append .$i to prevent key collisions)
            } else {
                $out_lines[$log['date'].".$i"] = $lines[$i];  // definitely keep these lines
            }
        }

        if (count($lines)==count($out_lines)) {
            // nothing to trim
            @unlink($fn.'_tmp');
            io_unlock($fn);
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        // however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);  // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            // newest of the old lines go first, followed by the recent ones
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($fn.'_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn.'_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn.'_tmp');
        } else {
            io_unlock($fn);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * @return mixed false when no page id is set, otherwise whatever
 *               idx_addPage() returns for the current page
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    print "runIndexer(): started".NL;

    if(!$ID) return false;

    // do the work
    return idx_addPage($ID, true);
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * @return bool true if metadata was rendered and saved, false if there is no
 *              page id, the meta file already exists or the page is missing
 */
function metaUpdate(){
    global $ID;
    global $conf;
    print "metaUpdate(): started".NL;

    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!page_exists($ID)) return false;

    // gather some additional info from changelog
    // (the pattern only matches entries whose second field is a dotted-quad
    // IPv4 address)
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(!empty($info)){
        // first match provides the creation date
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose fourth capture is '*' are skipped
            // NOTE(review): presumably '*' marks a minor edit - confirm
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    p_save_metadata($ID, $meta);

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable!
 *
 * The search engines are only pinged when generating the sitemap succeeded.
 *
 * @return bool true only if both generation and pinging reported success
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    print "runSitemapper(): started".NL;
    $result = Sitemapper::generate() && Sitemapper::pingSearchEngines();
    print 'runSitemapper(): finished'.NL;
    return $result;
}

/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * While mails are prepared, $USERINFO and $_SERVER['REMOTE_USER'] are switched
 * to the subscribing user and restored afterwards.
 *
 * @return bool always false, so the task chain continues with the next job
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    echo 'sendDigest(): start'.NL;
    global $ID;
    global $conf;
    global $auth;
    global $USERINFO;

    if (!$conf['subscribers']) {
        return false;
    }
    $subscriptions = subscription_find($ID, array('style' => '(digest|list)',
                                                  'escaped' => true));

    // remember current user info
    // (REMOTE_USER is not set for anonymous requests)
    $olduinfo = $USERINFO;
    $olduser = isset($_SERVER['REMOTE_USER']) ? $_SERVER['REMOTE_USER'] : null;

    foreach($subscriptions as $id => $users) {
        if (!subscription_lock($id)) {
            continue;
        }
        foreach($users as $data) {
            list($user, $style, $lastupdate) = $data;
            $lastupdate = (int) $lastupdate;
            if ($lastupdate + $conf['subscribe_time'] > time()) {
                // Less than the configured time period passed since last
                // update.
                continue;
            }

            // Work as the user to make sure ACLs apply correctly
            $USERINFO = $auth->getUserData($user);
            $_SERVER['REMOTE_USER'] = $user;
            if ($USERINFO === false) {
                continue;
            }

            if (substr($id, -1, 1) === ':') {
                // The subscription target is a namespace
                $changes = getRecentsSince($lastupdate, null, getNS($id));
            } else {
                if(auth_quickaclcheck($id) < AUTH_READ) continue;

                $meta = p_get_metadata($id);
                $changes = array($meta['last_change']);
            }

            // Filter out pages only changed in small and own edits
            $change_ids = array();
            foreach($changes as $rev) {
                $n = 0;
                // step back through the revisions while the change is the
                // user's own or a minor edit
                // NOTE(review): the loop reads 'date', 'user', 'type' and 'id'
                // from what getRevisions() yields - confirm it returns such
                // entries and not bare revision identifiers
                while (!is_null($rev) && $rev['date'] >= $lastupdate &&
                       ($_SERVER['REMOTE_USER'] === $rev['user'] ||
                        $rev['type'] === DOKU_CHANGE_TYPE_MINOR_EDIT)) {
                    $rev = getRevisions($rev['id'], $n++, 1);
                    $rev = (count($rev) > 0) ? $rev[0] : null;
                }

                if (!is_null($rev) && $rev['date'] >= $lastupdate) {
                    // Some change was not a minor one and not by myself
                    $change_ids[] = $rev['id'];
                }
            }

            if ($style === 'digest') {
                foreach($change_ids as $change_id) {
                    subscription_send_digest($USERINFO['mail'], $change_id,
                                             $lastupdate);
                }
            } elseif ($style === 'list') {
                subscription_send_list($USERINFO['mail'], $change_ids, $id);
            }
            // TODO: Handle duplicate subscriptions.

            // Update notification time.
            subscription_set($user, $id, $style, time(), true);
        }
        subscription_unlock($id);
    }

    // restore current user info
    $USERINFO = $olduinfo;
    $_SERVER['REMOTE_USER'] = $olduser;

    return false;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the request carries a 'debug' parameter no image is sent; only a
 * text/plain content type header is emitted instead.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    if(isset($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php