<?php
/**
 * DokuWiki indexer
 *
 * Runs at most one background maintenance job per request (page indexing,
 * sitemap generation, subscription digests, changelog trimming) and answers
 * the request with a 1x1 transparent GIF.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
$defer = !@ignore_user_abort() || $conf['broken_iua'];
if(!$defer){
    sendGIF(); // send gif
}

$ID = cleanID($INPUT->str('id'));

// Catch any possible output (e.g. errors)
$output = $INPUT->has('debug') && $conf['allowdebug'];
if(!$output) ob_start();

// run one of the jobs
// The "or" chain short-circuits: the first job returning true ends the chain,
// and advise_after() is only reached when every job returned false.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    runSitemapper() or
    sendDigest() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}

if(!$output) {
    // discard whatever the jobs printed; the image is the only response body
    ob_end_clean();
    if($defer) sendGIF();
}

exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                            the page changelog
 * @return bool true when the changelog file was rewritten, false otherwise
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    echo "runTrimRecentChanges($media_changes): started".NL;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, which ever is larger.
    // The trimming is only done once a day.
    if (@file_exists($fn) &&
        (@filemtime($fn.'.trimmed')+86400)<time() &&
        !@file_exists($fn.'_tmp')) {
        @touch($fn.'.trimmed');
        io_lock($fn);
        $lines = file($fn);
        if ($lines === false) {
            // changelog could not be read: release the lock instead of
            // failing further down while still holding it
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }
        if (count($lines)<=$conf['recent']) {
            // nothing to trim
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must start as an array: when every dropped line is junk nothing is
        // ever added here, yet ksort()/array_slice() below still receive it
        $old_lines = array();

        foreach ($lines as $i => $line) {
            $log = parseChangelogLine($line);
            if ($log === false) continue;                      // discard junk
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $line;     // keep old lines for now (append .$i to prevent key collisions)
            } else {
                $out_lines[$log['date'].".$i"] = $line;     // definitely keep these lines
            }
        }

        if (count($lines)==count($out_lines)) {
            // nothing to trim
            @unlink($fn.'_tmp');
            io_unlock($fn);
            echo "runTrimRecentChanges($media_changes): finished".NL;
            return false;
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            // prepend the newest $extra of the old lines
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($fn.'_tmp', implode('', $out_lines));
        @unlink($fn);
        if (!rename($fn.'_tmp', $fn)) {
            // rename failed so try another way...
            io_unlock($fn);
            io_saveFile($fn, implode('', $out_lines));
            @unlink($fn.'_tmp');
        } else {
            io_unlock($fn);
        }
        echo "runTrimRecentChanges($media_changes): finished".NL;
        return true;
    }

    // nothing done
    echo "runTrimRecentChanges($media_changes): finished".NL;
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * @return mixed false when no page ID is set, otherwise the result of
 *               idx_addPage() for the global $ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function runIndexer(){
    global $ID;
    print "runIndexer(): started".NL;

    if(!$ID) return false;

    // do the work
    return idx_addPage($ID, true);
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The work is delegated to Sitemapper::generate(); search engines are only
 * pinged when generation reported success. According to the original note the
 * map is written as sitemap.xml.gz and that file needs to be writable - the
 * actual location is decided inside Sitemapper (not visible in this file).
 *
 * @return bool true only when both generation and pinging returned true
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper(){
    print "runSitemapper(): started".NL;
    $result = Sitemapper::generate() && Sitemapper::pingSearchEngines();
    print 'runSitemapper(): finished'.NL;
    return $result;
}

/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * @return bool true when at least one mail was handed to the subscription
 *              mailer functions, false otherwise (also when subscriptions
 *              are disabled)
 *
 * @author Adrian Lang <lang@cosmocode.de>
 */
function sendDigest() {
    echo 'sendDigest(): started'.NL;
    global $ID;
    global $conf;
    if (!$conf['subscribers']) {
        echo 'sendDigest(): disabled'.NL;
        return false;
    }
    $subscriptions = subscription_find($ID, array('style' => '(digest|list)',
                                                  'escaped' => true));
    /** @var auth_basic $auth */
    global $auth;
    global $USERINFO;

    $sent = false;

    // remember current user info
    // (REMOTE_USER is not necessarily set, e.g. for anonymous requests)
    $olduinfo = $USERINFO;
    $olduser  = isset($_SERVER['REMOTE_USER']) ? $_SERVER['REMOTE_USER'] : null;

    foreach($subscriptions as $id => $users) {
        if (!subscription_lock($id)) {
            continue;
        }
        foreach($users as $data) {
            list($user, $style, $lastupdate) = $data;
            $lastupdate = (int) $lastupdate;
            if ($lastupdate + $conf['subscribe_time'] > time()) {
                // Less than the configured time period passed since last
                // update.
                continue;
            }

            // Work as the user to make sure ACLs apply correctly
            $USERINFO = $auth->getUserData($user);
            $_SERVER['REMOTE_USER'] = $user;
            if ($USERINFO === false) {
                continue;
            }

            if (substr($id, -1, 1) === ':') {
                // The subscription target is a namespace
                $changes = getRecentsSince($lastupdate, null, getNS($id));
            } else {
                if(auth_quickaclcheck($id) < AUTH_READ) continue;

                $meta = p_get_metadata($id);
                $changes = array($meta['last_change']);
            }

            // Filter out pages only changed in small and own edits
            $change_ids = array();
            foreach($changes as $rev) {
                $n = 0;
                // Step back through older revisions while the current one is
                // the user's own edit or a minor edit.
                // NOTE(review): the loop reads id/date/user/type from whatever
                // getRevisions() returns - confirm that its return shape
                // matches these keys.
                while (!is_null($rev) && $rev['date'] >= $lastupdate &&
                       ($_SERVER['REMOTE_USER'] === $rev['user'] ||
                        $rev['type'] === DOKU_CHANGE_TYPE_MINOR_EDIT)) {
                    $rev = getRevisions($rev['id'], $n++, 1);
                    $rev = (count($rev) > 0) ? $rev[0] : null;
                }

                if (!is_null($rev) && $rev['date'] >= $lastupdate) {
                    // Some change was not a minor one and not by myself
                    $change_ids[] = $rev['id'];
                }
            }

            if ($style === 'digest') {
                foreach($change_ids as $change_id) {
                    subscription_send_digest($USERINFO['mail'], $change_id,
                                             $lastupdate);
                    $sent = true;
                }
            } elseif ($style === 'list') {
                subscription_send_list($USERINFO['mail'], $change_ids, $id);
                $sent = true;
            }
            // TODO: Handle duplicate subscriptions.

            // Update notification time.
            subscription_set($user, $id, $style, time(), true);
        }
        subscription_unlock($id);
    }

    // restore current user info
    $USERINFO = $olduinfo;
    $_SERVER['REMOTE_USER'] = $olduser;
    echo 'sendDigest(): finished'.NL;
    return $sent;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * When the request carries a 'debug' parameter only a text/plain content type
 * header is sent and no image, so job output can be shown instead.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    global $INPUT;
    if($INPUT->has('debug')){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php