<?php
// must be run within Dokuwiki
if(!defined('DOKU_INC')) die();

/**
 * statdisplay plugin log helper component
 *
 * Reads the configured access log chunk by chunk, aggregates the requests
 * per month into $logdata and persists that structure in a cache file so
 * the next call can continue where the previous one stopped.
 *
 * Each log line is split on single spaces and the fields are read by
 * position: 0 = client IP, 2 = user, 3+4 = timestamp, 6 = URL, 8 = status,
 * 9 = size, 10 = referer, 11.. = user agent.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 */
class helper_plugin_statdisplay_log extends DokuWiki_Plugin {
    /** @var array aggregated statistics keyed by 'Y-m', plus the '_logpos' and '_lastmonth' bookkeeping keys */
    public $logdata = array();
    /** @var string path of the cache file holding the serialized $logdata */
    private $logcache = '';
    /** @var string path of the access log that is analyzed */
    private $logfile = '';

    /** @var int number of entries kept per detail list when a finished month is cleaned up */
    public $top_limit = 30;

    /**
     * Constructor
     *
     * Resolves the log file location and loads the cache
     */
    public function __construct() {
        global $conf;
        $this->logfile = fullpath($conf['metadir'].'/'.$this->getConf('accesslog'));
        // file not found? assume absolute path
        if(!file_exists($this->logfile)) $this->logfile = $this->getConf('accesslog');

        // load the cache file
        $this->logcache = getCacheName($this->getConf('accesslog'), '.statdisplay');
        if(file_exists($this->logcache)) {
            $cached = @unserialize(io_readFile($this->logcache, false));
            // a truncated or corrupt cache must not turn $logdata into false
            if(is_array($cached)) $this->logdata = $cached;
        }
    }

    /**
     * Return the progress of the log analysis
     *
     * @return float percentage of the log file that has been read so far
     */
    public function progress() {
        $pos = isset($this->logdata['_logpos']) ? (int) $this->logdata['_logpos'] : 0;
        $max = @filesize($this->logfile);
        if(!$max) return 100.0;

        // a stored offset behind the end of the file means the file shrank;
        // parseLogData() restarts from the beginning in that case
        if($pos > $max) $pos = 0;

        return $pos * 100.0 / $max;
    }

    /**
     * Parses the next chunk of logfile into our memory structure
     *
     * Reads at most 'lines' (plugin setting) lines starting at the stored
     * offset, updates the counters, stores the new offset and saves the cache.
     *
     * @return int number of lines read, 0 when nothing was done
     */
    public function parseLogData() {
        global $auth;

        $logsize = @filesize($this->logfile);
        if(!$logsize) return 0;

        $maxlines = (int) $this->getConf('lines');

        // continue from last position
        $pos = 0;
        if(isset($this->logdata['_logpos'])) $pos = (int) $this->logdata['_logpos'];
        if($pos > $logsize) $pos = 0;
        if($pos && (($logsize - $pos) < ($maxlines * 150))) return 0; // we want to have some minimal log data

        if(!$this->lock()) return 0;

        require_once(dirname(__FILE__).'/../Browser.php');

        // open handle
        $fh = fopen($this->logfile, 'r');
        if(!$fh) {
            // do not leave the lock behind when the log can not be read
            $this->unlock();
            return 0;
        }
        fseek($fh, $pos, SEEK_SET);

        // read lines
        $lines = 0;
        while(!feof($fh) && $lines < $maxlines) {
            $line = fgets($fh);
            $lines++;
            if($line === false || $line === '') continue;
            $pos += strlen($line);

            $parts = explode(' ', $line);
            // fields up to the size (index 9) are required below
            if(count($parts) < 10) continue;

            $date = strtotime(trim($parts[3].' '.$parts[4], '[]'));
            if(!$date) continue;

            $month = date('Y-m', $date);
            $day   = date('d', $date);
            $hour  = date('G', $date);
            $ip    = $parts[0];
            list($url) = explode('?', $parts[6]); // strip GET vars
            $status = $parts[8];
            // the size may be a non-numeric placeholder or carry the line break;
            // the cast turns both into a plain integer that is safe to add up
            $bytes = (int) $parts[9];
            $user  = trim($parts[2], '"-');

            if(!empty($user) && is_object($auth)) {
                $user = $auth->cleanUser($user);
            }

            // 'hits' is counted for every status, so its hour slots have to
            // exist before either branch below touches them
            $this->logInitHours($month, 'hits');

            if($status == 200) {
                $thistype = (substr($url, 0, 8) == '/_media/') ? 'media' : 'page';
                if($thistype == 'page') {
                    // for analyzing webserver logs we consider all known extensions as media files
                    list($ext) = mimetype($url);
                    if($ext !== false) $thistype = 'media';
                }

                // remember IPs
                $newvisitor = !isset($this->logdata[$month]['ip'][$ip]);
                $this->logIncrement($this->logdata[$month]['ip'][$ip]);

                // log type dependent and summarized
                foreach(array($thistype, 'hits') as $type) {
                    // we need these in perfect order
                    $this->logInitHours($month, $type);

                    $this->logIncrement($this->logdata[$month][$type]['all']['count']);
                    $this->logIncrement($this->logdata[$month][$type]['day'][$day]['count']);
                    $this->logIncrement($this->logdata[$month][$type]['hour'][$hour]['count']);

                    $this->logIncrement($this->logdata[$month][$type]['all']['bytes'], $bytes);
                    $this->logIncrement($this->logdata[$month][$type]['day'][$day]['bytes'], $bytes);
                    $this->logIncrement($this->logdata[$month][$type]['hour'][$hour]['bytes'], $bytes);

                    if($newvisitor) {
                        $this->logIncrement($this->logdata[$month][$type]['all']['visitor']);
                        $this->logIncrement($this->logdata[$month][$type]['day'][$day]['visitor']);
                        $this->logIncrement($this->logdata[$month][$type]['hour'][$hour]['visitor']);
                    }
                }

                // user traffic is not type dependent: count it once per request,
                // not once per pass of the loop above
                if($user) {
                    $this->logIncrement($this->logdata[$month]['usertraffic'][$day][$user], $bytes);
                }

                // log additional detailed data
                if($thistype == 'page') {
                    // url
                    $this->logIncrement($this->logdata[$month]['page_url'][$url]);

                    // referer (may be the last field and then carries the line break)
                    $referer = isset($parts[10]) ? trim($parts[10], "\" \r\n") : '';
                    // skip non valid and local referers
                    if(substr($referer, 0, 4) == 'http' && (strpos($referer, DOKU_URL) !== 0)) {
                        list($referer) = explode('?', $referer);
                        $this->logIncrement($this->logdata[$month]['referer']['count']);
                        $this->logIncrement($this->logdata[$month]['referer_url'][$referer]);
                    }

                    // entry page
                    if($newvisitor) {
                        $this->logIncrement($this->logdata[$month]['entry'][$url]);
                    }

                    // user agent: everything after the referer, without quotes and line break
                    $ua = trim(join(' ', array_slice($parts, 11)), "\" \r\n");
                    if($ua) {
                        $ua = $this->ua($ua);
                        $this->logIncrement($this->logdata[$month]['useragent'][$ua]);
                    }
                }
            } else {
                // count non-200 as a hit too
                $this->logIncrement($this->logdata[$month]['hits']['all']['count']);
                $this->logIncrement($this->logdata[$month]['hits']['day'][$day]['count']);
                $this->logIncrement($this->logdata[$month]['hits']['hour'][$hour]['count']);
            }

            $this->logIncrement($this->logdata[$month]['status']['all'][$status]);
            $this->logIncrement($this->logdata[$month]['status']['day'][$day][$status]);
            $this->logIncrement($this->logdata[$month]['status']['hour'][$hour][$status]);
        }
        fclose($fh);
        $this->logdata['_logpos'] = $pos;

        // clean up the last month, freeing memory
        $lastmonth = isset($this->logdata['_lastmonth']) ? $this->logdata['_lastmonth'] : '';
        if(isset($month) && $lastmonth != $month) {
            $this->clean_month($lastmonth);
            $this->logdata['_lastmonth'] = $month;
        }

        // save the data
        io_saveFile($this->logcache, serialize($this->logdata));
        $this->unlock();
        return $lines;
    }

    /**
     * Add to a counter, creating it when it does not exist yet
     *
     * The counter is taken by reference so a not yet existing array slot can
     * be passed without triggering an undefined index notice.
     *
     * @param mixed $slot   the counter to change
     * @param int   $amount value to add
     */
    private function logIncrement(&$slot, $amount = 1) {
        $slot = (isset($slot) ? $slot : 0) + $amount;
    }

    /**
     * Make sure all 24 hour slots (0..23) of a type exist in ascending order
     *
     * Slots that are already there keep their data; missing ones (including
     * hour 23 in data written with only 23 slots) are added as empty arrays.
     *
     * @param string $month the 'Y-m' key
     * @param string $type  'page', 'media' or 'hits'
     */
    private function logInitHours($month, $type) {
        $exists = isset($this->logdata[$month][$type]['hour']);
        if($exists && count($this->logdata[$month][$type]['hour']) >= 24) return;

        $hours = $exists ? (array) $this->logdata[$month][$type]['hour'] : array();
        $hours += array_fill(0, 24, array());
        ksort($hours);
        $this->logdata[$month][$type]['hour'] = $hours;
    }

    /**
     * Clean up the backlog
     *
     * Shortens IPs, referers, entry pages, user agents etc. to the
     * $top_limit most frequent entries to preserve space and memory
     *
     * @param string $month where to clean up
     */
    private function clean_month($month) {
        if(!$month) return;

        foreach(array('ip', 'page_url', 'referer_url', 'entry', 'useragent') as $type) {
            if(isset($this->logdata[$month][$type]) && is_array($this->logdata[$month][$type])) {
                arsort($this->logdata[$month][$type]);
                // preserve keys: they are the IPs/URLs/agents the counts belong to
                $this->logdata[$month][$type] = array_slice($this->logdata[$month][$type], 0, $this->top_limit, true);
            }
        }
    }

    /**
     * Returns the common user agent name and version as a string
     *
     * @param string $useragent the raw user agent string from the log
     * @return string browser name, followed by the major version when known
     */
    private function ua($useragent) {
        $ua = new Browser($useragent);
        list($version) = explode('.', $ua->getVersion());
        if(!$version) $version = ''; // no zero version
        if($version == 'unknown') $version = '';
        return trim($ua->getBrowser().' '.$version);
    }

    /**
     * Lock the analysis process
     *
     * The lock is a directory in the lock dir; creating it fails while
     * another process holds it.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @return bool true when the lock was acquired
     */
    private function lock() {
        global $conf;
        $run = 0;
        $lock = $conf['lockdir'].'/_statdisplay.lock';
        while(!@mkdir($lock, $conf['dmode'])) {
            usleep(50);
            if(is_dir($lock) && time() - @filemtime($lock) > 60 * 5) {
                // looks like a stale lock - remove it, the next call can acquire it
                @rmdir($lock);
                return false;
            } elseif($run++ == 1000) {
                // give up after about a thousand polls of 50 microseconds each
                return false;
            }
        }
        if($conf['dperm'])
            chmod($lock, $conf['dperm']);
        return true;
    }

    /**
     * Unlock the analysis process
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @return bool always true
     */
    private function unlock() {
        global $conf;
        @rmdir($conf['lockdir'].'/_statdisplay.lock');
        return true;
    }

    /**
     * Return the last 7 day's user traffic
     *
     * Takes the last (up to) seven recorded days of the given month and fills
     * up with the last recorded days of the previous month when needed.
     *
     * @param string $date month as 'Y-m', the current month when empty
     * @return array traffic in bytes, keyed by user name
     */
    public function usertraffic($date) {
        if(!$date) $date = date('Y-m');

        $current = isset($this->logdata[$date]['usertraffic']) ? (array) $this->logdata[$date]['usertraffic'] : array();
        // limit to seven days; the day keys are not needed for summing up and
        // dropping them keeps equal day numbers of two months from colliding
        $days = array_values(array_slice($current, -7, 7, true));

        // add from previous month if needed
        $num = count($days);
        if($num < 7) {
            $prev     = $this->prevmonth($date);
            $previous = isset($this->logdata[$prev]['usertraffic']) ? (array) $this->logdata[$prev]['usertraffic'] : array();
            $days     = array_merge($days, array_values(array_slice($previous, -1 * (7 - $num), 7 - $num, true)));
        }

        // count up the traffic
        $usertraffic = array();
        foreach($days as $info) {
            foreach((array) $info as $user => $traffic) {
                if(!isset($usertraffic[$user])) $usertraffic[$user] = 0;
                $usertraffic[$user] += $traffic;
            }
        }
        return $usertraffic;
    }

    /**
     * Read the value to aggregate from one input item
     *
     * @param mixed       $item the item itself or an array holding the column
     * @param string|null $key  column to read, null to use the item as is
     * @return mixed the value, 0 when the column does not exist
     */
    private function columnValue($item, $key) {
        if(is_null($key)) return $item;
        return (is_array($item) && isset($item[$key])) ? $item[$key] : 0;
    }

    /**
     * Gives the sum of a certain column from the input array
     *
     * @param array       $input
     * @param string|null $key column to sum, null to sum the items themselves
     * @return int
     */
    public function sum($input, $key = null) {
        $sum = 0;
        foreach((array) $input as $item) {
            $sum += $this->columnValue($item, $key);
        }

        return $sum;
    }

    /**
     * Averages a certain column from the input array
     *
     * Items without the column count as 0 but still count as an item.
     *
     * @param array       $input
     * @param string|null $key column to average, null to average the items themselves
     * @return float
     */
    public function avg($input, $key = null) {
        $cnt = 0;
        $all = 0;
        foreach((array) $input as $item) {
            $all += $this->columnValue($item, $key);
            $cnt++;
        }

        if(!$cnt) return 0;
        return $all / $cnt;
    }

    /**
     * Gives maximum of a certain column from the input array
     *
     * @param array       $input
     * @param string|null $key column to inspect, null to compare the items themselves
     * @return int the largest value, 0 when nothing is larger than 0
     */
    public function max($input, $key = null) {
        $max = 0;
        foreach((array) $input as $item) {
            $val = $this->columnValue($item, $key);
            if($val > $max) $max = $val;
        }

        return $max;
    }

    /**
     * return the month before the given month
     *
     * @param string $date month as 'Y-m'
     * @return string previous month as 'Y-m'
     */
    private function prevmonth($date) {
        list($year, $month) = explode('-', $date);
        $year  = (int) $year;
        $month = (int) $month - 1;
        if($month < 1) {
            $year  = $year - 1;
            $month = 12;
        }
        return sprintf("%d-%02d", $year, $month);
    }

}