<?php

/**
 * statdisplay plugin log helper component
 *
 * Incrementally parses a web server access log into a per-month statistics
 * structure that is cached on disk between requests.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 */
class helper_plugin_statdisplay_log extends DokuWiki_Plugin
{
    /**
     * @var array parsed statistics keyed by 'Y-m' month, plus the bookkeeping
     *            keys '_logpos' (byte offset reached) and '_lastmonth'
     */
    public $logdata = array();

    /** @var string path of the serialized statistics cache */
    private $logcache = '';

    /** @var string path of the access log being analysed */
    private $logfile = '';

    /** @var int number of entries kept per top list when a finished month is cleaned up */
    public $top_limit = 30;

    /**
     * Constructor
     *
     * Resolves the log file location and loads the cached statistics, if any.
     */
    public function __construct()
    {
        global $conf;
        $this->logfile = fullpath($conf['metadir'] . '/' . $this->getConf('accesslog'));
        // file not found? assume absolute path
        if (!file_exists($this->logfile)) $this->logfile = $this->getConf('accesslog');

        // load the cache file
        $this->logcache = getCacheName($this->getConf('accesslog'), '.statdisplay');
        if (file_exists($this->logcache)) {
            // the cache only ever holds nested arrays and scalars, so never instantiate objects
            $cached = unserialize(io_readFile($this->logcache, false), ['allowed_classes' => false]);
            // a truncated or corrupt cache unserializes to false - start fresh in that case
            if (is_array($cached)) {
                ksort($cached);
                $this->logdata = $cached;
            }
        }
    }

    /**
     * Drops the existing log cache, both on disk and in memory
     */
    public function resetLogCache()
    {
        @unlink($this->logcache);
        // first argument is the realpath-cache flag, the file name comes second
        clearstatcache(true, $this->logcache);
        $this->logdata = [];
    }

    /**
     * Return the progress of the log analysis
     *
     * @return float percentage (0-100) of the log file that has been parsed;
     *               100 when the log is empty or unreadable
     */
    public function progress()
    {
        $pos = (int)($this->logdata['_logpos'] ?? 0);
        $max = @filesize($this->logfile);
        if (!$max) return 100.0;

        // a rotated log can be shorter than the remembered position - never report more than 100%
        return min(100.0, $pos * 100.0 / $max);
    }

    /**
     * Parses the next chunk of logfile into our memory structure
     *
     * Continues at the byte offset remembered in the cache, reads up to
     * $maxlines lines, stores the new offset and writes the cache back.
     *
     * @param int $maxlines the number of lines to read
     * @return int the number of lines read (0 when nothing was done)
     */
    public function parseLogData($maxlines)
    {
        $size = is_file($this->logfile) ? (int)filesize($this->logfile) : 0;
        if (!$size) return 0;

        // continue from last position
        $pos = (int)($this->logdata['_logpos'] ?? 0);
        if ($pos > $size) $pos = 0; // log is shorter than what we already read - start over
        if ($pos && (($size - $pos) < ($maxlines * 150))) return 0; // we want to have some minimal log data

        if (!$this->lock()) return 0;

        require_once(dirname(__FILE__) . '/../Browser.php');

        // open handle
        $fh = fopen($this->logfile, 'r');
        if (!$fh) {
            $this->unlock();
            return 0;
        }
        fseek($fh, $pos, SEEK_SET);

        // read lines
        $lines = 0;
        $month = null; // month of the last successfully parsed line
        while (!feof($fh) && $lines < $maxlines) {
            $line = fgets($fh);
            $lines++;
            if ($line === false) continue;
            $pos += strlen($line);

            $parsedMonth = $this->recordLine($line);
            if ($parsedMonth !== null) $month = $parsedMonth;
        }
        fclose($fh);
        $this->logdata['_logpos'] = $pos;

        // clean up the last month, freeing memory
        if ($month !== null) {
            $lastmonth = $this->logdata['_lastmonth'] ?? null;
            if ($lastmonth !== null && $lastmonth != $month) {
                $this->clean_month($lastmonth);
            }
            // always remember the current month, otherwise the clean up above can never trigger
            $this->logdata['_lastmonth'] = $month;
        }

        // save the data
        io_saveFile($this->logcache, serialize($this->logdata));
        $this->unlock();
        return $lines;
    }

    /**
     * Parses a single access log line and adds it to the statistics
     *
     * Fields are taken by position after splitting on single spaces:
     * 0 = client IP, 2 = user, 3+4 = timestamp, 6 = URL, 8 = status,
     * 9 = response size, 10 = referer, 11.. = user agent.
     *
     * @param string $line raw log line, may still carry its line ending
     * @return string|null the 'Y-m' month the line was counted in, null if the line was skipped
     */
    private function recordLine($line)
    {
        global $auth;

        // strip the line ending so it does not stick to the last field
        $line = rtrim($line, "\r\n");
        if ($line === '') return null;

        $parts = explode(' ', $line);
        if (count($parts) < 10) return null; // too short to carry status and size

        $date = strtotime(trim($parts[3] . ' ' . $parts[4], '[]'));
        if (!$date) return null;

        $month = date('Y-m', $date);
        $day = date('d', $date);
        $hour = date('G', $date);
        list($url) = explode('?', $parts[6]); // strip GET vars
        $status = $parts[8];
        $bytes = (int)$parts[9]; // servers log "-" when no body was sent
        $user = trim($parts[2], '"-');

        if (!empty($user) && $auth) {
            /** @var \dokuwiki\Extension\AuthPlugin $auth */
            $user = $auth->cleanUser($user);
        }

        if ($status == 200) {
            $thistype = (substr($url, 0, 8) == '/_media/') ? 'media' : 'page';
            if ($thistype == 'page') {
                // for analyzing webserver logs we consider all known extensions as media files
                list($ext) = mimetype($url);
                if ($ext !== false) $thistype = 'media';
            }

            // remember IPs
            $ip = $parts[0];
            $newvisitor = !isset($this->logdata[$month]['ip'][$ip]);
            $this->bump($this->logdata[$month]['ip'][$ip]);

            // log type dependent and summarized
            foreach (array($thistype, 'hits') as $type) {
                $this->ensureHours($month, $type);

                $this->bump($this->logdata[$month][$type]['all']['count']);
                $this->bump($this->logdata[$month][$type]['day'][$day]['count']);
                $this->bump($this->logdata[$month][$type]['hour'][$hour]['count']);

                $this->bump($this->logdata[$month][$type]['all']['bytes'], $bytes);
                $this->bump($this->logdata[$month][$type]['day'][$day]['bytes'], $bytes);
                $this->bump($this->logdata[$month][$type]['hour'][$hour]['bytes'], $bytes);

                if ($newvisitor) {
                    $this->bump($this->logdata[$month][$type]['all']['visitor']);
                    $this->bump($this->logdata[$month][$type]['day'][$day]['visitor']);
                    $this->bump($this->logdata[$month][$type]['hour'][$hour]['visitor']);
                }
            }

            // per user traffic - counted once per request, outside the type loop above
            if ($user) {
                $this->bump($this->logdata[$month]['usertraffic'][$day][$user], $bytes);
            }

            // log additional detailed data
            if ($thistype == 'page') {
                $this->recordPageDetails($month, $url, $parts, $newvisitor);
            }
        } else {
            // count non-200 as a hit too
            $this->ensureHours($month, 'hits');
            $this->bump($this->logdata[$month]['hits']['all']['count']);
            $this->bump($this->logdata[$month]['hits']['day'][$day]['count']);
            $this->bump($this->logdata[$month]['hits']['hour'][$hour]['count']);
        }

        $this->bump($this->logdata[$month]['status']['all'][$status]);
        $this->bump($this->logdata[$month]['status']['day'][$day][$status]);
        $this->bump($this->logdata[$month]['status']['hour'][$hour][$status]);

        return $month;
    }

    /**
     * Records URL, referer, entry page and user agent of a successful page request
     *
     * @param string $month 'Y-m' month the request belongs to
     * @param string $url requested URL without query string
     * @param string[] $parts the space separated fields of the log line
     * @param bool $newvisitor whether the client IP was seen for the first time this month
     */
    private function recordPageDetails($month, $url, $parts, $newvisitor)
    {
        // url
        $this->bump($this->logdata[$month]['page_url'][$url]);

        // referer (not present in every log format)
        $referer = trim($parts[10] ?? '', '"');
        // skip non valid and local referers
        if (substr($referer, 0, 4) == 'http' && (strpos($referer, DOKU_URL) !== 0)) {
            list($referer) = explode('?', $referer);
            $this->bump($this->logdata[$month]['referer']['count']);
            $this->bump($this->logdata[$month]['referer_url'][$referer]);
        }

        // entry page
        if ($newvisitor) {
            $this->bump($this->logdata[$month]['entry'][$url]);
        }

        // user agent - everything after the referer, it contains spaces itself
        $ua = trim(join(' ', array_slice($parts, 11)), '" ');
        if ($ua) {
            $ua = $this->ua($ua);
            $this->bump($this->logdata[$month]['useragent'][$ua]);
        }
    }

    /**
     * Makes sure the hourly table of a month/type has all 24 slots in ascending order
     *
     * @param string $month 'Y-m' month
     * @param string $type statistics type ('page', 'media' or 'hits')
     */
    private function ensureHours($month, $type)
    {
        $hours = $this->logdata[$month][$type]['hour'] ?? [];
        if (count($hours) >= 24) return;

        // keep what is there, add the missing hours and restore the order
        $hours += array_fill(0, 24, []);
        ksort($hours);
        $this->logdata[$month][$type]['hour'] = $hours;
    }

    /**
     * Adds to a counter, creating it when it does not exist yet
     *
     * @param int|null $counter the counter to change, passed by reference so
     *                          missing array paths are created on the fly
     * @param int $amount value to add
     */
    private function bump(&$counter, $amount = 1)
    {
        $counter = ($counter ?? 0) + $amount;
    }

    /**
     * Clean up the backlog
     *
     * Shortens IPs, referers, entry pages, user agents etc. to the top
     * $top_limit entries to preserve space and memory
     *
     * @param string $month where to clean up
     */
    private function clean_month($month)
    {
        if (!$month) return;

        foreach (array('ip', 'page_url', 'referer_url', 'entry', 'useragent') as $type) {
            $list = $this->logdata[$month][$type] ?? null;
            if (!is_array($list)) continue;

            arsort($list);
            $this->logdata[$month][$type] = array_slice($list, 0, $this->top_limit, true);
        }
    }

    /**
     * Returns the common user agent name and version as a string
     *
     * @param string $useragent raw user agent string
     * @return string browser name followed by its major version, if known
     */
    private function ua($useragent)
    {
        $browser = new Browser($useragent);
        list($version) = explode('.', (string)$browser->getVersion());
        // no zero version, no 'unknown' placeholder
        if (!$version || $version == 'unknown') $version = '';
        return trim($browser->getBrowser() . ' ' . $version);
    }

    /**
     * Lock the analysis process
     *
     * Uses a directory in the lock dir as mutex.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @return bool true when the lock was acquired
     */
    private function lock()
    {
        global $conf;
        $run = 0;
        $lock = $conf['lockdir'] . '/_statdisplay.lock';
        while (!@mkdir($lock, $conf['dmode'])) {
            usleep(50);
            if (is_dir($lock) && time() - @filemtime($lock) > 60 * 5) {
                // looks like a stale lock - remove it, the next call can then acquire it
                @rmdir($lock);
                return false;
            } elseif ($run++ == 1000) {
                // give up after about 1000 attempts of 50 microseconds each
                return false;
            }
        }
        if ($conf['dperm']) {
            chmod($lock, $conf['dperm']);
        }
        return true;
    }

    /**
     * Unlock the analysis process
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @return bool always true
     */
    private function unlock()
    {
        global $conf;
        @rmdir($conf['lockdir'] . '/_statdisplay.lock');
        return true;
    }

    /**
     * Return the user traffic of the last 7 logged days
     *
     * Days are taken from the given month first and filled up from the
     * month before when fewer than seven are available.
     *
     * @param string $date month as 'Y-m', the current month when empty
     * @return array traffic in bytes keyed by user name
     */
    public function usertraffic($date)
    {
        if (!$date) $date = date('Y-m');

        // limit to seven days
        $days = array_values(array_slice((array)($this->logdata[$date]['usertraffic'] ?? []), -7));

        // add from previous month if needed
        $missing = 7 - count($days);
        if ($missing > 0) {
            $previous = (array)($this->logdata[$this->prevmonth($date)]['usertraffic'] ?? []);
            // merged as plain lists: day keys of both months may collide and must not overwrite each other
            $days = array_merge($days, array_values(array_slice($previous, -$missing)));
        }

        // count up the traffic
        $usertraffic = array();
        foreach ($days as $info) {
            foreach ((array)$info as $user => $traffic) {
                $usertraffic[$user] = ($usertraffic[$user] ?? 0) + $traffic;
            }
        }
        return $usertraffic;
    }

    /**
     * Gives the sum of a certain column from the input array
     *
     * @param array $input list of values, or list of rows when $key is given
     * @param string|null $key column to sum up, null to sum the items themselves
     * @return int
     */
    public function sum($input, $key = null)
    {
        $sum = 0;
        foreach ((array)$input as $item) {
            // rows without the column count as zero
            $sum += is_null($key) ? $item : ($item[$key] ?? 0);
        }

        return $sum;
    }

    /**
     * Averages a certain column from the input array
     *
     * Rows lacking the column still count towards the divisor.
     *
     * @param array $input list of values, or list of rows when $key is given
     * @param string|null $key column to average, null to average the items themselves
     * @return float
     */
    public function avg($input, $key = null)
    {
        $cnt = 0;
        $all = 0;
        foreach ((array)$input as $item) {
            if (is_null($key)) {
                $all += $item;
            } elseif (isset($item[$key])) {
                $all += $item[$key];
            }
            $cnt++;
        }

        if (!$cnt) return 0;
        return $all / $cnt;
    }

    /**
     * Gives maximum of a certain column from the input array
     *
     * @param array $input list of values, or list of rows when $key is given
     * @param string|null $key column to inspect, null to compare the items themselves
     * @return int the largest value found, 0 when there is none above zero
     */
    public function max($input, $key = null)
    {
        $max = 0;
        foreach ((array)$input as $item) {
            // rows without the column count as zero
            $val = is_null($key) ? $item : ($item[$key] ?? 0);
            if ($val > $max) $max = $val;
        }

        return $max;
    }

    /**
     * Return the month before the given month
     *
     * @param string $date month as 'Y-m'
     * @return string previous month as 'Y-m'
     */
    private function prevmonth($date)
    {
        $pieces = explode('-', (string)$date);
        $year = (int)$pieces[0];
        $month = (int)($pieces[1] ?? 0) - 1;
        if ($month < 1) {
            $year--;
            $month = 12;
        }
        return sprintf("%d-%02d", $year, $month);
    }
}