1<?php
2
3/**
4 * statdisplay plugin log helper component
5 *
6 * @author Andreas Gohr <gohr@cosmocode.de>
7 * @license  GPL 2 (http://www.gnu.org/licenses/gpl.html)
8 */
class helper_plugin_statdisplay_log extends DokuWiki_Plugin
{
    /**
     * @var array parsed statistics, keyed by month ('Y-m'); keys starting with
     *            an underscore ('_logpos', '_lastmonth') hold parser bookkeeping
     */
    public $logdata = [];

    /** @var string path of the serialized statistics cache */
    private $logcache = '';

    /** @var string path of the access log that is analyzed */
    private $logfile = '';

    /** @var int number of entries kept per top list when a finished month is cleaned up */
    public $top_limit = 30;

    /**
     * Constructor
     *
     * Resolves the access log location and loads the cached statistics, if any.
     * An unreadable or corrupt cache is ignored and leaves the statistics empty.
     */
    public function __construct()
    {
        global $conf;

        $accesslog = $this->getConf('accesslog');
        $this->logfile = fullpath($conf['metadir'] . '/' . $accesslog);
        // file not found? assume absolute path
        if (!file_exists($this->logfile)) $this->logfile = $accesslog;

        // load the cache file
        $this->logcache = getCacheName($accesslog, '.statdisplay');
        if (file_exists($this->logcache)) {
            // the cache only ever contains arrays and scalars, so refuse to instantiate objects
            $data = unserialize(io_readFile($this->logcache, false), ['allowed_classes' => false]);
            // unserialize() returns false on corrupt data - never hand that to ksort()
            if (is_array($data)) {
                ksort($data);
                $this->logdata = $data;
            }
        }
    }

    /**
     * drops the existing log cache
     */
    public function resetLogCache()
    {
        @unlink($this->logcache);
        // first argument is the realpath-cache flag, the file name comes second
        clearstatcache(true, $this->logcache);
        $this->logdata = [];
    }

    /**
     * Return the progress of the log analysis
     *
     * @return float percentage of the log file that has been read; 100.0 when
     *               the log file is missing or empty
     */
    public function progress()
    {
        $pos = (int)($this->logdata['_logpos'] ?? 0);
        $max = @filesize($this->logfile);
        if (!$max) return 100.0;

        return (float)($pos * 100 / $max);
    }

    /**
     * Parses the next chunk of logfile into our memory structure
     *
     * Continues at the byte offset stored in '_logpos', reads up to $maxlines
     * lines, stores the updated statistics in the cache file and returns. The
     * analysis lock and the file handle are released even if parsing fails.
     *
     * @param int $maxlines the number of lines to read
     * @return int the number of lines read from the log (0 if nothing was done)
     */
    public function parseLogData($maxlines)
    {
        $size = @filesize($this->logfile);
        if (!$size) return 0;

        // continue from last position
        $pos = (int)($this->logdata['_logpos'] ?? 0);
        if ($pos > $size) $pos = 0; // log is shorter than our offset: start over
        if ($pos && (($size - $pos) < ($maxlines * 150))) return 0; // we want to have some minimal log data

        if (!$this->lock()) return 0;

        $fh = false;
        try {
            require_once(__DIR__ . '/../Browser.php');

            // open handle
            $fh = fopen($this->logfile, 'r');
            if (!$fh) return 0;
            fseek($fh, $pos, SEEK_SET);

            // read lines
            $lines = 0;
            $month = null;
            while (!feof($fh) && $lines < $maxlines) {
                $line = fgets($fh);
                if ($line === false) break; // nothing left to read
                $lines++;
                $pos += strlen($line);

                $parsed = $this->processLine($line);
                if ($parsed !== null) $month = $parsed;
            }
            $this->logdata['_logpos'] = $pos;

            // clean up the last month, freeing memory
            if ($month !== null) {
                if (isset($this->logdata['_lastmonth']) && $this->logdata['_lastmonth'] != $month) {
                    $this->clean_month($this->logdata['_lastmonth']);
                }
                // always remember the month we are in, otherwise the cleanup above can never trigger
                $this->logdata['_lastmonth'] = $month;
            }

            // save the data
            io_saveFile($this->logcache, serialize($this->logdata));
            return $lines;
        } finally {
            if ($fh) fclose($fh);
            $this->unlock();
        }
    }

    /**
     * Adds a single access log line to the statistics
     *
     * The line is split on spaces and read positionally: field 0 is the
     * client IP, 2 the user, 3+4 the timestamp, 6 the URL, 8 the status,
     * 9 the byte count, 10 the referer and everything from 11 on the user agent.
     *
     * @param string $line one raw line of the access log
     * @return string|null the month ('Y-m') the line was counted for, null if the line was skipped
     */
    private function processLine($line)
    {
        global $auth;

        $parts = explode(' ', $line);
        // blank or truncated lines do not carry the fields we need
        if (count($parts) < 10) return null;

        $date = strtotime(trim($parts[3] . ' ' . $parts[4], '[]'));
        if (!$date) return null;

        $month = date('Y-m', $date);
        $day = date('d', $date);
        $hour = (int)date('G', $date);
        list($url) = explode('?', $parts[6]); // strip GET vars
        $ip = $parts[0];
        $status = $parts[8];
        $size = (int)$parts[9]; // a non-numeric byte field such as '-' counts as 0
        $user = trim($parts[2], '"-');

        if (!empty($user) && $auth) {
            /** @var \dokuwiki\Extension\AuthPlugin $auth */
            $user = $auth->cleanUser($user);
        }

        if ($status == 200) {
            $thistype = (substr($url, 0, 8) == '/_media/') ? 'media' : 'page';
            if ($thistype == 'page') {
                // for analyzing webserver logs we consider all known extensions as media files
                list($ext) = mimetype($url);
                if ($ext !== false) $thistype = 'media';
            }

            // remember IPs
            $newvisitor = !isset($this->logdata[$month]['ip'][$ip]);
            $this->bump($this->logdata[$month]['ip'][$ip]);

            // log type dependent and summarized
            foreach ([$thistype, 'hits'] as $type) {
                $this->ensureHourBuckets($month, $type);

                $this->bump($this->logdata[$month][$type]['all']['count']);
                $this->bump($this->logdata[$month][$type]['day'][$day]['count']);
                $this->bump($this->logdata[$month][$type]['hour'][$hour]['count']);

                $this->bump($this->logdata[$month][$type]['all']['bytes'], $size);
                $this->bump($this->logdata[$month][$type]['day'][$day]['bytes'], $size);
                $this->bump($this->logdata[$month][$type]['hour'][$hour]['bytes'], $size);

                if ($newvisitor) {
                    $this->bump($this->logdata[$month][$type]['all']['visitor']);
                    $this->bump($this->logdata[$month][$type]['day'][$day]['visitor']);
                    $this->bump($this->logdata[$month][$type]['hour'][$hour]['visitor']);
                }
            }

            // user traffic is not type dependent: count each request exactly once
            if ($user) {
                $this->bump($this->logdata[$month]['usertraffic'][$day][$user], $size);
            }

            // log additional detailed data
            if ($thistype == 'page') {
                // url
                $this->bump($this->logdata[$month]['page_url'][$url]);

                // referer (absent in logs without referer/user agent columns)
                $referer = trim(trim($parts[10] ?? ''), '"');
                // skip non valid and local referers
                if (substr($referer, 0, 4) == 'http' && (strpos($referer, DOKU_URL) !== 0)) {
                    list($referer) = explode('?', $referer);
                    $this->bump($this->logdata[$month]['referer']['count']);
                    $this->bump($this->logdata[$month]['referer_url'][$referer]);
                }

                // entry page
                if ($newvisitor) {
                    $this->bump($this->logdata[$month]['entry'][$url]);
                }

                // user agent: the last field still carries the line break, strip it with the quotes
                $ua = trim(join(' ', array_slice($parts, 11)), "\" \r\n");
                if ($ua) {
                    $ua = $this->ua($ua);
                    $this->bump($this->logdata[$month]['useragent'][$ua]);
                }
            }
        } else {
            // count non-200 as a hit too
            $this->ensureHourBuckets($month, 'hits');
            $this->bump($this->logdata[$month]['hits']['all']['count']);
            $this->bump($this->logdata[$month]['hits']['day'][$day]['count']);
            $this->bump($this->logdata[$month]['hits']['hour'][$hour]['count']);
        }

        $this->bump($this->logdata[$month]['status']['all'][$status]);
        $this->bump($this->logdata[$month]['status']['day'][$day][$status]);
        $this->bump($this->logdata[$month]['status']['hour'][$hour][$status]);

        return $month;
    }

    /**
     * Makes sure the hourly buckets 0..23 of a statistics type exist in order
     *
     * @param string $month the month ('Y-m') to prepare
     * @param string $type the statistics type ('page', 'media' or 'hits')
     */
    private function ensureHourBuckets($month, $type)
    {
        if (!isset($this->logdata[$month][$type]['hour'])) {
            // we need these in perfect order: one bucket for each of the 24 hours
            $this->logdata[$month][$type]['hour'] = array_fill(0, 24, []);
        }
    }

    /**
     * Adds an amount to a counter, creating the counter if it does not exist yet
     *
     * @param mixed $counter the counter to increase; passing a missing array element creates it
     * @param int|float $amount the amount to add
     */
    private function bump(&$counter, $amount = 1)
    {
        $counter = self::toNumber($counter) + $amount;
    }

    /**
     * Converts a stored value into something safe for arithmetic
     *
     * @param mixed $value
     * @return int|float the numeric value, 0 for anything that is not numeric
     */
    private static function toNumber($value)
    {
        return is_numeric($value) ? $value + 0 : 0;
    }

    /**
     * Clean up the backlog
     *
     * Shortens IPs, referers, entry pages, user agents etc. to preserve space and memory
     *
     * @param string $month where to clean up
     */
    private function clean_month($month)
    {
        if (!$month) return;

        foreach (['ip', 'page_url', 'referer_url', 'entry', 'useragent'] as $type) {
            if (isset($this->logdata[$month][$type]) && is_array($this->logdata[$month][$type])) {
                arsort($this->logdata[$month][$type]);
                // preserve keys so that numeric looking keys are not renumbered
                $this->logdata[$month][$type] = array_slice(
                    $this->logdata[$month][$type],
                    0,
                    $this->top_limit,
                    true
                );
            }
        }
    }

    /**
     * Returns the common user agent name and version as a string
     *
     * @param string $useragent the raw user agent header
     * @return string browser name, followed by the major version when one is known
     */
    private function ua($useragent)
    {
        $ua = new Browser($useragent);
        list($version) = explode('.', $ua->getVersion());
        if (!$version) $version = ''; // no zero version
        if ($version == 'unknown') $version = '';
        return trim($ua->getBrowser() . ' ' . $version);
    }

    /**
     * Lock the analysis process
     *
     * The lock is a directory in the lock dir. A lock older than five minutes
     * is considered stale and removed; the current attempt still fails then.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @return bool true if the lock was acquired
     */
    private function lock()
    {
        global $conf;
        $run = 0;
        $lock = $conf['lockdir'] . '/_statdisplay.lock';
        while (!@mkdir($lock, $conf['dmode'])) {
            usleep(50);
            if (is_dir($lock) && time() - @filemtime($lock) > 60 * 5) {
                // looks like a stale lock - remove it
                @rmdir($lock);
                return false;
            } elseif ($run++ == 1000) {
                // give up after about a thousand short (50 microsecond) retries
                return false;
            }
        }
        if (!empty($conf['dperm'])) {
            chmod($lock, $conf['dperm']);
        }
        return true;
    }

    /**
     * Unlock the analysis process
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @return bool always true
     */
    private function unlock()
    {
        global $conf;
        @rmdir($conf['lockdir'] . '/_statdisplay.lock');
        return true;
    }

    /**
     * Return the user traffic of the last 7 logged days
     *
     * Takes up to seven days from the given month and fills up with the last
     * days of the month before when fewer are available.
     *
     * @param string $date the month ('Y-m'), defaults to the current month
     * @return array traffic in bytes, keyed by user name
     */
    public function usertraffic($date)
    {
        if (!$date) $date = date('Y-m');

        // limit to seven days
        $chunks = [array_slice((array)($this->logdata[$date]['usertraffic'] ?? []), -7, 7, true)];

        // add from previous month if needed
        $missing = 7 - count($chunks[0]);
        if ($missing > 0) {
            $previous = (array)($this->logdata[$this->prevmonth($date)]['usertraffic'] ?? []);
            // kept as a separate chunk: both months use the day of month as key
            // and merging them would drop days that exist in both
            $chunks[] = array_slice($previous, -$missing, $missing, true);
        }

        // count up the traffic
        $usertraffic = [];
        foreach ($chunks as $days) {
            foreach ($days as $info) {
                foreach ((array)$info as $user => $traffic) {
                    $usertraffic[$user] = ($usertraffic[$user] ?? 0) + self::toNumber($traffic);
                }
            }
        }
        return $usertraffic;
    }

    /**
     * Gives the sum of a certain column from the input array
     *
     * @param array $input list of values, or list of rows when $key is given
     * @param string|int|null $key the column to sum up, null to sum the items themselves
     * @return int|float the sum; missing or non numeric values count as 0
     */
    public function sum($input, $key = null)
    {
        $sum = 0;
        foreach ((array)$input as $item) {
            $sum += self::toNumber(is_null($key) ? $item : ($item[$key] ?? 0));
        }

        return $sum;
    }

    /**
     * Averages a certain column from the input array
     *
     * Every item counts for the average, even when it lacks the column.
     *
     * @param array $input list of values, or list of rows when $key is given
     * @param string|int|null $key the column to average, null to average the items themselves
     * @return int|float the average, 0 for empty input
     */
    public function avg($input, $key = null)
    {
        $cnt = 0;
        $all = 0;
        foreach ((array)$input as $item) {
            $all += self::toNumber(is_null($key) ? $item : ($item[$key] ?? 0));
            $cnt++;
        }

        if (!$cnt) return 0;
        return $all / $cnt;
    }

    /**
     * Gives maximum of a certain column from the input array
     *
     * @param array $input list of values, or list of rows when $key is given
     * @param string|int|null $key the column to inspect, null to compare the items themselves
     * @return int|float the largest value, never less than 0
     */
    public function max($input, $key = null)
    {
        $max = 0;
        foreach ((array)$input as $item) {
            $val = self::toNumber(is_null($key) ? $item : ($item[$key] ?? 0));
            if ($val > $max) $max = $val;
        }

        return $max;
    }

    /**
     * return the month before the given month
     *
     * @param string $date a month in 'Y-m' format
     * @return string the previous month in 'Y-m' format
     */
    private function prevmonth($date)
    {
        list($year, $month) = array_pad(explode('-', (string)$date, 2), 2, 0);
        $year = (int)$year;
        $month = (int)$month - 1;
        if ($month < 1) {
            $year--;
            $month = 12;
        }
        return sprintf("%d-%02d", $year, $month);
    }

}
466