1<?php
2// must be run within Dokuwiki
3if(!defined('DOKU_INC')) die();
4
5/**
6 * statdisplay plugin log helper component
7 *
8 * @author Andreas Gohr <gohr@cosmocode.de>
9 * @license  GPL 2 (http://www.gnu.org/licenses/gpl.html)
10 */
11class helper_plugin_statdisplay_log extends DokuWiki_Plugin {
    /** @var array analysed log data, keyed by month ('Y-m'); also holds the '_logpos' and '_lastmonth' bookkeeping entries */
    public $logdata = array();
    /** @var string path of the cache file the serialized $logdata is stored in */
    private $logcache = '';
    /** @var string path of the access log that is analysed */
    private $logfile = '';

    /** @var int number of entries kept per detail list when a month is cleaned up */
    public $top_limit = 30;
17
18    /**
19     * Constructor
20     *
21     * Loads the cache
22     */
23    public function __construct() {
24        global $conf;
25        $this->logfile = fullpath($conf['metadir'].'/'.$this->getConf('accesslog'));
26        // file not found? assume absolute path
27        if(!file_exists($this->logfile)) $this->logfile = $this->getConf('accesslog');
28
29        // load the cache file
30        $this->logcache = getCacheName($this->getConf('accesslog'), '.statdisplay');
31        if(file_exists($this->logcache)) {
32            $this->logdata = unserialize(io_readFile($this->logcache, false));
33        }
34    }
35
36    /**
37     * Return the progress of the log analysis
38     *
39     * @return float
40     */
41    public function progress() {
42        $pos = (int) $this->logdata['_logpos'];
43        $max = @filesize($this->logfile);
44        if(!$max) return 100.0;
45
46        return $pos * 100 / $max;
47    }
48
49    /**
50     * Parses the next chunk of logfile into our memory structure
51     */
52    public function parseLogData() {
53        $size = filesize($this->logfile);
54        if(!$size) return 0;
55
56        // continue from last position
57        $pos = 0;
58        if(isset($this->logdata['_logpos'])) $pos = $this->logdata['_logpos'];
59        if($pos > $size) $pos = 0;
60        if($pos && ( ($size - $pos) < ($this->getConf('lines') * 150) )) return 0; // we want to have some minimal log data
61
62        if(!$this->lock()) return 0;
63
64        require_once(dirname(__FILE__).'/../Browser.php');
65
66        // open handle
67        $fh = fopen($this->logfile, 'r');
68        if(!$fh) return 0;
69        fseek($fh, $pos, SEEK_SET);
70
71        // read lines
72        $lines = 0;
73        while(feof($fh) == 0 && $lines < $this->getConf('lines')) {
74            $line = fgets($fh);
75            $lines++;
76            $pos += strlen($line);
77
78            if($line == '') continue;
79
80            $parts = explode(' ', $line);
81            $date  = strtotime(trim($parts[3].' '.$parts[4], '[]'));
82            if(!$date) continue;
83
84            $month = date('Y-m', $date);
85            $day   = date('d', $date);
86            $hour  = date('G', $date);
87            list($url) = explode('?', $parts[6]); // strip GET vars
88            $status = $parts[8];
89            $size   = $parts[9];
90            $user   = trim($parts[2], '"-');
91
92            if(!empty($user)){
93                $user = $GLOBALS['auth']->cleanUser($user);
94            }
95
96            if($status == 200) {
97                $thistype = (substr($url, 0, 8) == '/_media/') ? 'media' : 'page';
98                if($thistype == 'page') {
99                    // for analyzing webserver logs we consider all known extensions as media files
100                    list($ext) = mimetype($url);
101                    if($ext !== false) $thistype = 'media';
102                }
103
104                // remember IPs
105                $newvisitor = !isset($this->logdata[$month]['ip'][$parts[0]]);
106                $this->logdata[$month]['ip'][$parts[0]]++;
107
108                // log type dependent and summarized
109                foreach(array($thistype, 'hits') as $type) {
110                    // we need these in perfect order
111                    if(!isset($this->logdata[$month][$type]['hour']))
112                        $this->logdata[$month][$type]['hour'] = array_fill(0, 23, array());
113
114                    $this->logdata[$month][$type]['all']['count']++;
115                    $this->logdata[$month][$type]['day'][$day]['count']++;
116                    $this->logdata[$month][$type]['hour'][$hour]['count']++;
117
118                    $this->logdata[$month][$type]['all']['bytes'] += $size;
119                    $this->logdata[$month][$type]['day'][$day]['bytes'] += $size;
120                    $this->logdata[$month][$type]['hour'][$hour]['bytes'] += $size;
121
122                    if($user) {
123                        $this->logdata[$month]['usertraffic'][$day][$user] += $size;
124                    }
125
126                    if($newvisitor) {
127                        $this->logdata[$month][$type]['all']['visitor']++;
128                        $this->logdata[$month][$type]['day'][$day]['visitor']++;
129                        $this->logdata[$month][$type]['hour'][$hour]['visitor']++;
130                    }
131                }
132
133                // log additional detailed data
134                if($thistype == 'page') {
135                    // url
136                    $this->logdata[$month]['page_url'][$url]++;
137
138                    // referer
139                    $referer = trim($parts[10], '"');
140                    // skip non valid and local referers
141                    if(substr($referer, 0, 4) == 'http' && (strpos($referer, DOKU_URL) !== 0)) {
142                        list($referer) = explode('?', $referer);
143                        $this->logdata[$month]['referer']['count']++;
144                        $this->logdata[$month]['referer_url'][$referer]++;
145                    }
146
147                    // entry page
148                    if($newvisitor) {
149                        $this->logdata[$month]['entry'][$url]++;
150                    }
151
152                    // user agent
153                    $ua = trim(join(' ', array_slice($parts, 11)), '" ');
154                    if($ua) {
155                        $ua = $this->ua($ua);
156                        $this->logdata[$month]['useragent'][$ua]++;
157                    }
158                }
159            } else {
160                // count non-200 as a hit too
161                $this->logdata[$month]['hits']['all']['count']++;
162                $this->logdata[$month]['hits']['day'][$day]['count']++;
163                $this->logdata[$month]['hits']['hour'][$hour]['count']++;
164            }
165
166            $this->logdata[$month]['status']['all'][$status]++;
167            $this->logdata[$month]['status']['day'][$day][$status]++;
168            $this->logdata[$month]['status']['hour'][$hour][$status]++;
169        }
170        $this->logdata['_logpos'] = $pos;
171
172        // clean up the last month, freeing memory
173        if(isset($month) && $this->logdata['_lastmonth'] != $month) {
174            $this->clean_month($this->logdata['_lastmonth']);
175            $this->logdata['_lastmonth'] = $month;
176        }
177
178        // save the data
179        io_saveFile($this->logcache, serialize($this->logdata));
180        $this->unlock();
181        return $lines;
182    }
183
184    /**
185     * Clean up the backlog
186     *
187     * Shortens IPs, referers, entry pages, user agents etc. to preserve space and memory
188     *
189     * @param string $month where to clean up
190     */
191    private function clean_month($month) {
192        if(!$month) return;
193
194        foreach(array('ip', 'page_url', 'referer_url', 'entry', 'useragent') as $type) {
195            if(is_array($this->logdata[$month][$type])) {
196                arsort($this->logdata[$month][$type]);
197                $this->logdata[$month][$type] = array_slice($this->logdata[$month][$type], 0, $this->top_limit);
198            }
199        }
200    }
201
202    /**
203     * Returns the common user agent name and version as a string
204     *
205     * @param $useragent
206     * @return string
207     */
208    private function ua($useragent) {
209        $ua = new Browser($useragent);
210        list($version) = explode('.', $ua->getVersion());
211        if(!$version) $version = ''; // no zero version
212        if($version == 'unknown') $version = '';
213        return trim($ua->getBrowser().' '.$version);
214    }
215
216    /**
217     * Lock the the analysis process
218     *
219     * @author Tom N Harris <tnharris@whoopdedo.org>
220     */
221    private function lock() {
222        global $conf;
223        $run  = 0;
224        $lock = $conf['lockdir'].'/_statdisplay.lock';
225        while(!@mkdir($lock, $conf['dmode'])) {
226            usleep(50);
227            if(is_dir($lock) && time() - @filemtime($lock) > 60 * 5) {
228                // looks like a stale lock - remove it
229                @rmdir($lock);
230                return false;
231            } elseif($run++ == 1000) {
232                // we waited 5 seconds for that lock
233                return false;
234            }
235        }
236        if($conf['dperm'])
237            chmod($lock, $conf['dperm']);
238        return true;
239    }
240
241    /**
242     * Unlock the the analysis process
243     *
244     * @author Tom N Harris <tnharris@whoopdedo.org>
245     */
246    private function unlock() {
247        global $conf;
248        @rmdir($conf['lockdir'].'/_statdisplay.lock');
249        return true;
250    }
251
252    /**
253     * Return the last 7 day's user traffic
254     *
255     * @param $date
256     * @return array
257     */
258    public function usertraffic($date) {
259        if(!$date) $date = date('Y-m');
260
261        $data = $this->logdata[$date]['usertraffic'];
262        $data = array_slice((array) $data, -7, 7, true); // limit to seven days
263
264        // add from previous month if needed
265        $num = count($data);
266        if($num < 7) {
267            $data += array_slice((array) $this->logdata[$this->prevmonth($date)]['usertraffic'], -1 * (7 - $num), 7 - $num, true);
268        }
269
270        // count up the traffic
271        $alltraffic  = 0;
272        $usertraffic = array();
273        foreach($data as $day => $info) {
274            foreach((array) $info as $user => $traffic) {
275                $usertraffic[$user] += $traffic;
276                $alltraffic += $traffic;
277            }
278        }
279        return $usertraffic;
280    }
281
282    /**
283     * Gives the sum of a certain column from the input array
284     *
285     * @param $input
286     * @param $key
287     * @return int
288     */
289    public function sum($input, $key=null) {
290        $sum = 0;
291        foreach((array) $input as $item) {
292            if(is_null($key)){
293                $val = $item;
294            }else{
295                $val = $item[$key];
296            }
297            $sum += $val;
298        }
299
300        return $sum;
301    }
302
303    /**
304     * Avarages a certain column from the input array
305     *
306     * @param $input
307     * @param $key
308     * @return float
309     */
310    public function avg($input, $key=null) {
311        $cnt = 0;
312        $all = 0;
313        foreach((array) $input as $item) {
314            if(is_null($key)){
315                $all += $item;
316            }else{
317                $all += $item[$key];
318            }
319            $cnt++;
320        }
321
322        if(!$cnt) return 0;
323        return $all / $cnt;
324    }
325
326    /**
327     * Gives maximum of a certain column from the input array
328     *
329     * @param $input
330     * @param $key
331     * @return int
332     */
333    public function max($input, $key=null) {
334        $max = 0;
335        foreach((array) $input as $item) {
336            if(is_null($key)){
337                $val = $item;
338            }else{
339                $val = $item[$key];
340            }
341
342            if($val > $max) $max = $val;
343        }
344
345        return $max;
346    }
347
348    /**
349     * return the month before the given month
350     *
351     * @param $date
352     * @return string
353     */
354    private function prevmonth($date) {
355        list($year, $month) = explode('-', $date);
356        $month = $month - 1;
357        if($month < 1) {
358            $year  = $year - 1;
359            $month = 12;
360        }
361        return sprintf("%d-%02d", $year, $month);
362    }
363
364}
365