1<?php
2
3require dirname(__FILE__) . '/StatisticsBrowscap.class.php';
4
/**
 * Writes usage statistics (page views, media access, edits, logins, searches,
 * sessions, outgoing links, ...) to the statistics plugin's database tables.
 *
 * Every statement is sent through the helper plugin's runSQL() method and every
 * table name is built from the helper's prefix.
 *
 * NOTE(review): string values are escaped with addslashes() before they are
 * interpolated into SQL. addslashes() is not charset aware; if the helper offers
 * a real escaping or prepared statement API it should be used instead - that
 * API is not visible from this file, so the escaping is left as it was.
 */
class StatisticsLogger {
    /** @var helper_plugin_statistics helper plugin; supplies prefix, getConf() and runSQL() */
    private $hlp;

    /** @var string the trimmed User-Agent header, empty string when none was sent */
    private $ua_agent;
    /** @var string one of 'browser', 'robot' or 'feedreader' */
    private $ua_type;
    /** @var string browser name as reported by StatisticsBrowscap */
    private $ua_name;
    /** @var string browser version as reported by StatisticsBrowscap */
    private $ua_version;
    /** @var string platform as reported by StatisticsBrowscap */
    private $ua_platform;

    /** @var string unique user ID, see getUID() */
    private $uid;

    /**
     * Parses the browser info, determines the user ID and logs the last seen time
     *
     * @param helper_plugin_statistics $hlp the helper plugin used for all database access
     */
    public function __construct(helper_plugin_statistics $hlp) {
        $this->hlp = $hlp;

        // the header is optional (CLI, some bots), avoid passing null to trim()
        $this->ua_agent = isset($_SERVER['HTTP_USER_AGENT']) ? trim($_SERVER['HTTP_USER_AGENT']) : '';
        $bc             = new StatisticsBrowscap();
        $ua             = $bc->getBrowser($this->ua_agent);
        $this->ua_name  = $ua->Browser;
        $this->ua_type  = 'browser';
        // a feed reader flag wins over the crawler flag because it is checked last
        if($ua->Crawler) $this->ua_type = 'robot';
        if($ua->isSyndicationReader) $this->ua_type = 'feedreader';
        $this->ua_version  = $ua->Version;
        $this->ua_platform = $ua->Platform;

        $this->uid = $this->getUID();

        $this->log_lastseen();
    }

    /**
     * Read a request parameter as string
     *
     * Missing parameters and non-scalar ones (eg. `p[]=x`) yield an empty string,
     * so callers can safely hand the result to string functions.
     *
     * @param string $key name of the parameter in $_REQUEST
     * @return string
     */
    private function requestParam($key) {
        if(!isset($_REQUEST[$key]) || !is_scalar($_REQUEST[$key])) return '';
        return (string) $_REQUEST[$key];
    }

    /**
     * Name of the authenticated user as found in $_SERVER['REMOTE_USER']
     *
     * @return string empty string when no user is set
     */
    private function remoteUser() {
        return isset($_SERVER['REMOTE_USER']) ? (string) $_SERVER['REMOTE_USER'] : '';
    }

    /**
     * Send the global message queue to the debug log when a query failed
     *
     * Failures are never printed: output produced here would end up in whatever
     * response the caller is currently generating.
     *
     * @param mixed $result return value of runSQL(); null is treated as failure
     */
    private function reportFailure($result) {
        if(!is_null($result)) return;
        global $MSG;
        dbglog($MSG);
    }

    /**
     * Get the unique user ID
     *
     * Taken from the 'uid' request parameter, then from the 'plgstats'
     * preference, finally from the PHP session ID.
     *
     * @return string
     */
    protected function getUID() {
        $uid = $this->requestParam('uid');
        if(!$uid) $uid = get_doku_pref('plgstats', false);
        if(!$uid) $uid = session_id();
        return $uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback)
     *
     * Taken from the 'ses' request parameter, then from the 'plgstatsses'
     * preference, finally from the PHP session ID.
     *
     * @return string
     */
    protected function getSession() {
        $ses = $this->requestParam('ses');
        if(!$ses) $ses = get_doku_pref('plgstatsses', false);
        if(!$ses) $ses = session_id();
        return $ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     *
     * This is called directly from the constructor and thus logs always,
     * regardless from where the log is initiated
     */
    public function log_lastseen() {
        if(empty($_SERVER['REMOTE_USER'])) return;
        $user = addslashes($_SERVER['REMOTE_USER']);

        $sql = "REPLACE INTO " . $this->hlp->prefix . "lastseen
                    SET `user` = '$user'
               ";
        $this->hlp->runSQL($sql);
    }

    /**
     * Log actions by groups
     *
     * When the 'loggroups' option is set, only the groups listed there are
     * logged. Nothing is written when no group remains.
     *
     * @param string $type   The type of access to log ('view','edit')
     * @param array  $groups The groups to log
     */
    public function log_groups($type, $groups) {
        if(!is_array($groups)) {
            return;
        }

        // NOTE(review): in_array() below needs 'loggroups' to be an array - confirm
        // that the helper's getConf() really returns one for this option
        $tolog = $this->hlp->getConf('loggroups');
        if($tolog) {
            foreach($groups as $pos => $group) {
                if(!in_array($group, $tolog)) unset($groups[$pos]);
            }
        }
        if(!count($groups)) {
            return;
        }

        $type = addslashes($type);

        // one multi-row insert for all groups
        $rows = array();
        foreach($groups as $group) {
            $group  = addslashes($group);
            $rows[] = "( NOW(), '$type', '$group' )";
        }
        $sql = "INSERT DELAYED INTO " . $this->hlp->prefix . "groups
                     (`dt`, `type`, `group`) VALUES " . join(',', $rows);

        $this->reportFailure($this->hlp->runSQL($sql));
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer the referring URL
     * @param string $type    referer type, passed by reference; set to 'search'
     *                        when the referer was identified as a search engine
     */
    public function log_externalsearch($referer, &$type) {
        $referer = utf8_strtolower($referer);
        include(dirname(__FILE__) . '/searchengines.php');
        /** @var array $SEARCHENGINES */

        $query = '';
        $name  = '';

        // parse the referer; parse_url() returns false for seriously malformed URLs
        $urlparts = parse_url($referer);
        if(!is_array($urlparts)) return;
        $domain = isset($urlparts['host']) ? $urlparts['host'] : '';
        $qpart  = isset($urlparts['query']) ? $urlparts['query'] : '';
        if(!$qpart && isset($urlparts['fragment'])) $qpart = $urlparts['fragment']; //google does this

        $params = array();
        parse_str($qpart, $params);

        // check domain against common search engines
        foreach($SEARCHENGINES as $regex => $info) {
            if(!preg_match('/' . $regex . '/', $domain)) continue;

            $type = 'search';
            // first entry is the engine's name, the rest are its query parameters
            $name = array_shift($info);
            foreach($info as $k) {
                if(empty($params[$k])) continue;
                $query = $params[$k];
                break;
            }
            break;
        }

        // try some generic search engine parameters
        if($type != 'search') {
            foreach(array('search', 'query', 'q', 'keywords', 'keyword') as $k) {
                if(empty($params[$k])) continue;
                $query = $params[$k];
                // we seem to have found some generic search, generate name from domain
                $name = preg_replace('/(\.co)?\.([a-z]{2,5})$/', '', $domain); //strip tld
                $name = explode('.', $name);
                $name = array_pop($name);
                $type = 'search';
                break;
            }
        }

        // still no hit? return
        if($type != 'search') return;

        // parse_str() creates arrays for parameters like q[]=foo, those are no usable query
        if(!is_string($query)) $query = '';

        // clean the query
        $query = preg_replace('/^(cache|related):[^\+]+/', '', $query); // non-search queries
        $query = preg_replace('/ +/', ' ', $query); // ws compact
        $query = trim($query);
        if(!utf8_check($query)) $query = utf8_encode($query); // assume latin1 if not utf8

        // no query? no log
        if(!$query) return;

        // log it!
        $words = explode(' ', utf8_stripspecials($query, ' ', '\._\-:\*'));
        $this->log_search($this->requestParam('p'), $query, $words, $name);
    }

    /**
     * Write the given data to the search related tables
     *
     * One row is written to the search table, then one row per non-empty word
     * to the searchwords table. The words are skipped when the first insert fails.
     *
     * @param string   $page   the page the search led to
     * @param string   $query  the full search query
     * @param string[] $words  the single words of the query
     * @param string   $engine name of the search engine
     */
    public function log_search($page, $query, $words, $engine) {
        $page   = addslashes($page);
        $query  = addslashes($query);
        $engine = addslashes($engine);

        $sql = "INSERT INTO " . $this->hlp->prefix . "search
                    SET dt       = NOW(),
                        page     = '$page',
                        query    = '$query',
                        engine   = '$engine'";
        $id  = $this->hlp->runSQL($sql);
        if(is_null($id)) return;

        // presumably the ID of the row inserted above (verify against runSQL());
        // it is used unquoted below, so make sure it is a plain number
        $id = (int) $id;

        foreach($words as $word) {
            if(!$word) continue;
            $word = addslashes($word);
            $sql  = "INSERT DELAYED INTO " . $this->hlp->prefix . "searchwords
                       SET sid  = $id,
                           word = '$word'";
            $this->hlp->runSQL($sql);
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function log_session($addview = 0) {
        // only log browser sessions
        if($this->ua_type != 'browser') return;

        // used unquoted in the statement, so it has to be a plain number
        $addview = (int) $addview;
        $session = addslashes($this->getSession());
        $uid     = addslashes($this->uid);
        $sql     = "INSERT DELAYED INTO " . $this->hlp->prefix . "session
                   SET session = '$session',
                       dt      = NOW(),
                       end     = NOW(),
                       views   = $addview,
                       uid     = '$uid'
                ON DUPLICATE KEY UPDATE
                       end     = NOW(),
                       views   = views + $addview,
                       uid     = '$uid'";
        $this->hlp->runSQL($sql);
    }

    /**
     * Resolve IP to country/city
     *
     * Does nothing when the iplocation table already holds an entry for the IP
     * that was updated within the last 30 days. Otherwise the location is
     * requested from api.hostip.info and stored together with the reverse
     * resolved host name.
     *
     * @param string $ip the IP address to look up
     */
    public function log_ip($ip) {
        // check if IP already known and up-to-date
        $sql    = "SELECT ip
                  FROM " . $this->hlp->prefix . "iplocation
                 WHERE ip ='" . addslashes($ip) . "'
                   AND lastupd > DATE_SUB(CURDATE(),INTERVAL 30 DAY)";
        $result = $this->hlp->runSQL($sql);
        if(!empty($result[0]['ip'])) return;

        $http          = new DokuHTTPClient();
        $http->timeout = 10;
        $data          = $http->get('http://api.hostip.info/get_html.php?ip=' . rawurlencode($ip));
        if(!is_string($data)) return; // request failed

        if(preg_match('/^Country: (.*?) \((.*?)\)\nCity: (.*?)$/s', $data, $match)) {
            // gethostbyaddr() returns false for malformed addresses
            $host = gethostbyaddr($ip);
            if($host === false) $host = '';

            $country = addslashes(ucwords(strtolower(trim($match[1]))));
            $code    = addslashes(strtolower(trim($match[2])));
            $city    = addslashes(ucwords(strtolower(trim($match[3]))));
            $host    = addslashes($host);
            $ip      = addslashes($ip);

            $sql = "REPLACE INTO " . $this->hlp->prefix . "iplocation
                        SET ip = '$ip',
                            country = '$country',
                            code    = '$code',
                            city    = '$city',
                            host    = '$host'";
            $this->hlp->runSQL($sql);
        }
    }

    /**
     * log a click on an external link
     *
     * called from log.php
     *
     * Reads the link from the 'ol' and the page from the 'p' request parameter.
     */
    public function log_outgoing() {
        $outlink = $this->requestParam('ol');
        if(!$outlink) return;

        $link     = addslashes($outlink);
        // NOTE(review): the hash is built from the escaped link; kept that way so
        // new rows keep matching the hashes of rows that are already stored
        $link_md5 = md5($link);
        $session  = addslashes($this->getSession());
        $page     = addslashes($this->requestParam('p'));

        $sql = "INSERT DELAYED INTO " . $this->hlp->prefix . "outlinks
                    SET dt       = NOW(),
                        session  = '$session',
                        page     = '$page',
                        link_md5 = '$link_md5',
                        link     = '$link'";
        $this->reportFailure($this->hlp->runSQL($sql));
    }

    /**
     * log a page access
     *
     * called from log.php
     *
     * Reads page ('p'), referer ('r'), screen size ('sx', 'sy'), viewport size
     * ('vx', 'vy') and the JavaScript flag ('js') from the request. Besides the
     * access itself this records the referer as seen, logs the user's groups
     * and triggers the IP lookup.
     */
    public function log_access() {
        $reqpage = $this->requestParam('p');
        if(!$reqpage) return;
        global $USERINFO;

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $referer  = trim($this->requestParam('r'));
        $ref      = '';
        $ref_md5  = '';
        $ref_type = '';
        if($referer) {
            $ref     = addslashes($referer);
            $ref_md5 = md5($referer);
            if(strpos($referer, DOKU_URL) === 0) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // may change $ref_type to 'search'
                $this->log_externalsearch($referer, $ref_type);
            }
        }

        // handle user agent
        $ua      = addslashes($this->ua_agent);
        $ua_type = addslashes($this->ua_type);
        $ua_ver  = addslashes($this->ua_version);
        $os      = addslashes($this->ua_platform);
        $ua_info = addslashes($this->ua_name);

        $page    = addslashes($reqpage);
        $ip      = addslashes(clientIP(true));
        $sx      = (int) $this->requestParam('sx');
        $sy      = (int) $this->requestParam('sy');
        $vx      = (int) $this->requestParam('vx');
        $vy      = (int) $this->requestParam('vy');
        $js      = (int) $this->requestParam('js');
        $uid     = addslashes($this->uid);
        $user    = addslashes($this->remoteUser());
        $session = addslashes($this->getSession());

        $sql = "INSERT DELAYED INTO " . $this->hlp->prefix . "access
                    SET dt       = NOW(),
                        page     = '$page',
                        ip       = '$ip',
                        ua       = '$ua',
                        ua_info  = '$ua_info',
                        ua_type  = '$ua_type',
                        ua_ver   = '$ua_ver',
                        os       = '$os',
                        ref      = '$ref',
                        ref_md5  = '$ref_md5',
                        ref_type = '$ref_type',
                        screen_x = '$sx',
                        screen_y = '$sy',
                        view_x   = '$vx',
                        view_y   = '$vy',
                        js       = '$js',
                        user     = '$user',
                        session  = '$session',
                        uid      = '$uid'";
        $this->reportFailure($this->hlp->runSQL($sql));

        // remember when the referer was seen, IGNORE keeps an already existing entry
        $sql = "INSERT DELAYED IGNORE INTO " . $this->hlp->prefix . "refseen
                   SET ref_md5  = '$ref_md5',
                       dt       = NOW()";
        $this->reportFailure($this->hlp->runSQL($sql));

        // log group access
        if(isset($USERINFO['grps'])) {
            $this->log_groups('view', $USERINFO['grps']);
        }

        // resolve the IP
        $this->log_ip(clientIP(true));
    }

    /**
     * Log access to a media file
     *
     * called from action.php
     *
     * @param string $media the media ID
     * @param string $mime  the media's mime type
     * @param bool $inline is this displayed inline?
     * @param int $size size of the media file
     */
    public function log_media($media, $mime, $inline, $size) {
        // handle user agent
        $ua      = addslashes($this->ua_agent);
        $ua_type = addslashes($this->ua_type);
        $ua_ver  = addslashes($this->ua_version);
        $os      = addslashes($this->ua_platform);
        $ua_info = addslashes($this->ua_name);

        $media = addslashes($media);
        // split "type/subtype"; pad so a mime type without a slash gives an empty subtype
        list($mime1, $mime2) = array_pad(explode('/', strtolower((string) $mime)), 2, '');
        $mime1  = addslashes($mime1);
        $mime2  = addslashes($mime2);
        $inline = $inline ? 1 : 0;
        $size   = (int) $size;

        $ip      = addslashes(clientIP(true));
        $uid     = addslashes($this->uid);
        $user    = addslashes($this->remoteUser());
        $session = addslashes($this->getSession());

        $sql = "INSERT DELAYED INTO " . $this->hlp->prefix . "media
                    SET dt       = NOW(),
                        media    = '$media',
                        ip       = '$ip',
                        ua       = '$ua',
                        ua_info  = '$ua_info',
                        ua_type  = '$ua_type',
                        ua_ver   = '$ua_ver',
                        os       = '$os',
                        user     = '$user',
                        session  = '$session',
                        uid      = '$uid',
                        size     = $size,
                        mime1    = '$mime1',
                        mime2    = '$mime2',
                        inline   = $inline
                        ";
        $this->reportFailure($this->hlp->runSQL($sql));
    }

    /**
     * Log edits
     *
     * Also logs the current user's groups as 'edit' access.
     *
     * @param string $page the edited page
     * @param string $type the kind of edit, stored as given
     */
    public function log_edit($page, $type) {
        global $USERINFO;

        $ip      = addslashes(clientIP(true));
        $user    = addslashes($this->remoteUser());
        $session = addslashes($this->getSession());
        $uid     = addslashes($this->uid);
        $page    = addslashes($page);
        $type    = addslashes($type);

        $sql = "INSERT DELAYED INTO " . $this->hlp->prefix . "edits
                    SET dt       = NOW(),
                        page     = '$page',
                        type     = '$type',
                        ip       = '$ip',
                        user     = '$user',
                        session  = '$session',
                        uid      = '$uid'";
        $this->hlp->runSQL($sql);

        // log group access
        if(isset($USERINFO['grps'])) {
            $this->log_groups('edit', $USERINFO['grps']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type the kind of event, stored as given
     * @param string $user the affected user, defaults to the current remote user
     */
    public function log_login($type, $user = '') {
        if(!$user) $user = $this->remoteUser();

        $ip      = addslashes(clientIP(true));
        $user    = addslashes($user);
        $session = addslashes($this->getSession());
        $uid     = addslashes($this->uid);
        $type    = addslashes($type);

        $sql = "INSERT DELAYED INTO " . $this->hlp->prefix . "logins
                    SET dt       = NOW(),
                        type     = '$type',
                        ip       = '$ip',
                        user     = '$user',
                        session  = '$session',
                        uid      = '$uid'";
        $this->hlp->runSQL($sql);
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function log_history_pages() {
        global $conf;
        $this->logHistory($conf['datadir'], false, 'page');
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function log_history_media() {
        global $conf;
        $this->logHistory($conf['mediadir'], true, 'media');
    }

    /**
     * Count the files below a directory and store the result in the history table
     *
     * Two rows are written for the current date: "<name>_count" and "<name>_size".
     *
     * @param string $dir  the directory to scan
     * @param bool   $all  value of the 'all' option handed to the search callback
     * @param string $name prefix of the info keys, 'page' or 'media'
     */
    private function logHistory($dir, $all, $name) {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        if(!$pop) return; // without the helper there is no callback to count with

        $list = array();
        search($list, $dir, array($pop, 'searchCountCallback'), array('all' => $all), '');

        // both values go into the statement unquoted, fall back to 0 when the
        // callback did not provide them
        $count = (isset($list['file_count']) && is_numeric($list['file_count'])) ? $list['file_count'] : 0;
        $size  = (isset($list['file_size']) && is_numeric($list['file_size'])) ? $list['file_size'] : 0;

        $sql = "REPLACE INTO " . $this->hlp->prefix . "history
                        (`info`, `value`, `dt`)
                        VALUES
                        ( '{$name}_count', $count, DATE(NOW()) ),
                        ( '{$name}_size',  $size, DATE(NOW()) )
                        ";
        $this->reportFailure($this->hlp->runSQL($sql));
    }

}
548