xref: /plugin/statistics/Logger.php (revision 483101d3943f16fbc0d08b2a8b4db598dd56d6ec)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\HTTP\DokuHTTPClient;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use dokuwiki\Utf8\Clean;
12use helper_plugin_popularity;
13use helper_plugin_statistics;
14
15
/**
 * Writes access, session, search, media, edit and login statistics to the
 * plugin's SQLite database.
 *
 * Client information is derived once from the User-Agent header in the
 * constructor. Requests identified as bots are rejected by throwing.
 */
class Logger
{
    /** @var string Label stored when the browser or OS family cannot be determined */
    protected const UNKNOWN = 'Unknown';

    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /**
     * Constructor
     *
     * Parses the User-Agent header, initializes the client related properties
     * and records the "last seen" timestamp of an authenticated user.
     *
     * @param helper_plugin_statistics $hlp The statistics helper providing config and database
     * @throws \RuntimeException when the user agent is detected as a bot
     */
    public function __construct(helper_plugin_statistics $hlp)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isBot()) {
            $this->uaType = 'robot';

            // for now ignore bots
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        // The family lookups may yield null for unrecognized clients/systems; the
        // properties are typed as string, so fall back to a fixed label instead.
        $this->uaName = Browser::getBrowserFamily((string)$dd->getClient('name')) ?? self::UNKNOWN;
        $this->uaVersion = (string)$dd->getClient('version');
        $this->uaPlatform = OperatingSystem::getOsFamily((string)$dd->getOs('name')) ?? self::UNKNOWN;
        $this->uid = $this->getUID();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        }

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        // use the same handle the log methods write through
        $this->db->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    /**
     * Look up an identifier from the request, a DokuWiki preference cookie or the PHP session
     *
     * The first non-empty source wins, in that order.
     *
     * @param string $param Name of the request parameter to check first
     * @param string $pref Name of the DokuWiki preference to check second
     * @return string The identifier, may be empty when no source provides one
     */
    private function resolveIdentifier(string $param, string $pref): string
    {
        global $INPUT;

        $id = $INPUT->str($param);
        if (!$id) $id = get_doku_pref($pref, false);
        if (!$id) $id = session_id();
        return (string)$id; // session_id() may return false
    }

    /**
     * Get the unique user ID
     *
     * Taken from the 'uid' request parameter, the 'plgstats' preference or the PHP session ID
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        return $this->resolveIdentifier('uid', 'plgstats');
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback).
     * Taken from the 'ses' request parameter, the 'plgstatsses' preference or the PHP session ID
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        return $this->resolveIdentifier('ses', 'plgstatsses');
    }

    /**
     * Log that we've seen the user (authenticated only)
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        if ($user === '') return; // explicit comparison: a user named "0" is still a user

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * Only groups that are listed in the 'loggroups' configuration are logged,
     * all of them in a single multi-row insert.
     *
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(string $type, array $groups): void
    {
        if (!$groups) return;

        $toLog = (array)$this->hlp->getConf('loggroups');
        $groups = array_intersect($groups, $toLog);
        if (!$groups) return;

        // one "(?, ?)" tuple per group, each bound to (type, group)
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        foreach ($groups as $group) {
            $params[] = $type;
            $params[] = $group;
        }

        $this->db->exec("INSERT INTO groups (`type`, `group`) VALUES $placeholders", $params);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Passed by reference; set to 'search' when the referer is a search engine
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
    }

    /**
     * Log search data to the search related tables
     *
     * Writes one row for the query itself and one row per non-empty word.
     * No words are written when the query row could not be inserted.
     *
     * @param string $page The page being searched from
     * @param string $query The search query
     * @param array $words Array of search words
     * @param string $engine The search engine name
     */
    public function logSearch(string $page, string $query, array $words, string $engine): void
    {
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query,
            $engine
        );
        if (!$sid) return;

        foreach ($words as $word) {
            // skip empty fragments only; "0" is a legitimate search word
            if ((string)$word === '') continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType != 'browser') return;

        $session = $this->getSession();

        // REPLACE rewrites the whole row, so the start time (dt) and the view
        // counter of an already known session are carried over via sub-selects.
        // Only the end time is always moved forward.
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                COALESCE((SELECT dt FROM session WHERE session = ?), CURRENT_TIMESTAMP),
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Nothing is done when the IP has an entry that was updated within the last
     * 30 days. Failed or unsuccessful lookups are silently skipped.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
             FROM   iplocation
             WHERE  ip = ?
               AND  lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!is_array($data) || !isset($data['status']) || $data['status'] !== 'success') {
            return; // FIXME log error
        }

        // gethostbyaddr() returns false on malformed input; store the IP itself then
        $host = gethostbyaddr($ip) ?: $ip;

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, lastupd
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $ip,
            $data['country'] ?? '', // individual fields may be missing in the response
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * The link is read from the 'ol' request parameter, the page from 'p'.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $this->getSession(),
            $INPUT->str('p'),
            md5($link),
            $link
        );
    }

    /**
     * Log a page access
     *
     * Called from log.php
     *
     * Besides the access itself this records the referer (including external
     * searches), the groups of the current user and the location of the client IP.
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        if ($page === '') return; // explicit comparison: "0" is a possible page id

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $ref = '';
        $ref_md5 = '';
        $ref_type = '';
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // switches $ref_type to 'search' when the referer is a search engine
                $this->logExternalSearch($referer, $ref_type);
            }
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('view', $USERINFO['grps']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // always yield two parts, even when the mime type has no subtype
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime), 2), 2, '');

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline ? 1 : 0
        );
    }

    /**
     * Log page edits
     *
     * Also records the groups of the current user as 'edit' access.
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('edit', $USERINFO['grps']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if ($user === '') $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistoryCounts($conf['datadir'], false, 'page_count', 'page_size');
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistoryCounts($conf['mediadir'], true, 'media_count', 'media_size');
    }

    /**
     * Count the files below a directory and store count and size in the history table
     *
     * Nothing is written when the popularity helper plugin cannot be loaded.
     *
     * @param string $dir The directory to scan
     * @param bool $all Value of the 'all' option passed to search()
     * @param string $countInfo History key under which the file count is stored
     * @param string $sizeInfo History key under which the total size is stored
     */
    protected function logHistoryCounts(string $dir, bool $all, string $countInfo, string $sizeInfo): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity|null $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) return; // plugin not available, nothing to count with

        // pre-seed the counters read below so they exist for empty directories
        $list = ['file_count' => 0, 'file_size' => 0];
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        $this->logHistoryValue($countInfo, $list['file_count']);
        $this->logHistoryValue($sizeInfo, $list['file_size']);
    }

    /**
     * Store a single value in the history table
     *
     * @param string $info The history key
     * @param int|float $value The value to store
     */
    protected function logHistoryValue(string $info, $value): void
    {
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            $info,
            $value
        );
    }
}
550